)]}'
{"/COMMIT_MSG":[{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":6,"context_line":""},{"line_number":7,"context_line":"Proposes NUMA topology with RPs"},{"line_number":8,"context_line":""},{"line_number":9,"context_line":"That\u0027s the spec for blueprint numa-topology-with-rps"},{"line_number":10,"context_line":""},{"line_number":11,"context_line":"Change-Id: I0c804743db77da5717c9c37e3c5ba57a9a3950ad"}],"source_content_type":"text/x-gerrit-commit-message","patch_set":3,"id":"df7087c5_2d209cf7","line":9,"range":{"start_line":9,"start_character":0,"end_line":9,"end_character":6},"updated":"2018-03-19 22:08:41.000000000","message":"This is :P\n\nAnd you totally can\u0027t get away with this as a commit message.  Copy/paste the first paragraph of the spec.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"}],"specs/rocky/approved/numa-topology-with-rps.rst":[{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":12,"context_line":""},{"line_number":13,"context_line":"Now that `Nested Resource Providers`_ is a thing in both Placement API and"},{"line_number":14,"context_line":"Nova compute nodes, we could use the Resource Providers tree for explaining"},{"line_number":15,"context_line":"the relationship in between a root Resource Provider (root RP) ie. a compute,"},{"line_number":16,"context_line":"and one or more Non-Uniform Memory Access (NUMA) nodes (aka. cells), each of"},{"line_number":17,"context_line":"them having separate resources, like memory or PCI devices, even if any core"},{"line_number":18,"context_line":"can access those resources."}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_6d6bf41e","line":15,"range":{"start_line":15,"start_character":69,"end_line":15,"end_character":76},"updated":"2018-03-19 22:08:41.000000000","message":"compute *node*, tbc","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":12,"context_line":""},{"line_number":13,"context_line":"Now that `Nested Resource Providers`_ is a thing in both Placement API and"},{"line_number":14,"context_line":"Nova compute nodes, we could use the Resource Providers tree for explaining"},{"line_number":15,"context_line":"the relationship in between a root Resource Provider (root RP) ie. a compute,"},{"line_number":16,"context_line":"and one or more Non-Uniform Memory Access (NUMA) nodes (aka. cells), each of"},{"line_number":17,"context_line":"them having separate resources, like memory or PCI devices, even if any core"},{"line_number":18,"context_line":"can access those resources."}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_0d7d98e6","line":15,"range":{"start_line":15,"start_character":17,"end_line":15,"end_character":19},"updated":"2018-03-19 22:08:41.000000000","message":"strike","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"6a887ebdc8ef2edc99f5bc47108dd455e137b74d","unresolved":false,"context_lines":[{"line_number":25,"context_line":"like to keep that filter, the problem is that the NUMA topology is directly"},{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."},{"line_number":29,"context_line":"How a specific pCPU within the NUMA node would be pinned for a vGPU would still"},{"line_number":30,"context_line":"be done out of the Placement API."},{"line_number":31,"context_line":""}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_3c2366bf","line":28,"range":{"start_line":28,"start_character":5,"end_line":28,"end_character":71},"updated":"2018-03-14 16:44:32.000000000","message":"how would we store the information about which numa node was selected in order to pass it on to the compute node?","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"c05ff8932da7d6d9018ca28a3e4f1679ae81d299","unresolved":false,"context_lines":[{"line_number":25,"context_line":"like to keep that filter, the problem is that the NUMA topology is directly"},{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."},{"line_number":29,"context_line":"How a specific pCPU within the NUMA node would be pinned for a vGPU would still"},{"line_number":30,"context_line":"be done out of the Placement API."},{"line_number":31,"context_line":""}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_a68afd66","line":28,"range":{"start_line":28,"start_character":5,"end_line":28,"end_character":71},"in_reply_to":"df7087c5_3c2366bf","updated":"2018-03-14 19:21:49.000000000","message":"If the NUMA node is a resource provider and some resources were claimed for the instance, then the resource provider\u0027s UUID will be in the allocation request that is received by the compute node and we could look up the NUMA node that way.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":25,"context_line":"like to keep that filter, the problem is that the NUMA topology is directly"},{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."},{"line_number":29,"context_line":"How a specific pCPU within the NUMA node would be pinned for a vGPU would still"},{"line_number":30,"context_line":"be done out of the Placement API."},{"line_number":31,"context_line":""}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_4d4b90af","line":28,"range":{"start_line":28,"start_character":5,"end_line":28,"end_character":71},"in_reply_to":"df7087c5_a68afd66","updated":"2018-03-19 22:08:41.000000000","message":"Yuh; it\u0027s the same way virt would know where *any* resource comes from when it receives the allocation request.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":48,"context_line":"        |CPU1| |CPU2|      |CPU3| |CPU4|"},{"line_number":49,"context_line":"        +----+ +----+      +----+ +----+"},{"line_number":50,"context_line":""},{"line_number":51,"context_line":"Here, CPU1 and CPU2 would share the same memory thru a common bus, but not CPU3"},{"line_number":52,"context_line":"and CPU4 that would be on a second bus."},{"line_number":53,"context_line":""},{"line_number":54,"context_line":"Ideally, intensive applications that would require more than one core would"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_1b6f3fc6","line":51,"range":{"start_line":51,"start_character":62,"end_line":51,"end_character":65},"updated":"2018-03-23 17:00:31.000000000","message":"nit memory controller","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":48,"context_line":"        |CPU1| |CPU2|      |CPU3| |CPU4|"},{"line_number":49,"context_line":"        +----+ +----+      +----+ +----+"},{"line_number":50,"context_line":""},{"line_number":51,"context_line":"Here, CPU1 and CPU2 would share the same memory thru a common bus, but not CPU3"},{"line_number":52,"context_line":"and CPU4 that would be on a second bus."},{"line_number":53,"context_line":""},{"line_number":54,"context_line":"Ideally, intensive applications that would require more than one core would"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_a8071518","line":51,"range":{"start_line":51,"start_character":48,"end_line":51,"end_character":52},"updated":"2018-03-15 13:47:38.000000000","message":"through","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":49,"context_line":"        +----+ +----+      +----+ +----+"},{"line_number":50,"context_line":""},{"line_number":51,"context_line":"Here, CPU1 and CPU2 would share the same memory thru a common bus, but not CPU3"},{"line_number":52,"context_line":"and CPU4 that would be on a second bus."},{"line_number":53,"context_line":""},{"line_number":54,"context_line":"Ideally, intensive applications that would require more than one core would"},{"line_number":55,"context_line":"try to make sure that both cores are allocated on the same NUMA node, or some"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_fb65abe2","line":52,"range":{"start_line":52,"start_character":35,"end_line":52,"end_character":38},"updated":"2018-03-23 17:00:31.000000000","message":"memory controller.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":51,"context_line":"Here, CPU1 and CPU2 would share the same memory thru a common bus, but not CPU3"},{"line_number":52,"context_line":"and CPU4 that would be on a second bus."},{"line_number":53,"context_line":""},{"line_number":54,"context_line":"Ideally, intensive applications that would require more than one core would"},{"line_number":55,"context_line":"try to make sure that both cores are allocated on the same NUMA node, or some"},{"line_number":56,"context_line":"performance penalties would occur."},{"line_number":57,"context_line":"Of course, for the moment, if you\u0027re an operator, you can provide a flavor for"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_3bd00351","line":54,"range":{"start_line":54,"start_character":9,"end_line":54,"end_character":18},"updated":"2018-03-23 17:00:31.000000000","message":"compute and io intensive","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":52,"context_line":"and CPU4 that would be on a second bus."},{"line_number":53,"context_line":""},{"line_number":54,"context_line":"Ideally, intensive applications that would require more than one core would"},{"line_number":55,"context_line":"try to make sure that both cores are allocated on the same NUMA node, or some"},{"line_number":56,"context_line":"performance penalties would occur."},{"line_number":57,"context_line":"Of course, for the moment, if you\u0027re an operator, you can provide a flavor for"},{"line_number":58,"context_line":"asking that by using an extra spec like:"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_bbe2d32e","line":55,"range":{"start_line":55,"start_character":47,"end_line":55,"end_character":68},"updated":"2018-03-23 17:00:31.000000000","message":"nit this is only true for compute or io bound workloads.\nworkloads that are memory bound may instead beift form spanning numa nodes due to the increased memory bandwith provided they are not share state between cores on different numa nodes.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"c05ff8932da7d6d9018ca28a3e4f1679ae81d299","unresolved":false,"context_lines":[{"line_number":67,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":68,"context_line":""},{"line_number":69,"context_line":"Now, imagine a world where instead of having that extra spec only for libvirt,"},{"line_number":70,"context_line":"you could use a Placement extra spec..."},{"line_number":71,"context_line":""},{"line_number":72,"context_line":"#2 : As a user, I\u0027d like to get my vGPUs as close as possible from the vCPUs"},{"line_number":73,"context_line":"----------------------------------------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_66f5c5ec","line":70,"range":{"start_line":70,"start_character":17,"end_line":70,"end_character":39},"updated":"2018-03-14 19:21:49.000000000","message":"there\u0027s no such thing as a Placement extra spec. I believe you are referring to the translation of flavor extra specs into the request groups that are sent to placement that contain resource amounts and traits?","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":67,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":68,"context_line":""},{"line_number":69,"context_line":"Now, imagine a world where instead of having that extra spec only for libvirt,"},{"line_number":70,"context_line":"you could use a Placement extra spec..."},{"line_number":71,"context_line":""},{"line_number":72,"context_line":"#2 : As a user, I\u0027d like to get my vGPUs as close as possible from the vCPUs"},{"line_number":73,"context_line":"----------------------------------------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_3b2c8346","line":70,"range":{"start_line":70,"start_character":17,"end_line":70,"end_character":39},"in_reply_to":"df7087c5_4de07097","updated":"2018-03-23 17:00:31.000000000","message":"i think this usecase is actully an anti example.\n\nthe hw:numa_* fields are defiend to specify the numa topoloy of the guest only. they have no relation to the numa topology of the host untill you spcify a guest to host numa afinity policy.\n\nso that said libvirt is the only dirver the supports all these options and it only supports a stict affinity policy where it pinnes a virtual numa node to host numa node. we could use placement to prefilter for that but we would therefore be saying that each guest virtual numa node will be schduled to a different host numa node wich is not required today.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":67,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":68,"context_line":""},{"line_number":69,"context_line":"Now, imagine a world where instead of having that extra spec only for libvirt,"},{"line_number":70,"context_line":"you could use a Placement extra spec..."},{"line_number":71,"context_line":""},{"line_number":72,"context_line":"#2 : As a user, I\u0027d like to get my vGPUs as close as possible from the vCPUs"},{"line_number":73,"context_line":"----------------------------------------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_4de07097","line":70,"range":{"start_line":70,"start_character":17,"end_line":70,"end_character":39},"in_reply_to":"df7087c5_66f5c5ec","updated":"2018-03-19 22:08:41.000000000","message":"In any case, could you give an example here?  Or are you saving that for later?","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":69,"context_line":"Now, imagine a world where instead of having that extra spec only for libvirt,"},{"line_number":70,"context_line":"you could use a Placement extra spec..."},{"line_number":71,"context_line":""},{"line_number":72,"context_line":"#2 : As a user, I\u0027d like to get my vGPUs as close as possible from the vCPUs"},{"line_number":73,"context_line":"----------------------------------------------------------------------------"},{"line_number":74,"context_line":""},{"line_number":75,"context_line":"Say now that the above NUMA topology with a 2-sockets, 4 cores no HT is having"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_0df4b85a","line":72,"range":{"start_line":72,"start_character":62,"end_line":72,"end_character":66},"updated":"2018-03-19 22:08:41.000000000","message":"to","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"c05ff8932da7d6d9018ca28a3e4f1679ae81d299","unresolved":false,"context_lines":[{"line_number":87,"context_line":"       +----+ +----- +----+      +----+ +----+"},{"line_number":88,"context_line":""},{"line_number":89,"context_line":"Here, the physical GPU device (PGPU) would share a common bus with CPU1 and"},{"line_number":90,"context_line":"CPU2, but not with CPU3 and CPU3."},{"line_number":91,"context_line":""},{"line_number":92,"context_line":"In that case, imagine I\u0027d like to run a CUDA library like OpenACC for computing"},{"line_number":93,"context_line":"things like Artificial Intelligence calculations, or coin mining (heh), I\u0027d"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_86d33960","line":90,"range":{"start_line":90,"start_character":28,"end_line":90,"end_character":32},"updated":"2018-03-14 19:21:49.000000000","message":"CPU4","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"c05ff8932da7d6d9018ca28a3e4f1679ae81d299","unresolved":false,"context_lines":[{"line_number":101,"context_line":"------------------------------------------"},{"line_number":102,"context_line":""},{"line_number":103,"context_line":"Given virt drivers can pass a resource providers tree to the compute service,"},{"line_number":104,"context_line":"for example the libvirt driver could create a tree for a (2-socket, 16 nodes)"},{"line_number":105,"context_line":"NUMA topology having one GPU device and one SRIOV PF like this:"},{"line_number":106,"context_line":""},{"line_number":107,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_e6b9d514","line":104,"range":{"start_line":104,"start_character":71,"end_line":104,"end_character":76},"updated":"2018-03-14 19:21:49.000000000","message":"core","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"c05ff8932da7d6d9018ca28a3e4f1679ae81d299","unresolved":false,"context_lines":[{"line_number":115,"context_line":"                          /                 \\"},{"line_number":116,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":117,"context_line":"   | NUMA1_rp             |                 | NUMA2_rp             |"},{"line_number":118,"context_line":"   | NUMA_CORE: 8         |                 | NUMA_CORE: 8         |"},{"line_number":119,"context_line":"   | NUMA_MEMORY_MB: 4096 |                 | NUMA_MEMORY_MB: 4096 |"},{"line_number":120,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":121,"context_line":"            |                                 |"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_8661f9ab","line":118,"range":{"start_line":118,"start_character":46,"end_line":118,"end_character":55},"updated":"2018-03-14 19:21:49.000000000","message":"We chatted at the PTG about replacing this resource class name with \"PCPU\" to indicate a physical (logical) processor.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"c7532b12f6216894eb3e397b96ec8b5af9b3baa6","unresolved":false,"context_lines":[{"line_number":115,"context_line":"                          /                 \\"},{"line_number":116,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":117,"context_line":"   | NUMA1_rp             |                 | NUMA2_rp             |"},{"line_number":118,"context_line":"   | NUMA_CORE: 8         |                 | NUMA_CORE: 8         |"},{"line_number":119,"context_line":"   | NUMA_MEMORY_MB: 4096 |                 | NUMA_MEMORY_MB: 4096 |"},{"line_number":120,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":121,"context_line":"            |                                 |"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_94a5a2a4","line":118,"range":{"start_line":118,"start_character":46,"end_line":118,"end_character":55},"in_reply_to":"df7087c5_8661f9ab","updated":"2018-03-14 21:45:58.000000000","message":"I like PCPU better too. :)","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"c05ff8932da7d6d9018ca28a3e4f1679ae81d299","unresolved":false,"context_lines":[{"line_number":116,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":117,"context_line":"   | NUMA1_rp             |                 | NUMA2_rp             |"},{"line_number":118,"context_line":"   | NUMA_CORE: 8         |                 | NUMA_CORE: 8         |"},{"line_number":119,"context_line":"   | NUMA_MEMORY_MB: 4096 |                 | NUMA_MEMORY_MB: 4096 |"},{"line_number":120,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":121,"context_line":"            |                                 |"},{"line_number":122,"context_line":"      +----------+              +-----------------+"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_66438514","line":119,"range":{"start_line":119,"start_character":46,"end_line":119,"end_character":60},"updated":"2018-03-14 19:21:49.000000000","message":"I\u0027d actually like to remove this resource class (or deprecate it). MEMORY_MB is the resource class to use.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":116,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":117,"context_line":"   | NUMA1_rp             |                 | NUMA2_rp             |"},{"line_number":118,"context_line":"   | NUMA_CORE: 8         |                 | NUMA_CORE: 8         |"},{"line_number":119,"context_line":"   | NUMA_MEMORY_MB: 4096 |                 | NUMA_MEMORY_MB: 4096 |"},{"line_number":120,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":121,"context_line":"            |                                 |"},{"line_number":122,"context_line":"      +----------+              +-----------------+"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_9b758ffd","line":119,"range":{"start_line":119,"start_character":46,"end_line":119,"end_character":60},"in_reply_to":"df7087c5_2d917cf2","updated":"2018-03-23 17:00:31.000000000","message":"the other consideration here is how to model hugepages.\ni would suggest moving memory into a sub RP of the numa node\nwith an inventory of MEMORY_MB and traits to reflect memory size. e.g. HW_MEM_PAGESIZE_4K,HW_MEM_PAGESIZE_2MB,HW_MEM_PAGESIZE_1G\n\nwe could then use the step_size on the invetores of memory_mb to enforce the allcoation of 4k,2mb,1G chunks form thos inventories.\n\nthat would allow use to remove the hw:mem_page_size extra spec in the future","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":116,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":117,"context_line":"   | NUMA1_rp             |                 | NUMA2_rp             |"},{"line_number":118,"context_line":"   | NUMA_CORE: 8         |                 | NUMA_CORE: 8         |"},{"line_number":119,"context_line":"   | NUMA_MEMORY_MB: 4096 |                 | NUMA_MEMORY_MB: 4096 |"},{"line_number":120,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":121,"context_line":"            |                                 |"},{"line_number":122,"context_line":"      +----------+              +-----------------+"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_88ac91ed","line":119,"range":{"start_line":119,"start_character":46,"end_line":119,"end_character":60},"in_reply_to":"df7087c5_66438514","updated":"2018-03-15 13:47:38.000000000","message":"To be clear, are you saying s/NUMA_MEMORY_MB/MEMORY_MB/ or remove this from the NUMA resource provider entirely?","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":116,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":117,"context_line":"   | NUMA1_rp             |                 | NUMA2_rp             |"},{"line_number":118,"context_line":"   | NUMA_CORE: 8         |                 | NUMA_CORE: 8         |"},{"line_number":119,"context_line":"   | NUMA_MEMORY_MB: 4096 |                 | NUMA_MEMORY_MB: 4096 |"},{"line_number":120,"context_line":"   +----------------------+                 +----------------------+"},{"line_number":121,"context_line":"            |                                 |"},{"line_number":122,"context_line":"      +----------+              +-----------------+"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_2d917cf2","line":119,"range":{"start_line":119,"start_character":46,"end_line":119,"end_character":60},"in_reply_to":"df7087c5_88ac91ed","updated":"2018-03-19 22:08:41.000000000","message":"the former","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":25625,"name":"Tetsuro Nakamura","email":"tetsuro.nakamura.bc@hco.ntt.co.jp","username":"tetsuro0907"},"change_message_id":"1b14c072fa3688ca049fb4ce3ee16ec72f9be116","unresolved":false,"context_lines":[{"line_number":123,"context_line":"      | PGPU1_rp |              | PF_rp           |"},{"line_number":124,"context_line":"      | VGPU: 8  |              | SRIOV_NET_VF: 8 |"},{"line_number":125,"context_line":"      +----------+              +-----------------+"},{"line_number":126,"context_line":""},{"line_number":127,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":128,"context_line":"classes :"},{"line_number":129,"context_line":"* NUMA_CORE: for telling how many cores the NUMA node has (threaded or not)."}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_7f345ade","line":126,"updated":"2018-03-15 10:13:29.000000000","message":"Let me make sure that the host in the picture have totally 32 logical cores; 16 for shared and 8+8 for dedicated.\n\nThey shouldn\u0027t be mixed or duplicated in both NUMA RP and COMPUTE RP. If we mix or duplicate them, there would be inconsistency with allocation candidates API response in my understanding.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":123,"context_line":"      | PGPU1_rp |              | PF_rp           |"},{"line_number":124,"context_line":"      | VGPU: 8  |              | SRIOV_NET_VF: 8 |"},{"line_number":125,"context_line":"      +----------+              +-----------------+"},{"line_number":126,"context_line":""},{"line_number":127,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":128,"context_line":"classes :"},{"line_number":129,"context_line":"* NUMA_CORE: for telling how many cores the NUMA node has (threaded or not)."}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_a83f1585","line":126,"in_reply_to":"df7087c5_7f345ade","updated":"2018-03-15 13:47:38.000000000","message":"+1. I\u0027d like to think of these as entirely separate things, in line with the mixed-cpu-policy-host spec. Maybe we should link to that?","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":25625,"name":"Tetsuro Nakamura","email":"tetsuro.nakamura.bc@hco.ntt.co.jp","username":"tetsuro0907"},"change_message_id":"1b14c072fa3688ca049fb4ce3ee16ec72f9be116","unresolved":false,"context_lines":[{"line_number":126,"context_line":""},{"line_number":127,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":128,"context_line":"classes :"},{"line_number":129,"context_line":"* NUMA_CORE: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":130,"context_line":"* NUMA_MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":131,"context_line":""},{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_045b9386","line":129,"range":{"start_line":129,"start_character":34,"end_line":129,"end_character":39},"updated":"2018-03-15 10:13:29.000000000","message":"\"cores for pinning\"\n... we\u0027d like to not mix shared and dedicated cores here.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"c05ff8932da7d6d9018ca28a3e4f1679ae81d299","unresolved":false,"context_lines":[{"line_number":130,"context_line":"* NUMA_MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":131,"context_line":""},{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_66df053a","line":134,"range":{"start_line":133,"start_character":60,"end_line":134,"end_character":27},"updated":"2018-03-14 19:21:49.000000000","message":"I can see a case for keeping an inventory of VCPU on the compute node root provider, since a PCPU !\u003d VCPU. But I can\u0027t see a case for keeping an inventory of MEMORY_MB on the compute node root provider if NUMA node providers contain MEMORY_MB inventory that is associated with just that NUMA node. In that case, it\u0027s not like the compute node as a whole has some bank of memory that is *not* associated with one of the NUMA nodes.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"c7532b12f6216894eb3e397b96ec8b5af9b3baa6","unresolved":false,"context_lines":[{"line_number":130,"context_line":"* NUMA_MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":131,"context_line":""},{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_f40f3e85","line":134,"range":{"start_line":133,"start_character":60,"end_line":134,"end_character":27},"in_reply_to":"df7087c5_66df053a","updated":"2018-03-14 21:45:58.000000000","message":"Actually it sort of does, because (with libvirt at least) if you boot an instance that doesn\u0027t have a numa topology (so 4K pages, \"shared\" vCPUs, no PCI devices, no explicit guest numa topology) then nova does not specify NUMA affinity for it and it basically floats across all available host pCPUs.\n\nThis currently causes problems with accurate per-host-NUMA-node 4KB memory accounting since we don\u0027t know how much memory these floating instances have consumed from each host NUMA node.\n\nWithin my organization we changed it to explicitly constrain such instances to a single NUMA node, but we still had to use \"preferred\" rather than \"strict\" memory allocation for 4K pages because the host overhead per NUMA node is somewhat variable.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":130,"context_line":"* NUMA_MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":131,"context_line":""},{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_880e7630","line":134,"range":{"start_line":133,"start_character":60,"end_line":134,"end_character":27},"in_reply_to":"df7087c5_6e523c15","updated":"2018-03-19 22:08:41.000000000","message":"I still don\u0027t understand how this can work.  If we e.g. satisfy a request for NUMA_CORE:2, the allocation will happen against the NUMA RP\u0027s inventory, which now has 6 available for a total of 14 on the system.  But the compute node\u0027s VCPU inventory still shows 16 available.\n\nDid we discuss locking down a host to doing either NUMA or not-NUMA, but not both?","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"aaaf5f80629350d75b37edf209f4d9587c445b2f","unresolved":false,"context_lines":[{"line_number":130,"context_line":"* NUMA_MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":131,"context_line":""},{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_5e123c5a","line":134,"range":{"start_line":133,"start_character":60,"end_line":134,"end_character":27},"in_reply_to":"df7087c5_880e7630","updated":"2018-03-20 19:53:07.000000000","message":"I\u0027d rather not lock down a host.  The discussion that we had around the \"mixed shared and dedicated on same node\" spec talked about reporting \"pcpus for shared vcpus\" and \"pcpus for dedicated vcpus\" separately into placement, in which case consuming a NUMA_CORE does not have any implications about consuming compute node VCPUs.\n\nSuppose you have an instance with a numa_topology. (Maybe it\u0027s using hugepages, maybe it specified a particular numa topology.)  In this case you would consume some number of VCPUs from a specific NUMA node, which should *also* consume that many VCPUs from the compute node as a whole.  This would mean that we should track VCPUs as a per-NUMA-node resource as well though, I think.\n\nBut that becomes problematic if you have an instance that is not pinned to a NUMA node--you\u0027d consume some number of VCPUs from the compute node as a whole, but how do you account for that node-wide usage on each NUMA node?\n\nGiven my earlier comment above, with \"shared\" vCPUs and 4KB pages it might make sense to let it float across the whole compute node, but with \"shared\" vCPUs and 2MB pages it makes sense to restrict it to the pCPUs on a particular NUMA node.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":25625,"name":"Tetsuro Nakamura","email":"tetsuro.nakamura.bc@hco.ntt.co.jp","username":"tetsuro0907"},"change_message_id":"5122bd5c205a09534dddb4870961acb1296a03f9","unresolved":false,"context_lines":[{"line_number":130,"context_line":"* NUMA_MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":131,"context_line":""},{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_cc0046ae","line":134,"range":{"start_line":133,"start_character":60,"end_line":134,"end_character":27},"in_reply_to":"df7087c5_88925156","updated":"2018-03-16 04:14:59.000000000","message":"\u003e We hardly need a \u0027dedicated_memory\u0027 configuration option, do we? :)\n\nNo, we don\u0027t.\n\nI\u0027m good with the approach to copy COMPUTE RP\u0027s MEMORY_MB to each NUMA RP\u0027s NUMA_MEMORY_MB, and requesting both resource classes when booting an instance with numatopology and requesting only COMPUTE RP\u0027s MEMORY_MB when booting an instance with no numatopology. Am I getting you?","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":130,"context_line":"* NUMA_MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":131,"context_line":""},{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_88925156","line":134,"range":{"start_line":133,"start_character":60,"end_line":134,"end_character":27},"in_reply_to":"df7087c5_9fa60eab","updated":"2018-03-15 13:47:38.000000000","message":"\u003e So we allocate memory from COMPUTE RP(MEMORY_MB) in the case where\n \u003e we can\u0027t say  \"we allocate memory from this numa node\", and from\n \u003e NUMA RP(NUMA_MEMORY_MB) when the NUMATopology of the instance is\n \u003e enabled ... really? Sounds confusing to me. Operators would be\n \u003e confused when they check if the memory is full or not.\n\nAgreed, but...\n\n \u003e I prefer to stick to allocate memory from COMPUTE RP(MEMORY_MB),\n \u003e which is how it works today.\n\nSay you have 2 sockets, 8 cores, no threads and 16GB of RAM (8GB per socket). If you boot an instance with four cores from node 0 and 8GB of RAM, then attempting to boot another four core instance will end up attempting to use the other four CPUs from node 0, right? This will either fail or will result in awful performance. You could also replicate something similar with asymmetric placement of RAM on the physical host (say, 128GB for node 0 and 384 GB for node 1).\n\nWe hardly need a \u0027dedicated_memory\u0027 configuration option, do we? :)","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"fdb0767e4dfae20269b2dc596ca9af34d4a9bcee","unresolved":false,"context_lines":[{"line_number":130,"context_line":"* NUMA_MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":131,"context_line":""},{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_6e523c15","line":134,"range":{"start_line":133,"start_character":60,"end_line":134,"end_character":27},"in_reply_to":"df7087c5_cc0046ae","updated":"2018-03-16 17:20:00.000000000","message":"For hugepages we really do have per-NUMA amounts and they are always consumed on a per-NUMA basis.  So they should be tracked as per-NUMA resources.\n\nFor 4KB pages they can be consumed on a per-NUMA basis (for instances with \"dedicated\" vCPUs, or PCI devices, or an explicit NUMA topology) or they can be consumed on a compute node basis (for instances with no numa_topology).\n\nWhat I would propose is a variation on what we have done internally.  Account for 4KB pages for the compute node as a whole, and use \"mode\u003dpreferred\" for that memory so that it\u0027ll try to be NUMA-local but fall back to NUMA-remote if needed.\n\nAlternately, change instances with no numa_topology (which implies 4KB pages) to be constrained to a single host NUMA node.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":25625,"name":"Tetsuro Nakamura","email":"tetsuro.nakamura.bc@hco.ntt.co.jp","username":"tetsuro0907"},"change_message_id":"1b14c072fa3688ca049fb4ce3ee16ec72f9be116","unresolved":false,"context_lines":[{"line_number":130,"context_line":"* NUMA_MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":131,"context_line":""},{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_9fa60eab","line":134,"range":{"start_line":133,"start_character":60,"end_line":134,"end_character":27},"in_reply_to":"df7087c5_f40f3e85","updated":"2018-03-15 10:13:29.000000000","message":"So we allocate memory from COMPUTE RP(MEMORY_MB) in the case where we can\u0027t say  \"we allocate memory from this numa node\", and from NUMA RP(NUMA_MEMORY_MB) when the NUMATopology of the instance is enabled ... really? Sounds confusing to me. Operators would be confused when they check if the memory is full or not.\n\nI prefer to stick to allocate memory from COMPUTE RP(MEMORY_MB), which is how it works today.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"},{"line_number":138,"context_line":"but will just be used for telling how many NUMA nodes the instance should use."}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_549a94b8","line":135,"range":{"start_line":135,"start_character":8,"end_line":135,"end_character":19},"updated":"2018-03-15 13:47:38.000000000","message":"``NUMA_SOCKET``","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"},{"line_number":138,"context_line":"but will just be used for telling how many NUMA nodes the instance should use."}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_5b94d7e8","line":135,"range":{"start_line":135,"start_character":8,"end_line":135,"end_character":19},"in_reply_to":"df7087c5_549a94b8","updated":"2018-03-23 17:00:31.000000000","message":"please never use the term numa socket.\na singel proces socket can have 0,1 or 2+\nnuma nodes associated with it.\n\nif we have this resouce class please call it NUMA_NODE","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"6a887ebdc8ef2edc99f5bc47108dd455e137b74d","unresolved":false,"context_lines":[{"line_number":132,"context_line":"The root Resource Provider (ie. the compute node) would then provide the same"},{"line_number":133,"context_line":"existing resources than if it wouldn\u0027t have a NUMA topology (for VCPU and"},{"line_number":134,"context_line":"MEMORY_MB resource classes), *but* would also add a specific resource class"},{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"},{"line_number":138,"context_line":"but will just be used for telling how many NUMA nodes the instance should use."},{"line_number":139,"context_line":""}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_3ff7d8d5","line":136,"range":{"start_line":135,"start_character":26,"end_line":136,"end_character":56},"updated":"2018-03-14 16:44:32.000000000","message":"grammar nit: should be something like \"...would specify how many NUMA nodes the compute node has.\"   (And if it\u0027s not a NUMA architecture it would just degenerate to a single NUMA node, no?)","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"6a887ebdc8ef2edc99f5bc47108dd455e137b74d","unresolved":false,"context_lines":[{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"},{"line_number":138,"context_line":"but will just be used for telling how many NUMA nodes the instance should use."},{"line_number":139,"context_line":""},{"line_number":140,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"},{"line_number":141,"context_line":"each one would have specific resource classes. For example:"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_5fa88c8a","line":138,"range":{"start_line":138,"start_character":4,"end_line":138,"end_character":78},"updated":"2018-03-14 16:44:32.000000000","message":"Is this really what you meant?  It seems like it would be more accurate to say that this is how many NUMA nodes the host actually has.\n\nCurrently \"hw:numa_nodes\" in the extra specs tells us how many NUMA nodes the instance should use.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"},{"line_number":138,"context_line":"but will just be used for telling how many NUMA nodes the instance should use."},{"line_number":139,"context_line":""},{"line_number":140,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"},{"line_number":141,"context_line":"each one would have specific resource classes. For example:"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_fb728be3","line":138,"range":{"start_line":138,"start_character":4,"end_line":138,"end_character":78},"in_reply_to":"df7087c5_0851060f","updated":"2018-03-23 17:00:31.000000000","message":"+1 for trait.\n\nto have a resouce class you would have to create an inventory of that clase on the numa node RP with a cpasity of 0 to get the effect you discibe. a trait is much cleaner.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"},{"line_number":138,"context_line":"but will just be used for telling how many NUMA nodes the instance should use."},{"line_number":139,"context_line":""},{"line_number":140,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"},{"line_number":141,"context_line":"each one would have specific resource classes. For example:"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_7448f82b","line":138,"range":{"start_line":138,"start_character":4,"end_line":138,"end_character":78},"in_reply_to":"df7087c5_5fa88c8a","updated":"2018-03-15 13:47:38.000000000","message":"+1. It\u0027s also possible that an instance could have two NUMA nodes and these could be placed on the same host NUMA node. You can\u0027t directly map host nodes to instance nodes.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":135,"context_line":"called `NUMA_SOCKET` that would help to know how many NUMA nodes is having"},{"line_number":136,"context_line":"the compute node (and whether it\u0027s a NUMA architecture)."},{"line_number":137,"context_line":"Note that the ``NUMA_SOCKET`` resource class won\u0027t have allocations against it,"},{"line_number":138,"context_line":"but will just be used for telling how many NUMA nodes the instance should use."},{"line_number":139,"context_line":""},{"line_number":140,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"},{"line_number":141,"context_line":"each one would have specific resource classes. For example:"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_0851060f","line":138,"range":{"start_line":138,"start_character":4,"end_line":138,"end_character":78},"in_reply_to":"df7087c5_7448f82b","updated":"2018-03-19 22:08:41.000000000","message":"I\u0027m not a fan of this idea of \"unconsumable inventory\".  A more natural (and in fact more useful) model would be to mark each of the NUMA RPs with a THIS_IS_A_NUMA_NODE trait.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":142,"context_line":""},{"line_number":143,"context_line":"* GPU devices are having a specific VGPU resource class that counts the number"},{"line_number":144,"context_line":"  of virtual GPUs it can create."},{"line_number":145,"context_line":"* NIC with SRIOV phyical function could count the number of virtual functions"},{"line_number":146,"context_line":"  it can create."},{"line_number":147,"context_line":""},{"line_number":148,"context_line":"Asking for NUMA resources tied to a NUMA node or not"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_48461e52","line":145,"range":{"start_line":145,"start_character":17,"end_line":145,"end_character":24},"updated":"2018-03-19 22:08:41.000000000","message":"physical.\n\nAnd you should have one RP per SR-IOV PF, not for the whole NIC.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":148,"context_line":"Asking for NUMA resources tied to a NUMA node or not"},{"line_number":149,"context_line":"----------------------------------------------------"},{"line_number":150,"context_line":""},{"line_number":151,"context_line":"Back to the usecase #1, it would then be possible to ask for specific NUMA"},{"line_number":152,"context_line":"resources like this:"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"* if I want ``NB_NUMA`` specific NUMA nodes for ``NB_CPUS`` VCPUs, then the"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_f43c0894","line":151,"range":{"start_line":151,"start_character":12,"end_line":151,"end_character":22},"updated":"2018-03-15 13:47:38.000000000","message":"nit: maybe add a backreference","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":151,"context_line":"Back to the usecase #1, it would then be possible to ask for specific NUMA"},{"line_number":152,"context_line":"resources like this:"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"* if I want ``NB_NUMA`` specific NUMA nodes for ``NB_CPUS`` VCPUs, then the"},{"line_number":155,"context_line":"  flavor extraspec could be ``NUMA_SOCKETS\u003dNB_NUMA`` for a ``NB_CPUS`` flavor."},{"line_number":156,"context_line":"  There wouldn\u0027t be any guarantee that you would end up with all your vCPUs"},{"line_number":157,"context_line":"  spread into NB_NUMA nodes, but just an affinity try."}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_94582cfa","line":154,"range":{"start_line":154,"start_character":14,"end_line":154,"end_character":21},"updated":"2018-03-15 13:47:38.000000000","message":"Can you just use ``N`` and ``M`` here. I thought you were talking about traits here for a moment","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":151,"context_line":"Back to the usecase #1, it would then be possible to ask for specific NUMA"},{"line_number":152,"context_line":"resources like this:"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"* if I want ``NB_NUMA`` specific NUMA nodes for ``NB_CPUS`` VCPUs, then the"},{"line_number":155,"context_line":"  flavor extraspec could be ``NUMA_SOCKETS\u003dNB_NUMA`` for a ``NB_CPUS`` flavor."},{"line_number":156,"context_line":"  There wouldn\u0027t be any guarantee that you would end up with all your vCPUs"},{"line_number":157,"context_line":"  spread into NB_NUMA nodes, but just an affinity try."}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_a8a33a00","line":154,"range":{"start_line":154,"start_character":14,"end_line":154,"end_character":21},"in_reply_to":"df7087c5_1e397fd5","updated":"2018-03-19 22:08:41.000000000","message":"Note that you have to do some backslashing to get it attached to the rest of the string, though.  See for example: https://review.openstack.org/#/c/540179/3/specs/rocky/approved/granular-resource-requests.rst@182\n\nI\u0027m gonna say I don\u0027t understand this paragraph.  Is \u0027NB\u0027 some kind of parameter?  Perhaps a concrete example?","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"d5cecfa575985cccb7ae15a476e4c04beffd79b4","unresolved":false,"context_lines":[{"line_number":151,"context_line":"Back to the usecase #1, it would then be possible to ask for specific NUMA"},{"line_number":152,"context_line":"resources like this:"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"* if I want ``NB_NUMA`` specific NUMA nodes for ``NB_CPUS`` VCPUs, then the"},{"line_number":155,"context_line":"  flavor extraspec could be ``NUMA_SOCKETS\u003dNB_NUMA`` for a ``NB_CPUS`` flavor."},{"line_number":156,"context_line":"  There wouldn\u0027t be any guarantee that you would end up with all your vCPUs"},{"line_number":157,"context_line":"  spread into NB_NUMA nodes, but just an affinity try."}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_1e397fd5","line":154,"range":{"start_line":154,"start_character":14,"end_line":154,"end_character":21},"in_reply_to":"df7087c5_2ca0e28f","updated":"2018-03-16 09:29:34.000000000","message":"You should use those, actually. Single backticks means \"default role\" which can be anything (it just currently defaults to italics). You\u0027re better off using single asterisks (*N*) which are guaranteed to be the same everywhere","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":25625,"name":"Tetsuro Nakamura","email":"tetsuro.nakamura.bc@hco.ntt.co.jp","username":"tetsuro0907"},"change_message_id":"5122bd5c205a09534dddb4870961acb1296a03f9","unresolved":false,"context_lines":[{"line_number":151,"context_line":"Back to the usecase #1, it would then be possible to ask for specific NUMA"},{"line_number":152,"context_line":"resources like this:"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"* if I want ``NB_NUMA`` specific NUMA nodes for ``NB_CPUS`` VCPUs, then the"},{"line_number":155,"context_line":"  flavor extraspec could be ``NUMA_SOCKETS\u003dNB_NUMA`` for a ``NB_CPUS`` flavor."},{"line_number":156,"context_line":"  There wouldn\u0027t be any guarantee that you would end up with all your vCPUs"},{"line_number":157,"context_line":"  spread into NB_NUMA nodes, but just an affinity try."}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_2ca0e28f","line":154,"range":{"start_line":154,"start_character":14,"end_line":154,"end_character":21},"in_reply_to":"df7087c5_94582cfa","updated":"2018-03-16 04:14:59.000000000","message":"You can use single backtick (`N`) to distinguish general numbers (`N` can be 1, 2, 100 or anything) from unique parameter (``SOME_TRAIT``).","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":152,"context_line":"resources like this:"},{"line_number":153,"context_line":""},{"line_number":154,"context_line":"* if I want ``NB_NUMA`` specific NUMA nodes for ``NB_CPUS`` VCPUs, then the"},{"line_number":155,"context_line":"  flavor extraspec could be ``NUMA_SOCKETS\u003dNB_NUMA`` for a ``NB_CPUS`` flavor."},{"line_number":156,"context_line":"  There wouldn\u0027t be any guarantee that you would end up with all your vCPUs"},{"line_number":157,"context_line":"  spread into NB_NUMA nodes, but just an affinity try."},{"line_number":158,"context_line":""}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_146c1c9a","line":155,"range":{"start_line":155,"start_character":9,"end_line":155,"end_character":18},"updated":"2018-03-15 13:47:38.000000000","message":"nit: extra spec","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"6a887ebdc8ef2edc99f5bc47108dd455e137b74d","unresolved":false,"context_lines":[{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Again, ``NUMA_SOCKET`` resource class wouldn\u0027t have allocations against it."},{"line_number":160,"context_line":""},{"line_number":161,"context_line":"But it could be also possible to ask for specific NUMA resources for one node"},{"line_number":162,"context_line":"by telling this: ``resources:NUMA_CORES\u003d2\u0026NUMA_MEMORY_MB\u003d1024``. In that case,"},{"line_number":163,"context_line":"it would make sure that all the resources are tied to a specific NUMA node."},{"line_number":164,"context_line":"In case all the NUMA nodes wouldn\u0027t have enough resources, then a NoValidHost"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_3f5ef843","line":161,"range":{"start_line":161,"start_character":0,"end_line":161,"end_character":77},"updated":"2018-03-14 16:44:32.000000000","message":"I think we\u0027d want a way to express what we currently have in the extra specs for numa nodes.  For example, \"I want one numa node with \"X\" vCPUs and \"Y\" RAM, and another numa node with \"N\" vCPUs and \"M\" RAM\".\n\nOr could we automatically translate the existing hw:numa_nodes/hw:numa_cpus.N/hw:numa_mem.N extra specs into a request to placement?  That would have the benefit of backwards compatibility.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Again, ``NUMA_SOCKET`` resource class wouldn\u0027t have allocations against it."},{"line_number":160,"context_line":""},{"line_number":161,"context_line":"But it could be also possible to ask for specific NUMA resources for one node"},{"line_number":162,"context_line":"by telling this: ``resources:NUMA_CORES\u003d2\u0026NUMA_MEMORY_MB\u003d1024``. In that case,"},{"line_number":163,"context_line":"it would make sure that all the resources are tied to a specific NUMA node."},{"line_number":164,"context_line":"In case all the NUMA nodes wouldn\u0027t have enough resources, then a NoValidHost"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_f433a8a9","line":161,"range":{"start_line":161,"start_character":0,"end_line":161,"end_character":77},"in_reply_to":"df7087c5_3f5ef843","updated":"2018-03-15 13:47:38.000000000","message":"I\u0027d prefer the latter. I\u0027m not yet convinced that we need a new way to specify this stuff when the old way is mostly ok","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"6a887ebdc8ef2edc99f5bc47108dd455e137b74d","unresolved":false,"context_lines":[{"line_number":164,"context_line":"In case all the NUMA nodes wouldn\u0027t have enough resources, then a NoValidHost"},{"line_number":165,"context_line":"exception should be raised."},{"line_number":166,"context_line":""},{"line_number":167,"context_line":".. note:: It\u0027s the operator\u0027s responsibility to make flavors consistent between"},{"line_number":168,"context_line":"          respectively VCPU and NUMA_CORES resource classes and MEMORY_MB and"},{"line_number":169,"context_line":"          NUMA_MEMORY_MB resource classes, so that allocations would be"},{"line_number":170,"context_line":"          consistent for both resource classes:"},{"line_number":171,"context_line":"          * where VCPU and MEMORY_MB would be done against the root RP"},{"line_number":172,"context_line":"          * and NUMA_CORES and NUMA_MEMORY_MB would be against a child NUMA RP"},{"line_number":173,"context_line":""},{"line_number":174,"context_line":"If the operator would like to provide a flavor for asking resources for more"},{"line_number":175,"context_line":"than just one NUMA node, then the usecase would be related to the"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_32686fb7","line":172,"range":{"start_line":167,"start_character":9,"end_line":172,"end_character":78},"updated":"2018-03-14 16:44:32.000000000","message":"I don\u0027t really like pushing this onto the operator, it seems prone to error.  I think nova could automatically convert the existing extra-specs into a placement request.  Like if you specified 3 vCPUs, with a CPU policy of \"dedicated\", then we could ask for 3 NUMA_COREs, but if the CPU policy is \"shared\" then we\u0027d ask for 3 VCPUs.  If we approved the \"mixed shared and dedicated vCPUs in same instance\" spec then we could allow an explicit override where we ask for X \"shared\" vCPUs and Y \"dedicated\" vCPUs.\n\nThis would have the nice property that it \"just works\" with existing flavors, without requiring operators to update all their extra-specs with resource requests.\n\nIf we were going to do what you have here, I think it would make sense to add validation to the resource extra-specs to (for example) ensure that we don\u0027t ask for more resources than are specified in the flavor.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":164,"context_line":"In case all the NUMA nodes wouldn\u0027t have enough resources, then a NoValidHost"},{"line_number":165,"context_line":"exception should be raised."},{"line_number":166,"context_line":""},{"line_number":167,"context_line":".. note:: It\u0027s the operator\u0027s responsibility to make flavors consistent between"},{"line_number":168,"context_line":"          respectively VCPU and NUMA_CORES resource classes and MEMORY_MB and"},{"line_number":169,"context_line":"          NUMA_MEMORY_MB resource classes, so that allocations would be"},{"line_number":170,"context_line":"          consistent for both resource classes:"},{"line_number":171,"context_line":"          * where VCPU and MEMORY_MB would be done against the root RP"},{"line_number":172,"context_line":"          * and NUMA_CORES and NUMA_MEMORY_MB would be against a child NUMA RP"},{"line_number":173,"context_line":""},{"line_number":174,"context_line":"If the operator would like to provide a flavor for asking resources for more"},{"line_number":175,"context_line":"than just one NUMA node, then the usecase would be related to the"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_d4d5441d","line":172,"range":{"start_line":167,"start_character":9,"end_line":172,"end_character":78},"in_reply_to":"df7087c5_32686fb7","updated":"2018-03-15 13:47:38.000000000","message":"+1, for all the same reasons","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":164,"context_line":"In case all the NUMA nodes wouldn\u0027t have enough resources, then a NoValidHost"},{"line_number":165,"context_line":"exception should be raised."},{"line_number":166,"context_line":""},{"line_number":167,"context_line":".. note:: It\u0027s the operator\u0027s responsibility to make flavors consistent between"},{"line_number":168,"context_line":"          respectively VCPU and NUMA_CORES resource classes and MEMORY_MB and"},{"line_number":169,"context_line":"          NUMA_MEMORY_MB resource classes, so that allocations would be"},{"line_number":170,"context_line":"          consistent for both resource classes:"},{"line_number":171,"context_line":"          * where VCPU and MEMORY_MB would be done against the root RP"},{"line_number":172,"context_line":"          * and NUMA_CORES and NUMA_MEMORY_MB would be against a child NUMA RP"},{"line_number":173,"context_line":""},{"line_number":174,"context_line":"If the operator would like to provide a flavor for asking resources for more"},{"line_number":175,"context_line":"than just one NUMA node, then the usecase would be related to the"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_1b13bf68","line":172,"range":{"start_line":167,"start_character":9,"end_line":172,"end_character":78},"in_reply_to":"df7087c5_d4d5441d","updated":"2018-03-23 17:00:31.000000000","message":"+1","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":172,"context_line":"          * and NUMA_CORES and NUMA_MEMORY_MB would be against a child NUMA RP"},{"line_number":173,"context_line":""},{"line_number":174,"context_line":"If the operator would like to provide a flavor for asking resources for more"},{"line_number":175,"context_line":"than just one NUMA node, then the usecase would be related to the"},{"line_number":176,"context_line":"`Granular Resource Request`_ spec and shouldn\u0027t be discussed in this spec."},{"line_number":177,"context_line":""},{"line_number":178,"context_line":"Asking for NUMA-associated devices"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_54e934e3","line":175,"range":{"start_line":175,"start_character":34,"end_line":175,"end_character":41},"updated":"2018-03-15 13:47:38.000000000","message":"use case","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":181,"context_line":"The same logic could be done for associated devices that are children of NUMA"},{"line_number":182,"context_line":"nodes:"},{"line_number":183,"context_line":""},{"line_number":184,"context_line":"* If I just want to provide NB_GPU vGPUs for an instance, I would just amend"},{"line_number":185,"context_line":"  a flavor and ask for ``resources:VGPU\u003dNB_GPU``. No NUMA relationship would be"},{"line_number":186,"context_line":"  verified, exactly like the current behaviour."},{"line_number":187,"context_line":"* If I want NB_GPU vGPUs *and* if possible tied to a specific NUMA node, then"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_d4bea4d3","line":184,"range":{"start_line":184,"start_character":28,"end_line":184,"end_character":34},"updated":"2018-03-15 13:47:38.000000000","message":"``N``","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"6a887ebdc8ef2edc99f5bc47108dd455e137b74d","unresolved":false,"context_lines":[{"line_number":184,"context_line":"* If I just want to provide NB_GPU vGPUs for an instance, I would just amend"},{"line_number":185,"context_line":"  a flavor and ask for ``resources:VGPU\u003dNB_GPU``. No NUMA relationship would be"},{"line_number":186,"context_line":"  verified, exactly like the current behaviour."},{"line_number":187,"context_line":"* If I want NB_GPU vGPUs *and* if possible tied to a specific NUMA node, then"},{"line_number":188,"context_line":"  I\u0027d propose ``resources:VGPU\u003dNB_GPU\u0026NUMA_SOCKET\u003d1``. If the Placement API"},{"line_number":189,"context_line":"  can\u0027t satisfy the fact that the vCPUs and the vGPUs wouldn\u0027t be in the same"},{"line_number":190,"context_line":"  NUMA node, that shouldn\u0027t be blocking."},{"line_number":191,"context_line":"* If I want NB_GPU vGPUs *and* those vGPUs be in the exact same NUMA node than"},{"line_number":192,"context_line":"  my vCPUs, then I\u0027d amend my flavor by"},{"line_number":193,"context_line":"  ``resouces:VGPU\u003dNB_GPU\u0026NUMA_CORES\u003dNB_CPU`` (where NB_CPU would be the same"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_d28ed377","line":190,"range":{"start_line":187,"start_character":2,"end_line":190,"end_character":40},"updated":"2018-03-14 16:44:32.000000000","message":"Is this the host NUMA node 1?  If so, I don\u0027t think we\u0027d want to expose host details in this way.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"0fc45ce5eb5e55eb445a923a768ffa6509bf8399","unresolved":false,"context_lines":[{"line_number":184,"context_line":"* If I just want to provide NB_GPU vGPUs for an instance, I would just amend"},{"line_number":185,"context_line":"  a flavor and ask for ``resources:VGPU\u003dNB_GPU``. No NUMA relationship would be"},{"line_number":186,"context_line":"  verified, exactly like the current behaviour."},{"line_number":187,"context_line":"* If I want NB_GPU vGPUs *and* if possible tied to a specific NUMA node, then"},{"line_number":188,"context_line":"  I\u0027d propose ``resources:VGPU\u003dNB_GPU\u0026NUMA_SOCKET\u003d1``. If the Placement API"},{"line_number":189,"context_line":"  can\u0027t satisfy the fact that the vCPUs and the vGPUs wouldn\u0027t be in the same"},{"line_number":190,"context_line":"  NUMA node, that shouldn\u0027t be blocking."},{"line_number":191,"context_line":"* If I want NB_GPU vGPUs *and* those vGPUs be in the exact same NUMA node than"},{"line_number":192,"context_line":"  my vCPUs, then I\u0027d amend my flavor by"},{"line_number":193,"context_line":"  ``resouces:VGPU\u003dNB_GPU\u0026NUMA_CORES\u003dNB_CPU`` (where NB_CPU would be the same"}],"source_content_type":"text/x-rst","patch_set":3,"id":"bf659307_7e2c2999","line":190,"range":{"start_line":187,"start_character":2,"end_line":190,"end_character":40},"in_reply_to":"df7087c5_68dbe27c","updated":"2018-03-23 17:00:31.000000000","message":"if this a host numa node then we are revirting to the buggy behavior we had with numa nodes in icehouse.\n\nlets not allow even operators to specify host numa nodes directly.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"ab710323c6cd9361242769eaec83df59d5997692","unresolved":false,"context_lines":[{"line_number":184,"context_line":"* If I just want to provide NB_GPU vGPUs for an instance, I would just amend"},{"line_number":185,"context_line":"  a flavor and ask for ``resources:VGPU\u003dNB_GPU``. No NUMA relationship would be"},{"line_number":186,"context_line":"  verified, exactly like the current behaviour."},{"line_number":187,"context_line":"* If I want NB_GPU vGPUs *and* if possible tied to a specific NUMA node, then"},{"line_number":188,"context_line":"  I\u0027d propose ``resources:VGPU\u003dNB_GPU\u0026NUMA_SOCKET\u003d1``. If the Placement API"},{"line_number":189,"context_line":"  can\u0027t satisfy the fact that the vCPUs and the vGPUs wouldn\u0027t be in the same"},{"line_number":190,"context_line":"  NUMA node, that shouldn\u0027t be blocking."},{"line_number":191,"context_line":"* If I want NB_GPU vGPUs *and* those vGPUs be in the exact same NUMA node than"},{"line_number":192,"context_line":"  my vCPUs, then I\u0027d amend my flavor by"},{"line_number":193,"context_line":"  ``resouces:VGPU\u003dNB_GPU\u0026NUMA_CORES\u003dNB_CPU`` (where NB_CPU would be the same"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_68dbe27c","line":190,"range":{"start_line":187,"start_character":2,"end_line":190,"end_character":40},"in_reply_to":"df7087c5_d28ed377","updated":"2018-03-19 22:08:41.000000000","message":"Agree.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"6a887ebdc8ef2edc99f5bc47108dd455e137b74d","unresolved":false,"context_lines":[{"line_number":188,"context_line":"  I\u0027d propose ``resources:VGPU\u003dNB_GPU\u0026NUMA_SOCKET\u003d1``. If the Placement API"},{"line_number":189,"context_line":"  can\u0027t satisfy the fact that the vCPUs and the vGPUs wouldn\u0027t be in the same"},{"line_number":190,"context_line":"  NUMA node, that shouldn\u0027t be blocking."},{"line_number":191,"context_line":"* If I want NB_GPU vGPUs *and* those vGPUs be in the exact same NUMA node than"},{"line_number":192,"context_line":"  my vCPUs, then I\u0027d amend my flavor by"},{"line_number":193,"context_line":"  ``resouces:VGPU\u003dNB_GPU\u0026NUMA_CORES\u003dNB_CPU`` (where NB_CPU would be the same"},{"line_number":194,"context_line":"  value than my flavor vCPU number)"},{"line_number":195,"context_line":""},{"line_number":196,"context_line":"Alternatives"},{"line_number":197,"context_line":"------------"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_92613bac","line":194,"range":{"start_line":191,"start_character":2,"end_line":194,"end_character":35},"updated":"2018-03-14 16:44:32.000000000","message":"What about the configurable PCI NUMA affinity stuff added in https://review.openstack.org/#/c/527472/ ?  If we support non-strict affinity for GPUs then doing strict affinity in placement might cause scheduling to fail when it should actually succeed.","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"184452f6c40b2794bfc20125fe7b7aabbb140b2e","unresolved":false,"context_lines":[{"line_number":261,"context_line":"Work Items"},{"line_number":262,"context_line":"----------"},{"line_number":263,"context_line":""},{"line_number":264,"context_line":"* Virt drivers passing NUMA topology thru `update_provider_tree()` API"},{"line_number":265,"context_line":"* Scheduler not allocating resources for ``NUMA_SOCKET`` resource class"},{"line_number":266,"context_line":""},{"line_number":267,"context_line":"Dependencies"}],"source_content_type":"text/x-rst","patch_set":3,"id":"df7087c5_54fcd46e","line":264,"range":{"start_line":264,"start_character":42,"end_line":264,"end_character":66},"updated":"2018-03-15 13:47:38.000000000","message":"``update_provider_tree()``","commit_id":"416731f5d080ec8e57a389c05ec003762a8dfe76"},{"author":{"_account_id":7730,"name":"Sahid Orentino Ferdjaoui","email":"sahid.ferdjaoui@industrialdiscipline.com","username":"sahid"},"change_message_id":"754bf14dd7f054a2633f0658a15781d8118eb89e","unresolved":false,"context_lines":[{"line_number":150,"context_line":"* a SR-IOV physical function could count the number of virtual functions"},{"line_number":151,"context_line":"  it can create."},{"line_number":152,"context_line":""},{"line_number":153,"context_line":".. note:: Enabling `huge pages`_ (or specific memory page size) could be done"},{"line_number":154,"context_line":"          using traits defining the supported page size (eg."},{"line_number":155,"context_line":"          HW_MEM_PAGESIZE_1GB for a 1GB page size) for the NUMA node (so the"},{"line_number":156,"context_line":"          Placement API would know which Resource Provider could support that"},{"line_number":157,"context_line":"          page size) but also with the MEMORY_MB inventory for that NUMA node"},{"line_number":158,"context_line":"          be having a step_size of the large page size (here 1024)."},{"line_number":159,"context_line":"          That said, that feature has to be discussed in a separate spec."},{"line_number":160,"context_line":""},{"line_number":161,"context_line":""},{"line_number":162,"context_line":"Asking for NUMA resources tied to a NUMA node or not"}],"source_content_type":"text/x-rst","patch_set":5,"id":"bf659307_525d4ae7","line":159,"range":{"start_line":153,"start_character":1,"end_line":159,"end_character":73},"updated":"2018-03-27 12:51:28.000000000","message":"That does not look right, hugepages is not something we could allocate dynamically so I would imagine if MEMORY_MB is small pages, we will need to have a MEMORY_MB_2MB, MEMORY_MB_1G etc... to request them like\n\n  resources:VCPU2\u0026MEMORY_MB_1G\u003d16\n  resources0:VCPU2\u0026MEMORY_MB_1G\u003d8,resources1:VCPU2\u0026MEMORY_MB_1G\u003d8\n\nI would use traits to mark the memory as shared, or bind them to a file...","commit_id":"83cbc3a14ac624e963a761d393762c40222e3bd8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d84bc35430517f4e0c69b73d44bb5bcac0603872","unresolved":false,"context_lines":[{"line_number":14,"context_line":"Nova compute nodes, we could use the Resource Providers tree for explaining"},{"line_number":15,"context_line":"the relationship between a root Resource Provider (root RP) ie. a compute node,"},{"line_number":16,"context_line":"and one or more Non-Uniform Memory Access (NUMA) nodes (aka. cells), each of"},{"line_number":17,"context_line":"them having separate resources, like memory or PCI devices, even if any core"},{"line_number":18,"context_line":"can access those resources."},{"line_number":19,"context_line":""},{"line_number":20,"context_line":"Problem description"},{"line_number":21,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_64071e84","line":18,"range":{"start_line":17,"start_character":58,"end_line":18,"end_character":27},"updated":"2018-03-29 21:04:56.000000000","message":"I\u0027m not sure this last part of the sentence is useful.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":15,"context_line":"the relationship between a root Resource Provider (root RP) ie. a compute node,"},{"line_number":16,"context_line":"and one or more Non-Uniform Memory Access (NUMA) nodes (aka. cells), each of"},{"line_number":17,"context_line":"them having separate resources, like memory or PCI devices, even if any core"},{"line_number":18,"context_line":"can access those resources."},{"line_number":19,"context_line":""},{"line_number":20,"context_line":"Problem description"},{"line_number":21,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_62e8237b","line":18,"in_reply_to":"","updated":"2018-04-04 01:37:05.000000000","message":"i.e. end sentence at pci devices.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d84bc35430517f4e0c69b73d44bb5bcac0603872","unresolved":false,"context_lines":[{"line_number":21,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":22,"context_line":""},{"line_number":23,"context_line":"The NUMATopologyFilter checks a lot of NUMA related resources like emulator"},{"line_number":24,"context_line":"threads policies, CPU pinned instances or memory page sizes. While we would"},{"line_number":25,"context_line":"like to keep that filter, the problem is that the NUMA topology is directly"},{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_44021a71","line":25,"range":{"start_line":24,"start_character":61,"end_line":25,"end_character":24},"updated":"2018-03-29 21:04:56.000000000","message":"I think you meant \"While we would like to eventually get rid of this filter,\"","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"The NUMATopologyFilter checks a lot of NUMA related resources like emulator"},{"line_number":24,"context_line":"threads policies, CPU pinned instances or memory page sizes. While we would"},{"line_number":25,"context_line":"like to keep that filter, the problem is that the NUMA topology is directly"},{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_e2a99323","line":25,"in_reply_to":"bf659307_44021a71","updated":"2018-04-04 01:37:05.000000000","message":"+1","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d84bc35430517f4e0c69b73d44bb5bcac0603872","unresolved":false,"context_lines":[{"line_number":22,"context_line":""},{"line_number":23,"context_line":"The NUMATopologyFilter checks a lot of NUMA related resources like emulator"},{"line_number":24,"context_line":"threads policies, CPU pinned instances or memory page sizes. While we would"},{"line_number":25,"context_line":"like to keep that filter, the problem is that the NUMA topology is directly"},{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."},{"line_number":29,"context_line":"Since the Placement API allocation would be against a specific NUMA node,"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_84f6f262","line":26,"range":{"start_line":25,"start_character":26,"end_line":26,"end_character":70},"updated":"2018-03-29 21:04:56.000000000","message":"That\u0027s not really the problem. The problem is that all of the following things have been tightly coupled into the NUMATopologyFilter and related object models:\n\n* Constraints for pinning guest CPUs to host CPUs\n* Constraints for pinning emulator threads to host CPUs\n* Constraints for the number of memory pages of a particular size that are assigned to one or more of a guest\u0027s virtual NUMA node\n* Constraints regarding the virtual CPU topology (sockets, cores and hyperthreads) being \"fit\" on to a host CPU topology (regardless of whether the host system is NUMA or otherwise)\n* Constraints regarding whether or not a guest CPU can be pinned to a host hyperthread or whether it must be pinned to a host physical core\n\nIn addition to the above, there is the problem that the NUMATopology object couples both the *inventory* of the host along with the *usage* of CPU topologies/huge pages by one or more instances all in the same object.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":23,"context_line":"The NUMATopologyFilter checks a lot of NUMA related resources like emulator"},{"line_number":24,"context_line":"threads policies, CPU pinned instances or memory page sizes. While we would"},{"line_number":25,"context_line":"like to keep that filter, the problem is that the NUMA topology is directly"},{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."},{"line_number":29,"context_line":"Since the Placement API allocation would be against a specific NUMA node,"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_82b9b779","line":26,"in_reply_to":"bf659307_84f6f262","updated":"2018-04-04 01:37:05.000000000","message":"It also dose that coupleing via a giant json blob in the db which is less the ideal, not least of which because of upgrades and the requirements for online schma migration to transform the blob when new field are added to it across upgrades","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d84bc35430517f4e0c69b73d44bb5bcac0603872","unresolved":false,"context_lines":[{"line_number":24,"context_line":"threads policies, CPU pinned instances or memory page sizes. While we would"},{"line_number":25,"context_line":"like to keep that filter, the problem is that the NUMA topology is directly"},{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."},{"line_number":29,"context_line":"Since the Placement API allocation would be against a specific NUMA node,"},{"line_number":30,"context_line":"the virt driver could just use that allocation for creating the instance."}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_24b5a67e","line":27,"range":{"start_line":27,"start_character":0,"end_line":27,"end_character":69},"updated":"2018-03-29 21:04:56.000000000","message":"I would write this as \"We can model the CPU topology of the host as a set of resource providers arranged in a tree.\"\n\nIt\u0027s not about NUMA, really. This is about modeling in placement the inventories of dedicated and shared CPU (and later, inventories of huge page resources) in a way that matches the model of the host\u0027s CPU layout.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":24,"context_line":"threads policies, CPU pinned instances or memory page sizes. While we would"},{"line_number":25,"context_line":"like to keep that filter, the problem is that the NUMA topology is directly"},{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."},{"line_number":29,"context_line":"Since the Placement API allocation would be against a specific NUMA node,"},{"line_number":30,"context_line":"the virt driver could just use that allocation for creating the instance."}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_22278bcc","line":27,"in_reply_to":"","updated":"2018-04-04 01:37:05.000000000","message":"Right so on a non numa system with multiple sockets you still might want to have a tree structure to model the relationship and hierarchy of the physical platform but since the spec is focusing on modelling numa i dont think we need to stress the even more general case beyond numa for why we would want a tree. I do like your rephrasing however","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d84bc35430517f4e0c69b73d44bb5bcac0603872","unresolved":false,"context_lines":[{"line_number":26,"context_line":"verified by a ``nova.virt.hardware._numa_fit_instance_cell()`` method."},{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."},{"line_number":29,"context_line":"Since the Placement API allocation would be against a specific NUMA node,"},{"line_number":30,"context_line":"the virt driver could just use that allocation for creating the instance."},{"line_number":31,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":32,"context_line":"within a NUMA node for a vCPU`_ or `customizing the instance CPU topology`_)"},{"line_number":33,"context_line":"would still be only done by the virt driver, and unrelated of that spec."}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_8452723a","line":30,"range":{"start_line":29,"start_character":0,"end_line":30,"end_character":73},"updated":"2018-03-29 21:04:56.000000000","message":"I\u0027m not sure what you\u0027re going for with this sentence. Perhaps you mean this?\n\nIf an instance is allocated dedicated CPU or memory page resources from a resource provider representing a specific NUMA node on a compute host, then we will be able to accurately query usage information for dedicated CPUs and memory pages resources in the same fashion we do for other resource classes like disk and RAM.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":27,"context_line":"Instead, we could use the Placement API for knowing the NUMA topology, and"},{"line_number":28,"context_line":"just directly allocating a specific instance to a NUMA node if possible."},{"line_number":29,"context_line":"Since the Placement API allocation would be against a specific NUMA node,"},{"line_number":30,"context_line":"the virt driver could just use that allocation for creating the instance."},{"line_number":31,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":32,"context_line":"within a NUMA node for a vCPU`_ or `customizing the instance CPU topology`_)"},{"line_number":33,"context_line":"would still be only done by the virt driver, and unrelated of that spec."}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_fd43743d","line":30,"in_reply_to":"","updated":"2018-04-04 01:37:05.000000000","message":"I think what he was highlighting was that we will need a way to corralate  a specific host numa node to the resource provider in placement the tracks its resource inventories. So when the virt driver it is doing the assignment of hugepage to a vm it can use the allocation candidate to select which host numa node to allocate the hugepages from based on the RP it was allocated from.  For Vf or cpus where the virt driver still needs to track individual assignments it will still have some additional work to do as that info will not be provided on the allocation candidate but even in this case the selection of the rp will reduce the set of cpus that the virt driver has to look at as the rp will have determined which host numa node the assignment will come from","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":30,"context_line":"the virt driver could just use that allocation for creating the instance."},{"line_number":31,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":32,"context_line":"within a NUMA node for a vCPU`_ or `customizing the instance CPU topology`_)"},{"line_number":33,"context_line":"would still be only done by the virt driver, and unrelated of that spec."},{"line_number":34,"context_line":""},{"line_number":35,"context_line":"Use Cases"},{"line_number":36,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_21a27f4c","line":33,"range":{"start_line":33,"start_character":43,"end_line":33,"end_character":71},"updated":"2018-03-29 15:33:20.000000000","message":"and are not covered by this spec.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":38,"context_line":"#1 : As a user, I\u0027d like to get fast access to memory for my 2-core instance"},{"line_number":39,"context_line":"----------------------------------------------------------------------------"},{"line_number":40,"context_line":""},{"line_number":41,"context_line":"Consider the following NUMA topology for a 2-sockets, 4 cores with no"},{"line_number":42,"context_line":"Hyper-Threading:"},{"line_number":43,"context_line":""},{"line_number":44,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_a1a68f52","line":41,"range":{"start_line":41,"start_character":41,"end_line":41,"end_character":61},"updated":"2018-03-29 15:33:20.000000000","message":"a two-socket, four-core host?","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":52,"context_line":"        +----+ +----+      +----+ +----+"},{"line_number":53,"context_line":""},{"line_number":54,"context_line":"Here, CPU1 and CPU2 would share the same memory through a common memory"},{"line_number":55,"context_line":"controller, but not CPU3 and CPU4 that would be on a second one."},{"line_number":56,"context_line":""},{"line_number":57,"context_line":"Ideally, compute and I/O intensive applications that would require more than"},{"line_number":58,"context_line":"one core would try to make sure that both cores are allocated on the same NUMA"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_21491f8a","line":55,"range":{"start_line":55,"start_character":13,"end_line":55,"end_character":64},"updated":"2018-03-29 15:33:20.000000000","message":"while CPU3 and CPU4 would share their own memory.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":52,"context_line":"        +----+ +----+      +----+ +----+"},{"line_number":53,"context_line":""},{"line_number":54,"context_line":"Here, CPU1 and CPU2 would share the same memory through a common memory"},{"line_number":55,"context_line":"controller, but not CPU3 and CPU4 that would be on a second one."},{"line_number":56,"context_line":""},{"line_number":57,"context_line":"Ideally, compute and I/O intensive applications that would require more than"},{"line_number":58,"context_line":"one core would try to make sure that both cores are allocated on the same NUMA"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_1dd2e828","line":55,"in_reply_to":"bf659307_21491f8a","updated":"2018-04-04 01:37:05.000000000","message":"+1","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":55,"context_line":"controller, but not CPU3 and CPU4 that would be on a second one."},{"line_number":56,"context_line":""},{"line_number":57,"context_line":"Ideally, compute and I/O intensive applications that would require more than"},{"line_number":58,"context_line":"one core would try to make sure that both cores are allocated on the same NUMA"},{"line_number":59,"context_line":"node, or some performance penalties would occur (if your application is"},{"line_number":60,"context_line":"CPU-bound or I/O-bound of course)."},{"line_number":61,"context_line":"For the moment, if you\u0027re an operator, you can provide a flavor for"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_a1544f22","line":58,"range":{"start_line":58,"start_character":4,"end_line":58,"end_character":8},"updated":"2018-03-29 15:33:20.000000000","message":"vCPU?","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":55,"context_line":"controller, but not CPU3 and CPU4 that would be on a second one."},{"line_number":56,"context_line":""},{"line_number":57,"context_line":"Ideally, compute and I/O intensive applications that would require more than"},{"line_number":58,"context_line":"one core would try to make sure that both cores are allocated on the same NUMA"},{"line_number":59,"context_line":"node, or some performance penalties would occur (if your application is"},{"line_number":60,"context_line":"CPU-bound or I/O-bound of course)."},{"line_number":61,"context_line":"For the moment, if you\u0027re an operator, you can provide a flavor for"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_613877ee","line":58,"range":{"start_line":58,"start_character":42,"end_line":58,"end_character":47},"updated":"2018-03-29 15:33:20.000000000","message":"vCPUs","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d84bc35430517f4e0c69b73d44bb5bcac0603872","unresolved":false,"context_lines":[{"line_number":54,"context_line":"Here, CPU1 and CPU2 would share the same memory through a common memory"},{"line_number":55,"context_line":"controller, but not CPU3 and CPU4 that would be on a second one."},{"line_number":56,"context_line":""},{"line_number":57,"context_line":"Ideally, compute and I/O intensive applications that would require more than"},{"line_number":58,"context_line":"one core would try to make sure that both cores are allocated on the same NUMA"},{"line_number":59,"context_line":"node, or some performance penalties would occur (if your application is"},{"line_number":60,"context_line":"CPU-bound or I/O-bound of course)."},{"line_number":61,"context_line":"For the moment, if you\u0027re an operator, you can provide a flavor for"},{"line_number":62,"context_line":"asking that by using an extra spec like:"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_2411265e","line":59,"range":{"start_line":57,"start_character":53,"end_line":59,"end_character":4},"updated":"2018-03-29 21:04:56.000000000","message":"\"that require dedicated CPU resources would like to ensure that those CPU resources are provided by the same NUMA node\"?","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":71,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":72,"context_line":""},{"line_number":73,"context_line":"Now, imagine a world where you could ask for a guest NUMA topology by using"},{"line_number":74,"context_line":"flavors that make the scheduler using specific Placement API requests..."},{"line_number":75,"context_line":""},{"line_number":76,"context_line":"#2 : As a user, I\u0027d like to get my vGPUs as close as possible to the vCPUs"},{"line_number":77,"context_line":"--------------------------------------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_1da508a2","line":74,"updated":"2018-04-04 01:37:05.000000000","message":"The hw:numa_* extra specs only refer to the virtual numa topology of the guest. Its important to remember that intenioll dose not map 1:1 to the host numa topology in all case today. Modelling numa in placement is specifically modelling host numa topology not guest numa topology so we will need to keep the numa_* that usecase.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d84bc35430517f4e0c69b73d44bb5bcac0603872","unresolved":false,"context_lines":[{"line_number":73,"context_line":"Now, imagine a world where you could ask for a guest NUMA topology by using"},{"line_number":74,"context_line":"flavors that make the scheduler using specific Placement API requests..."},{"line_number":75,"context_line":""},{"line_number":76,"context_line":"#2 : As a user, I\u0027d like to get my vGPUs as close as possible to the vCPUs"},{"line_number":77,"context_line":"--------------------------------------------------------------------------"},{"line_number":78,"context_line":""},{"line_number":79,"context_line":"Say now that the above NUMA topology with a 2-sockets, 4 cores no HT is having"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_470bfc22","line":76,"range":{"start_line":76,"start_character":35,"end_line":76,"end_character":74},"updated":"2018-03-29 21:04:56.000000000","message":"AFAIK, the vGPUs and vCPUs are entirely unrelated. I believe what you are referring to here is that you want to ensure that the *physical GPU* from which the guest\u0027s VGPU resources are being supplied is associated to the same NUMA node that is supplying the *dedicated CPUs* for a guest?","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":73,"context_line":"Now, imagine a world where you could ask for a guest NUMA topology by using"},{"line_number":74,"context_line":"flavors that make the scheduler using specific Placement API requests..."},{"line_number":75,"context_line":""},{"line_number":76,"context_line":"#2 : As a user, I\u0027d like to get my vGPUs as close as possible to the vCPUs"},{"line_number":77,"context_line":"--------------------------------------------------------------------------"},{"line_number":78,"context_line":""},{"line_number":79,"context_line":"Say now that the above NUMA topology with a 2-sockets, 4 cores no HT is having"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_3dc9ec57","line":76,"in_reply_to":"","updated":"2018-04-04 01:37:05.000000000","message":"Well we have numa affinity for shared cpus also. Simple example is hugepages, if you request hugepages and no other extra speca we invest a request for hw:numa_nodes\u003d1 and constrain the guest to float over the host numa node the hugepages where allocated from. So basically we do partial pinning to a numa node rather the specific 1:1 core pinning.  This usecase is basically the same as the pci numa affinity policy but for vgpus","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":90,"context_line":"       |CPU1| |CPU2| |PGPU|      |CPU3| |CPU4|"},{"line_number":91,"context_line":"       +----+ +----- +----+      +----+ +----+"},{"line_number":92,"context_line":""},{"line_number":93,"context_line":"Here, the physical GPU device (PGPU) would share a common bus with CPU1 and"},{"line_number":94,"context_line":"CPU2, but not with CPU3 and CPU4."},{"line_number":95,"context_line":""},{"line_number":96,"context_line":"In that case, imagine I\u0027d like to run a CUDA library like OpenACC for computing"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_7debe476","line":93,"updated":"2018-04-04 01:37:05.000000000","message":"This is inaccurate but i dont really want to rathole on why beyond to say this assumes only onw numa node per cpu socket. This has not been the case on intel since haswell (cluster on die) or amd since zen(one numa node per ccx i belive). In the intel case the xpus bave equal latency to the pci bus to which the pgpu is attch form both numa node within the socket. A dma or otherwise mmap region associated with the pgpu will however have numa affinity to one of the two memory controllers within the socket. This will produce numa effects as a result of the adress space selected for the dma mapping but it is not related to the bus topology.  As such i would remove references to buses and refer only to memory controllers or numa node/cells","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"93bfbc787ef432454a428520093785aefb79b025","unresolved":false,"context_lines":[{"line_number":90,"context_line":"       |CPU1| |CPU2| |PGPU|      |CPU3| |CPU4|"},{"line_number":91,"context_line":"       +----+ +----- +----+      +----+ +----+"},{"line_number":92,"context_line":""},{"line_number":93,"context_line":"Here, the physical GPU device (PGPU) would share a common bus with CPU1 and"},{"line_number":94,"context_line":"CPU2, but not with CPU3 and CPU4."},{"line_number":95,"context_line":""},{"line_number":96,"context_line":"In that case, imagine I\u0027d like to run a CUDA library like OpenACC for computing"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_8d8c0a80","line":93,"in_reply_to":"bf659307_7debe476","updated":"2018-04-10 09:54:28.000000000","message":"Very interesting explanation. Thanks for helping me understanding how NUMA affinity works for PCI devices.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":119,"context_line":"                      +-----------------+"},{"line_number":120,"context_line":"                      /                 \\"},{"line_number":121,"context_line":"   +------------------+                 +-----------------+"},{"line_number":122,"context_line":"   | NUMA1_rp         |                 | NUMA2_rp        |"},{"line_number":123,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":124,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":125,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_5d9dc007","line":122,"in_reply_to":"","updated":"2018-04-04 01:37:05.000000000","message":"Can we use the name of the rp to corralate the placement rp to a physical host numa node by defining a name in this spec or at least suggest the virt drive should use such a mechanism. Failing that we will need a custom_ trait to allow the virt driver to map the rp back to the host nodes when consuming allocation candidates.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"fdbb530b1f35d535f5eb24dc4feefb69c1d5b062","unresolved":false,"context_lines":[{"line_number":119,"context_line":"                      +-----------------+"},{"line_number":120,"context_line":"                      /                 \\"},{"line_number":121,"context_line":"   +------------------+                 +-----------------+"},{"line_number":122,"context_line":"   | NUMA1_rp         |                 | NUMA2_rp        |"},{"line_number":123,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":124,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":125,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_145ab381","line":122,"in_reply_to":"bf659307_5d9dc007","updated":"2018-04-04 13:33:11.000000000","message":"That\u0027s completely up to the virt driver.  It can use a name and/or UUID that corresponds to its internal identifier; it can maintain a lookup table; whatever.  But nothing outside of the virt driver needs to be privy to that mechanism. And we certainly don\u0027t want to dictate it from this level, because different platforms may require wildly different methods.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":126,"context_line":"   +------------------+                 +-----------------+"},{"line_number":127,"context_line":"            |                                 |"},{"line_number":128,"context_line":"      +----------+                     +-----------------+"},{"line_number":129,"context_line":"      | PGPU1_rp |                     | PF_rp           |"},{"line_number":130,"context_line":"      | VGPU: 8  |                     | SRIOV_NET_VF: 8 |"},{"line_number":131,"context_line":"      +----------+                     +-----------------+"},{"line_number":132,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_dda9d023","line":129,"updated":"2018-04-04 01:37:05.000000000","message":"Similarly for pf_rp can we suggest a nameing conversion that may be hypervisors specific to corralate a specific pf on the host to this placement resource provider.  For example in the libvirt driver this could be a slugified pci addres and in the hyperv case it could be the uuid they use internally for pfs.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"fdbb530b1f35d535f5eb24dc4feefb69c1d5b062","unresolved":false,"context_lines":[{"line_number":126,"context_line":"   +------------------+                 +-----------------+"},{"line_number":127,"context_line":"            |                                 |"},{"line_number":128,"context_line":"      +----------+                     +-----------------+"},{"line_number":129,"context_line":"      | PGPU1_rp |                     | PF_rp           |"},{"line_number":130,"context_line":"      | VGPU: 8  |                     | SRIOV_NET_VF: 8 |"},{"line_number":131,"context_line":"      +----------+                     +-----------------+"},{"line_number":132,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_54c90b23","line":129,"in_reply_to":"bf659307_dda9d023","updated":"2018-04-04 13:33:11.000000000","message":"Those are good suggestions and examples to reinforce the above.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":151,"context_line":"* a SR-IOV physical function could count the number of virtual functions"},{"line_number":152,"context_line":"  it can create."},{"line_number":153,"context_line":""},{"line_number":154,"context_line":".. note:: Since `huge pages`_ (or specific memory page size) are not really"},{"line_number":155,"context_line":"          related to NUMA topologies, discussing about how to provide that"},{"line_number":156,"context_line":"          feature using Placement resource traits or classes should be done"},{"line_number":157,"context_line":"          out of this spec. That said, like ``PCPU`` resource class for CPU"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_1d62e80a","line":154,"in_reply_to":"","updated":"2018-04-04 01:37:05.000000000","message":"Well in the linux and widows kernel hugepages are always allocated and track per numa node so i cant think of a better example of something that implicitly has numa affinity.\nAt the hardware levek the memory mapping unit and tlb set up specific entries for each hugepage which map the virtual adress of that vital memory page to a specific physical page or set of contiguous pages owned by a single memory controller.  If you want to delegate numa manage of memory pages to a different spec for scope reasons that is fine but this statement is incorrect and should be removed.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"4706c31e14ea699ab8eaf48faed3321cf48ab4e1","unresolved":false,"context_lines":[{"line_number":151,"context_line":"* a SR-IOV physical function could count the number of virtual functions"},{"line_number":152,"context_line":"  it can create."},{"line_number":153,"context_line":""},{"line_number":154,"context_line":".. note:: Since `huge pages`_ (or specific memory page size) are not really"},{"line_number":155,"context_line":"          related to NUMA topologies, discussing about how to provide that"},{"line_number":156,"context_line":"          feature using Placement resource traits or classes should be done"},{"line_number":157,"context_line":"          out of this spec. That said, like ``PCPU`` resource class for CPU"},{"line_number":158,"context_line":"          pinning, we could use traits saying the page size for a specific"},{"line_number":159,"context_line":"          NUMA node (hence a child Resource Provider) and have specific"},{"line_number":160,"context_line":"          resource classes for checking the usage for each page size (eg."}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_3c466423","line":157,"range":{"start_line":154,"start_character":10,"end_line":157,"end_character":27},"updated":"2018-03-27 17:11:28.000000000","message":"In the current implementation merely asking for hugepages will result in the guest being given an explicit NUMA topology and being constrained to a single host NUMA node. (With the libvirt driver, at least.)  As such, it is actually closely tied to how resources must be tracked in placement.\n\nAlso, because host memory is effectively split up into separate pools for each pagesize, we can\u0027t just track MEMORY_MB in placement, we need to track it for each page size.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":154,"context_line":".. note:: Since `huge pages`_ (or specific memory page size) are not really"},{"line_number":155,"context_line":"          related to NUMA topologies, discussing about how to provide that"},{"line_number":156,"context_line":"          feature using Placement resource traits or classes should be done"},{"line_number":157,"context_line":"          out of this spec. That said, like ``PCPU`` resource class for CPU"},{"line_number":158,"context_line":"          pinning, we could use traits saying the page size for a specific"},{"line_number":159,"context_line":"          NUMA node (hence a child Resource Provider) and have specific"},{"line_number":160,"context_line":"          resource classes for checking the usage for each page size (eg."}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_bd517c38","line":157,"in_reply_to":"","updated":"2018-04-04 01:37:05.000000000","message":"That is for the libvirt driver only. This is not guaranteed for example with the hyperv driver i.e the fact that hw:mem_page_size\u003dlarge implies hw:numa_nodes\u003d1 is not guaranteed by the api it is just an implementation detail of the libvirt driver.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"4706c31e14ea699ab8eaf48faed3321cf48ab4e1","unresolved":false,"context_lines":[{"line_number":154,"context_line":".. note:: Since `huge pages`_ (or specific memory page size) are not really"},{"line_number":155,"context_line":"          related to NUMA topologies, discussing about how to provide that"},{"line_number":156,"context_line":"          feature using Placement resource traits or classes should be done"},{"line_number":157,"context_line":"          out of this spec. That said, like ``PCPU`` resource class for CPU"},{"line_number":158,"context_line":"          pinning, we could use traits saying the page size for a specific"},{"line_number":159,"context_line":"          NUMA node (hence a child Resource Provider) and have specific"},{"line_number":160,"context_line":"          resource classes for checking the usage for each page size (eg."},{"line_number":161,"context_line":"          for a page size of 1GB, a trait named ``HW_MEM_PAGESIZE_1GB`` and"},{"line_number":162,"context_line":"          a specific ``PAGE_MEMORY_MB`` resource class with a step size of"},{"line_number":163,"context_line":"          1024)"},{"line_number":164,"context_line":""},{"line_number":165,"context_line":""},{"line_number":166,"context_line":"Asking for NUMA resources tied to a NUMA node or not"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_993782b1","line":163,"range":{"start_line":157,"start_character":29,"end_line":163,"end_character":15},"updated":"2018-03-27 17:11:28.000000000","message":"Alternately, we could have PAGE_MEMORY_4KB, PAGE_MEMORY_2MB, PAGE_MEMORY_1GB, etc. and then track resources in units of pages with a step size of 1.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":160,"context_line":"          resource classes for checking the usage for each page size (eg."},{"line_number":161,"context_line":"          for a page size of 1GB, a trait named ``HW_MEM_PAGESIZE_1GB`` and"},{"line_number":162,"context_line":"          a specific ``PAGE_MEMORY_MB`` resource class with a step size of"},{"line_number":163,"context_line":"          1024)"},{"line_number":164,"context_line":""},{"line_number":165,"context_line":""},{"line_number":166,"context_line":"Asking for NUMA resources tied to a NUMA node or not"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_5855cef8","line":163,"in_reply_to":"","updated":"2018-04-04 01:37:05.000000000","message":"Amd on x86_64 support 512mb also i think\nI think we will have a resource class explosion if we go that route. The outer advantage of the trait approach is we can support hw:mem_page_size\u003dlarge which is really the only value you should set without a serious amount of benchmarking. Any approach that breaks hw:mem_page_size\u003dlarge a non starter for me and i dont see how we csn do that with a page based approach when any page size \u003e 4k is valid","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"9e646d246b4b5b47f2eacfa3a2039b982ccfadc8","unresolved":false,"context_lines":[{"line_number":160,"context_line":"          resource classes for checking the usage for each page size (eg."},{"line_number":161,"context_line":"          for a page size of 1GB, a trait named ``HW_MEM_PAGESIZE_1GB`` and"},{"line_number":162,"context_line":"          a specific ``PAGE_MEMORY_MB`` resource class with a step size of"},{"line_number":163,"context_line":"          1024)"},{"line_number":164,"context_line":""},{"line_number":165,"context_line":""},{"line_number":166,"context_line":"Asking for NUMA resources tied to a NUMA node or not"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_be688187","line":163,"in_reply_to":"bf659307_5855cef8","updated":"2018-04-04 16:55:01.000000000","message":"I don\u0027t recall support for 512MB, but I could be wrong. :)\n\nYou\u0027re correct that support for pagesizes of \"large\" and \"any\" gets a bit messy, but these are already problematic for live migration since if you happen to end up using 1GB pages it\u0027s quite likely you\u0027ll be restricted as to where you can live-migrate to.  (Though this is cloud-provider-dependent, of course.)\n\nI think you could have the scheduler track what page sizes are supported by the various compute nodes and issue multiple requests for the different pages sizes.  In practice I would expect maybe 3-4 possible sizes in a given cloud.\n\nI\u0027m not a placement expert though, so I\u0027ll leave it up to them to decide what makes the most sense.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"27b2d7b3a354b3b16c2b913739d7be504f4aeeb1","unresolved":false,"context_lines":[{"line_number":154,"context_line":".. note:: Since `huge pages`_ (or specific memory page size) are not really"},{"line_number":155,"context_line":"          related to NUMA topologies, discussing about how to provide that"},{"line_number":156,"context_line":"          feature using Placement resource traits or classes should be done"},{"line_number":157,"context_line":"          out of this spec. That said, like ``PCPU`` resource class for CPU"},{"line_number":158,"context_line":"          pinning, we could use traits saying the page size for a specific"},{"line_number":159,"context_line":"          NUMA node (hence a child Resource Provider) and have specific"},{"line_number":160,"context_line":"          resource classes for checking the usage for each page size (eg."},{"line_number":161,"context_line":"          for a page size of 1GB, a trait named ``HW_MEM_PAGESIZE_1GB`` and"},{"line_number":162,"context_line":"          a specific ``PAGE_MEMORY_MB`` resource class with a step size of"},{"line_number":163,"context_line":"          1024)"},{"line_number":164,"context_line":""},{"line_number":165,"context_line":""},{"line_number":166,"context_line":"Asking for NUMA resources tied to a NUMA node or not"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_a60f7d35","line":163,"range":{"start_line":157,"start_character":29,"end_line":163,"end_character":15},"in_reply_to":"bf659307_647de57d","updated":"2018-03-29 19:58:50.000000000","message":"Yes, if you\u0027re tracking it as pages, it doesn\u0027t make sense to track it as MB as well.\n\nI think POWER can do page sizes of 4KB, 64KB, 16MB, and 16GB.  Are there any other architectures we need to consider?","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":154,"context_line":".. note:: Since `huge pages`_ (or specific memory page size) are not really"},{"line_number":155,"context_line":"          related to NUMA topologies, discussing about how to provide that"},{"line_number":156,"context_line":"          feature using Placement resource traits or classes should be done"},{"line_number":157,"context_line":"          out of this spec. That said, like ``PCPU`` resource class for CPU"},{"line_number":158,"context_line":"          pinning, we could use traits saying the page size for a specific"},{"line_number":159,"context_line":"          NUMA node (hence a child Resource Provider) and have specific"},{"line_number":160,"context_line":"          resource classes for checking the usage for each page size (eg."},{"line_number":161,"context_line":"          for a page size of 1GB, a trait named ``HW_MEM_PAGESIZE_1GB`` and"},{"line_number":162,"context_line":"          a specific ``PAGE_MEMORY_MB`` resource class with a step size of"},{"line_number":163,"context_line":"          1024)"},{"line_number":164,"context_line":""},{"line_number":165,"context_line":""},{"line_number":166,"context_line":"Asking for NUMA resources tied to a NUMA node or not"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_647de57d","line":163,"range":{"start_line":157,"start_character":29,"end_line":163,"end_character":15},"in_reply_to":"bf659307_993782b1","updated":"2018-03-29 15:33:20.000000000","message":"If we do this, I\u0027d like to get rid of the MEMORY_MB attribute on the NUMA RP entirely. There\u0027s simply no reason to keep it as it leads to a sort of split brain (PAGE_MEMORY_4K + PAGE_MEMORY_2MB + PAGE_MEMORY_1GB \u003d\u003d MEMORY_MB).\n\nI guess we also need to account for other page sizes. It\u0027s only x86 that uses those particular sizes, right?","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"57f0d1d0c644271d80eb3583ffc9dc0292dc120e","unresolved":false,"context_lines":[{"line_number":160,"context_line":"          resource classes for checking the usage for each page size (eg."},{"line_number":161,"context_line":"          for a page size of 1GB, a trait named ``HW_MEM_PAGESIZE_1GB`` and"},{"line_number":162,"context_line":"          a specific ``PAGE_MEMORY_MB`` resource class with a step size of"},{"line_number":163,"context_line":"          1024)"},{"line_number":164,"context_line":""},{"line_number":165,"context_line":""},{"line_number":166,"context_line":"Asking for NUMA resources tied to a NUMA node or not"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_1714809f","line":163,"in_reply_to":"bf659307_be688187","updated":"2018-04-10 14:36:24.000000000","message":"Given huge pages are not really tied to my own spec, will remove the whole paragraph so we could discuss on the page size design in a separate spec.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"fdbb530b1f35d535f5eb24dc4feefb69c1d5b062","unresolved":false,"context_lines":[{"line_number":172,"context_line":""},{"line_number":173,"context_line":"Since the extra spec property ``--property hw:numa_nodes\u003dN`` (where N is an"},{"line_number":174,"context_line":"integer) asks for sharding the vCPUs threads between N NUMA nodes, then"},{"line_number":175,"context_line":"the scheduler would translate that into a numbered request group query like:"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"  ``resources\\ *N*:VCPU\\ \u003d\\ *(vCPU_count / N)*``"},{"line_number":178,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_74c3efed","line":175,"range":{"start_line":175,"start_character":4,"end_line":175,"end_character":29},"updated":"2018-04-04 13:33:11.000000000","message":"I like this idea.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"fdbb530b1f35d535f5eb24dc4feefb69c1d5b062","unresolved":false,"context_lines":[{"line_number":174,"context_line":"integer) asks for sharding the vCPUs threads between N NUMA nodes, then"},{"line_number":175,"context_line":"the scheduler would translate that into a numbered request group query like:"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"  ``resources\\ *N*:VCPU\\ \u003d\\ *(vCPU_count / N)*``"},{"line_number":178,"context_line":""},{"line_number":179,"context_line":"(where vCPU_count / N would be rounded to be equally sharding the vCPUs)"},{"line_number":180,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_94056316","line":177,"range":{"start_line":177,"start_character":2,"end_line":177,"end_character":48},"updated":"2018-04-04 13:33:11.000000000","message":"I noticed that your formula dictates a (more or less) \"even\" spread.  Is that intentional/necessary?  Because a more flexible/powerful method would be to translate to:\n\n resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1\n\nThis would allow the procs to land wherever there\u0027s room.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"fd78700705e602ea40f88e0247a7d09295be8697","unresolved":false,"context_lines":[{"line_number":174,"context_line":"integer) asks for sharding the vCPUs threads between N NUMA nodes, then"},{"line_number":175,"context_line":"the scheduler would translate that into a numbered request group query like:"},{"line_number":176,"context_line":""},{"line_number":177,"context_line":"  ``resources\\ *N*:VCPU\\ \u003d\\ *(vCPU_count / N)*``"},{"line_number":178,"context_line":""},{"line_number":179,"context_line":"(where vCPU_count / N would be rounded to be equally sharding the vCPUs)"},{"line_number":180,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_619fac65","line":177,"range":{"start_line":177,"start_character":2,"end_line":177,"end_character":48},"in_reply_to":"bf659307_94056316","updated":"2018-04-04 17:16:17.000000000","message":"The existing behaviour is that if you don\u0027t specify explicit information about how what CPU/RAM is on what virtual NUMA node then they must be evenly divisible across all of the virtual NUMA nodes.  See hardware._get_numa_topology_auto()","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":25625,"name":"Tetsuro Nakamura","email":"tetsuro.nakamura.bc@hco.ntt.co.jp","username":"tetsuro0907"},"change_message_id":"6560946dd1ac64a75a8d8cdb2d6f0cbbba4c3c90","unresolved":false,"context_lines":[{"line_number":188,"context_line":""},{"line_number":189,"context_line":"    ``resources:VCPU\u003d8``"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"* for a flavor of 7 VCPUs and hw:numa_nodes\u003d3, it would translate into:"},{"line_number":192,"context_line":""},{"line_number":193,"context_line":"    ``resources1:VCPU\u003d3\u0026resources2:VCPU\u003d2\u0026resources3:VCPU\u003d2``"},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"For the other related property ``hw:numa_cpus.N\u003dY``, it would just"},{"line_number":196,"context_line":"not calculate the number of VCPUs to ask for the specific numbered request"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_a580e9c0","line":193,"range":{"start_line":191,"start_character":0,"end_line":193,"end_character":61},"updated":"2018-03-29 05:29:35.000000000","message":"For your information, this indivisible case is *currently* not supported and results in an error[1]. Let me make sure you mean you would like to change it to support this case. If this is not what you intend here, I would use a divisible example here.\n\n[1] https://github.com/openstack/nova/blob/42c369223d61d652ae7fb277b799b90a69302651/nova/virt/hardware.py#L1186-L1188","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":190,"context_line":""},{"line_number":191,"context_line":"* for a flavor of 7 VCPUs and hw:numa_nodes\u003d3, it would translate into:"},{"line_number":192,"context_line":""},{"line_number":193,"context_line":"    ``resources1:VCPU\u003d3\u0026resources2:VCPU\u003d2\u0026resources3:VCPU\u003d2``"},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"For the other related property ``hw:numa_cpus.N\u003dY``, it would just"},{"line_number":196,"context_line":"not calculate the number of VCPUs to ask for the specific numbered request"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_b83c2a93","line":193,"in_reply_to":"bf659307_86d599e7","updated":"2018-04-04 01:37:05.000000000","message":"Why would we not support tjis this used to work and should work. We hav3 the numa1.Cpu\u003d syntax to allow this to be done manually and i am pretty sure this used to work in old openstack releases","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":188,"context_line":""},{"line_number":189,"context_line":"    ``resources:VCPU\u003d8``"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"* for a flavor of 7 VCPUs and hw:numa_nodes\u003d3, it would translate into:"},{"line_number":192,"context_line":""},{"line_number":193,"context_line":"    ``resources1:VCPU\u003d3\u0026resources2:VCPU\u003d2\u0026resources3:VCPU\u003d2``"},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"For the other related property ``hw:numa_cpus.N\u003dY``, it would just"},{"line_number":196,"context_line":"not calculate the number of VCPUs to ask for the specific numbered request"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_04a3c901","line":193,"range":{"start_line":191,"start_character":0,"end_line":193,"end_character":61},"in_reply_to":"bf659307_a580e9c0","updated":"2018-03-29 15:33:20.000000000","message":"+1. I don\u0027t think we need to support this either","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"73746da042011877ce1940482c1cfa5448ea4376","unresolved":false,"context_lines":[{"line_number":188,"context_line":""},{"line_number":189,"context_line":"    ``resources:VCPU\u003d8``"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"* for a flavor of 7 VCPUs and hw:numa_nodes\u003d3, it would translate into:"},{"line_number":192,"context_line":""},{"line_number":193,"context_line":"    ``resources1:VCPU\u003d3\u0026resources2:VCPU\u003d2\u0026resources3:VCPU\u003d2``"},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"For the other related property ``hw:numa_cpus.N\u003dY``, it would just"},{"line_number":196,"context_line":"not calculate the number of VCPUs to ask for the specific numbered request"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_86d599e7","line":193,"range":{"start_line":191,"start_character":0,"end_line":193,"end_character":61},"in_reply_to":"bf659307_a580e9c0","updated":"2018-03-29 19:54:04.000000000","message":"Agreed, for the automatic division case the number of guest cpus needs to be a multiple of the number of guest NUMA nodes.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"9e646d246b4b5b47f2eacfa3a2039b982ccfadc8","unresolved":false,"context_lines":[{"line_number":190,"context_line":""},{"line_number":191,"context_line":"* for a flavor of 7 VCPUs and hw:numa_nodes\u003d3, it would translate into:"},{"line_number":192,"context_line":""},{"line_number":193,"context_line":"    ``resources1:VCPU\u003d3\u0026resources2:VCPU\u003d2\u0026resources3:VCPU\u003d2``"},{"line_number":194,"context_line":""},{"line_number":195,"context_line":"For the other related property ``hw:numa_cpus.N\u003dY``, it would just"},{"line_number":196,"context_line":"not calculate the number of VCPUs to ask for the specific numbered request"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_3ebb1173","line":193,"in_reply_to":"bf659307_b83c2a93","updated":"2018-04-04 16:55:01.000000000","message":"For the automatic division case if the total amount of resources is not a multiple of the number of NUMA nodes then I think it\u0027s fair to make the user be explicit about how they want the resources split up.  It\u0027s worked this way as far back as I can remember.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":25625,"name":"Tetsuro Nakamura","email":"tetsuro.nakamura.bc@hco.ntt.co.jp","username":"tetsuro0907"},"change_message_id":"6560946dd1ac64a75a8d8cdb2d6f0cbbba4c3c90","unresolved":false,"context_lines":[{"line_number":197,"context_line":"group, but rather just directly use that value (here, Y)."},{"line_number":198,"context_line":"Eg.:"},{"line_number":199,"context_line":""},{"line_number":200,"context_line":"* for a flavor of 8 VCPUs with extra specs set with"},{"line_number":201,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d2\u0026hw:numa_cpus.1\u003d6``,"},{"line_number":202,"context_line":"  it would translate the Placement query straight into"},{"line_number":203,"context_line":"  ``resources1:VCPU\u003d2\u0026resources2:VCPU\u003d6``"},{"line_number":204,"context_line":""},{"line_number":205,"context_line":"For the last property about memory splitting across NUMA nodes, that would"},{"line_number":206,"context_line":"also be a number request group query specifying which resource group for which"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_e5f1016d","line":203,"range":{"start_line":200,"start_character":0,"end_line":203,"end_character":41},"updated":"2018-03-29 05:29:35.000000000","message":"\"hw:numa_cpus.N\" is used to specify the exact number of cpus you want to allocate to that numa node, not amount [2].\nIf we want to change it, we should discuss carefully how we divide the cpus to numa nodes, if that design is okay for every kind of VMs, and upgrade impacts.\n\n[2] https://docs.openstack.org/nova/latest/admin/cpu-topologies.html#customizing-instance-numa-placement-policies","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"73746da042011877ce1940482c1cfa5448ea4376","unresolved":false,"context_lines":[{"line_number":197,"context_line":"group, but rather just directly use that value (here, Y)."},{"line_number":198,"context_line":"Eg.:"},{"line_number":199,"context_line":""},{"line_number":200,"context_line":"* for a flavor of 8 VCPUs with extra specs set with"},{"line_number":201,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d2\u0026hw:numa_cpus.1\u003d6``,"},{"line_number":202,"context_line":"  it would translate the Placement query straight into"},{"line_number":203,"context_line":"  ``resources1:VCPU\u003d2\u0026resources2:VCPU\u003d6``"},{"line_number":204,"context_line":""},{"line_number":205,"context_line":"For the last property about memory splitting across NUMA nodes, that would"},{"line_number":206,"context_line":"also be a number request group query specifying which resource group for which"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_4680e1c0","line":203,"range":{"start_line":200,"start_character":0,"end_line":203,"end_character":41},"in_reply_to":"bf659307_848fd973","updated":"2018-03-29 19:54:04.000000000","message":"I think he\u0027s saying that \"hw:numa_cpus.1\u003d2,3,4,5\" is a list of specific CPUs, not an amount.\n\nIt should still be simple to convert that into \"resources1:VCPU\u003d4\" by checking the length of the list.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":197,"context_line":"group, but rather just directly use that value (here, Y)."},{"line_number":198,"context_line":"Eg.:"},{"line_number":199,"context_line":""},{"line_number":200,"context_line":"* for a flavor of 8 VCPUs with extra specs set with"},{"line_number":201,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d2\u0026hw:numa_cpus.1\u003d6``,"},{"line_number":202,"context_line":"  it would translate the Placement query straight into"},{"line_number":203,"context_line":"  ``resources1:VCPU\u003d2\u0026resources2:VCPU\u003d6``"},{"line_number":204,"context_line":""},{"line_number":205,"context_line":"For the last property about memory splitting across NUMA nodes, that would"},{"line_number":206,"context_line":"also be a number request group query specifying which resource group for which"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_848fd973","line":203,"range":{"start_line":200,"start_character":0,"end_line":203,"end_character":41},"in_reply_to":"bf659307_e5f1016d","updated":"2018-03-29 15:33:20.000000000","message":"\u003e \"hw:numa_cpus.N\" is used to specify the exact number of cpus you\n \u003e want to allocate to that numa node, not amount [2].\n \u003e If we want to change it, we should discuss carefully how we divide\n \u003e the cpus to numa nodes, if that design is okay for every kind of\n \u003e VMs, and upgrade impacts.\n \u003e \n \u003e [2] https://docs.openstack.org/nova/latest/admin/cpu-topologies.html#customizing-instance-numa-placement-policies\n\nCould you expand on this. It sounds like the same thing to me","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":228,"context_line":"* If I want ``N`` vGPUs *and* those vGPUs be in the exact same NUMA node than"},{"line_number":229,"context_line":"  my ``M`` vCPUs, then I\u0027d amend my flavor by ``resources1:VGPU\u003dN;VCPU\u003dM``"},{"line_number":230,"context_line":""},{"line_number":231,"context_line":"* If I just want ``N`` vGPUs *close* to my vCPUs but not specificly in the same"},{"line_number":232,"context_line":"  NUMA node, then I\u0027d be writing my flavor with"},{"line_number":233,"context_line":"  ``resources:VCPU\u003dM\u0026resources1:VGPU\u003dN`` (note the fact that we only use a"},{"line_number":234,"context_line":"  numbered request group for VGPU)"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_446221b0","line":231,"range":{"start_line":231,"start_character":57,"end_line":231,"end_character":67},"updated":"2018-03-29 15:33:20.000000000","message":"specifically","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":239,"context_line":""},{"line_number":240,"context_line":"Given there are NUMA workloads but also non-NUMA workloads, it\u0027s also important"},{"line_number":241,"context_line":"for operators to just have compute nodes accepting the latter."},{"line_number":242,"context_line":"That said, having the compute node resources to be splitted between multiple"},{"line_number":243,"context_line":"NUMA nodes could be a problem for those non-NUMA workloads if they want to keep"},{"line_number":244,"context_line":"the same behaviour than currently."},{"line_number":245,"context_line":"For example, say an instance with 2 vCPUs and one host having 2 NUMA nodes but"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_6435e58e","line":242,"range":{"start_line":242,"start_character":51,"end_line":242,"end_character":59},"updated":"2018-03-29 15:33:20.000000000","message":"split","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":241,"context_line":"for operators to just have compute nodes accepting the latter."},{"line_number":242,"context_line":"That said, having the compute node resources to be splitted between multiple"},{"line_number":243,"context_line":"NUMA nodes could be a problem for those non-NUMA workloads if they want to keep"},{"line_number":244,"context_line":"the same behaviour than currently."},{"line_number":245,"context_line":"For example, say an instance with 2 vCPUs and one host having 2 NUMA nodes but"},{"line_number":246,"context_line":"each one only accepting one VCPU, then the Placement API wouldn\u0027t accept that"},{"line_number":247,"context_line":"host (given each nested RP only accepts one VCPU). For that reason, we need to"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_c4239155","line":244,"range":{"start_line":244,"start_character":4,"end_line":244,"end_character":34},"updated":"2018-03-29 15:33:20.000000000","message":"existing behavior.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"8b80c67c1ad11c87bb45a470535c286df9ca9ad4","unresolved":false,"context_lines":[{"line_number":242,"context_line":"That said, having the compute node resources to be splitted between multiple"},{"line_number":243,"context_line":"NUMA nodes could be a problem for those non-NUMA workloads if they want to keep"},{"line_number":244,"context_line":"the same behaviour than currently."},{"line_number":245,"context_line":"For example, say an instance with 2 vCPUs and one host having 2 NUMA nodes but"},{"line_number":246,"context_line":"each one only accepting one VCPU, then the Placement API wouldn\u0027t accept that"},{"line_number":247,"context_line":"host (given each nested RP only accepts one VCPU). For that reason, we need to"},{"line_number":248,"context_line":"have a configuration for saying which resources should be nested."},{"line_number":249,"context_line":"The proposal we make here could be :"},{"line_number":250,"context_line":""},{"line_number":251,"context_line":".. code::"},{"line_number":252,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_24e04dfa","line":249,"range":{"start_line":245,"start_character":0,"end_line":249,"end_character":36},"updated":"2018-03-29 15:33:20.000000000","message":"Oh, hold on. So a host would be either all NUMA or non-NUMA? That\u0027s not how I\u0027d pictured this. To be honest, I thought we were going to be left with a situation where the NUMA representation in placement was a second representation, complementing the existing compute node RP.\n\nI assume this won\u0027t support booting instances with different CPU policies as tetsuro\u0027s spec suggested?","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"73746da042011877ce1940482c1cfa5448ea4376","unresolved":false,"context_lines":[{"line_number":242,"context_line":"That said, having the compute node resources to be splitted between multiple"},{"line_number":243,"context_line":"NUMA nodes could be a problem for those non-NUMA workloads if they want to keep"},{"line_number":244,"context_line":"the same behaviour than currently."},{"line_number":245,"context_line":"For example, say an instance with 2 vCPUs and one host having 2 NUMA nodes but"},{"line_number":246,"context_line":"each one only accepting one VCPU, then the Placement API wouldn\u0027t accept that"},{"line_number":247,"context_line":"host (given each nested RP only accepts one VCPU). For that reason, we need to"},{"line_number":248,"context_line":"have a configuration for saying which resources should be nested."},{"line_number":249,"context_line":"The proposal we make here could be :"},{"line_number":250,"context_line":""},{"line_number":251,"context_line":".. code::"},{"line_number":252,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_26c04de1","line":249,"range":{"start_line":245,"start_character":0,"end_line":249,"end_character":36},"in_reply_to":"bf659307_24e04dfa","updated":"2018-03-29 19:54:04.000000000","message":"The problem arises where we can have resources consumed at the compute node level (VCPU or 4KB pages, primarily) where the instance is free to \"float\" over all host NUMA nodes.  In this scenario we have no way to know how much of each resource was consumed on each host NUMA node, which makes it impossible to properly track per-NUMA resource usage in placement.\n\nThe \"proper\" solution to this is to associate each guest NUMA node to a single host NUMA node and not let it \"float\".\n\nThe way the code is now, this would mean that an instance asking for 4000 4KB pages would not be able to land on a compute node that has 2000 4KB pages on each of two host NUMA nodes.\n\nSean K Mooney proposed that in such a case we could give the guest two virtual NUMA nodes in order to satisfy the memory allocation.  They haven\u0027t asked for any particular NUMA topology, so it\u0027s arguably not \"wrong\" to give them more than one.  This would solve the problem, but I think it would require us to ask placement for multiple permutations--4000 pages on one NUMA node, 3000 on one and 1000 on the other, or 2000 on both.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":246,"context_line":"each one only accepting one VCPU, then the Placement API wouldn\u0027t accept that"},{"line_number":247,"context_line":"host (given each nested RP only accepts one VCPU). For that reason, we need to"},{"line_number":248,"context_line":"have a configuration for saying which resources should be nested."},{"line_number":249,"context_line":"The proposal we make here could be :"},{"line_number":250,"context_line":""},{"line_number":251,"context_line":".. code::"},{"line_number":252,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_58db8e4d","line":249,"in_reply_to":"bf659307_26c04de1","updated":"2018-04-04 01:37:05.000000000","message":"The cpu policies case should work but the memory case Chris raises above is the one we could not resolve any other way.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":250,"context_line":""},{"line_number":251,"context_line":".. code::"},{"line_number":252,"context_line":""},{"line_number":253,"context_line":"  [devices]"},{"line_number":254,"context_line":"  numa_resource_classes \u003d [VCPU, MEMORY_MB, VGPU]"},{"line_number":255,"context_line":""},{"line_number":256,"context_line":"Each of the items in the ListOpt would be a resource class. If operator says"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_b8cc2a35","line":253,"updated":"2018-04-04 01:37:05.000000000","message":"This should be in the placement, default or virtdriver section as cpus and hugepage are not really devices in the pass through sense","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"57f0d1d0c644271d80eb3583ffc9dc0292dc120e","unresolved":false,"context_lines":[{"line_number":250,"context_line":""},{"line_number":251,"context_line":".. code::"},{"line_number":252,"context_line":""},{"line_number":253,"context_line":"  [devices]"},{"line_number":254,"context_line":"  numa_resource_classes \u003d [VCPU, MEMORY_MB, VGPU]"},{"line_number":255,"context_line":""},{"line_number":256,"context_line":"Each of the items in the ListOpt would be a resource class. If operator says"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_b78fd42c","line":253,"in_reply_to":"bf659307_b8cc2a35","updated":"2018-04-10 14:36:24.000000000","message":"Let\u0027s not opiniate about it now, it\u0027s an implementation detail IMHO","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":256,"context_line":"Each of the items in the ListOpt would be a resource class. If operator says"},{"line_number":257,"context_line":"for that specific compute node nova.conf which resources classes to use, then"},{"line_number":258,"context_line":"the compute service (or the virt driver) would look at the host NUMA topology"},{"line_number":259,"context_line":"and provide that resource class for the children NUMA nodes."},{"line_number":260,"context_line":""},{"line_number":261,"context_line":"E.g., a nova.conf having set ``[devices ]/numa_resource_classes \u003d VGPU`` would"},{"line_number":262,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_98be06d3","line":259,"updated":"2018-04-04 01:37:05.000000000","message":"I assume that this will default to all numa relevant classes and the operator will have to set it to an empthy list if the want no numa topology reported. We should default to reporting numa for the host to placement.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"57f0d1d0c644271d80eb3583ffc9dc0292dc120e","unresolved":false,"context_lines":[{"line_number":256,"context_line":"Each of the items in the ListOpt would be a resource class. If operator says"},{"line_number":257,"context_line":"for that specific compute node nova.conf which resources classes to use, then"},{"line_number":258,"context_line":"the compute service (or the virt driver) would look at the host NUMA topology"},{"line_number":259,"context_line":"and provide that resource class for the children NUMA nodes."},{"line_number":260,"context_line":""},{"line_number":261,"context_line":"E.g., a nova.conf having set ``[devices ]/numa_resource_classes \u003d VGPU`` would"},{"line_number":262,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_977df01d","line":259,"in_reply_to":"bf659307_98be06d3","updated":"2018-04-10 14:36:24.000000000","message":"No, will explain why in my next revision","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":25625,"name":"Tetsuro Nakamura","email":"tetsuro.nakamura.bc@hco.ntt.co.jp","username":"tetsuro0907"},"change_message_id":"6560946dd1ac64a75a8d8cdb2d6f0cbbba4c3c90","unresolved":false,"context_lines":[{"line_number":286,"context_line":"          children) should ideally be done outside of the virt driver codes."},{"line_number":287,"context_line":"          That said, it\u0027s acceptable for a first start to define those directly"},{"line_number":288,"context_line":"          in the libvirt module."},{"line_number":289,"context_line":""},{"line_number":290,"context_line":"Alternatives"},{"line_number":291,"context_line":"------------"},{"line_number":292,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_a320e149","line":289,"updated":"2018-03-29 05:29:35.000000000","message":"Yup, leaving the configurations to operators may be controversial, but to me it sounds reasonable. I don\u0027t think there are many operators who want to be aware of NUMA topology. Those NFV operators who want stricter scheduling for higher performance can just go through the procedure to prepare the proper configuration.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":286,"context_line":"          children) should ideally be done outside of the virt driver codes."},{"line_number":287,"context_line":"          That said, it\u0027s acceptable for a first start to define those directly"},{"line_number":288,"context_line":"          in the libvirt module."},{"line_number":289,"context_line":""},{"line_number":290,"context_line":"Alternatives"},{"line_number":291,"context_line":"------------"},{"line_number":292,"context_line":""}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_98084692","line":289,"in_reply_to":"bf659307_a320e149","updated":"2018-04-04 01:37:05.000000000","message":"Truing it off by default will mean placement will not be used for numa prefiltering before hitting the numatopology filter which means they will not see the performance gains of this work. Do we really want an opt in model hear and not opt out. Also we have to be careful that this is not a config option that changes api beviour. If its opt-in we cant automatically split up resource request based on hw:numa_* option or all existing uses of those flavours extra specs will break on upgrade unless we require a config change on upgrade which break our upgrade policy.","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"452750add70a4a093c92a17975016e4bdb96f786","unresolved":false,"context_lines":[{"line_number":338,"context_line":""},{"line_number":339,"context_line":"Upgrade impact"},{"line_number":340,"context_line":"--------------"},{"line_number":341,"context_line":"None"},{"line_number":342,"context_line":""},{"line_number":343,"context_line":"Implementation"},{"line_number":344,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_58814ec2","line":341,"in_reply_to":"","updated":"2018-04-04 01:37:05.000000000","message":"If we start auto generating the resource groups from existing flavours via hw:numa_* extra specs  this will break on upgrade unlees we a.) use an opt out for resource class for numa reporting b.) add all existing numa related resources classes to the default for numa_resource_classes. e.g.\nnuma_resource_classes\u003dvCPU, pCPU, MEMORY_MB, vGPU, VF, PF","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"57f0d1d0c644271d80eb3583ffc9dc0292dc120e","unresolved":false,"context_lines":[{"line_number":338,"context_line":""},{"line_number":339,"context_line":"Upgrade impact"},{"line_number":340,"context_line":"--------------"},{"line_number":341,"context_line":"None"},{"line_number":342,"context_line":""},{"line_number":343,"context_line":"Implementation"},{"line_number":344,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":6,"id":"bf659307_d7ffe864","line":341,"in_reply_to":"bf659307_58814ec2","updated":"2018-04-10 14:36:24.000000000","message":"see my next revision","commit_id":"7d2a8902eed926f448ad2b9b07d8984dcf8c3d5d"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":22,"context_line":"The NUMATopologyFilter checks a lot of NUMA related resources like emulator"},{"line_number":23,"context_line":"threads policies, CPU pinned instances or memory page sizes. While we would"},{"line_number":24,"context_line":"like to eventually get rid of this filter, the problem is that it\u0027s coupling"},{"line_number":25,"context_line":"multiple high performance features that are not really related to NUMA"},{"line_number":26,"context_line":"topologies (like for example CPU pinning, emulator threads or memory pages) and"},{"line_number":27,"context_line":"also checks both *inventories* of the host with *usage* of CPU and"},{"line_number":28,"context_line":"memory page resources by each instance."},{"line_number":29,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_dd2aa92a","line":26,"range":{"start_line":25,"start_character":44,"end_line":26,"end_character":75},"updated":"2018-04-10 15:15:59.000000000","message":"In the first sentence of this paragraph, you wrote: \"checks a lot of NUMA related resources like emulator threads policies, CPU pinned instances or memory page sizes\".\n\nBut here, you say \"not really related to NUMA topologies (like for example CPU pinning, emulator threads or memory pages)\" :)\n\nI think it would be less confusing to just begin the paragraph with \"The NUMATopologyFilter checks a number of resources, including emulator thread policies, CPU pinning and memory pages.\"","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":22,"context_line":"The NUMATopologyFilter checks a lot of NUMA related resources like emulator"},{"line_number":23,"context_line":"threads policies, CPU pinned instances or memory page sizes. While we would"},{"line_number":24,"context_line":"like to eventually get rid of this filter, the problem is that it\u0027s coupling"},{"line_number":25,"context_line":"multiple high performance features that are not really related to NUMA"},{"line_number":26,"context_line":"topologies (like for example CPU pinning, emulator threads or memory pages) and"},{"line_number":27,"context_line":"also checks both *inventories* of the host with *usage* of CPU and"},{"line_number":28,"context_line":"memory page resources by each instance."},{"line_number":29,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_39893dc9","line":26,"range":{"start_line":25,"start_character":44,"end_line":26,"end_character":75},"in_reply_to":"bf659307_dd2aa92a","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":28,"context_line":"memory page resources by each instance."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"Instead, we can model the host CPU (and later memory) topologies as a set of"},{"line_number":31,"context_line":"resource providers arranged in a tree, and just directly allocating a specific"},{"line_number":32,"context_line":"instance to a NUMA node if possible."},{"line_number":33,"context_line":""},{"line_number":34,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_5dc4595f","line":31,"range":{"start_line":31,"start_character":57,"end_line":31,"end_character":67},"updated":"2018-04-10 15:15:59.000000000","message":"s/allocating/allocate resources to/","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":28,"context_line":"memory page resources by each instance."},{"line_number":29,"context_line":""},{"line_number":30,"context_line":"Instead, we can model the host CPU (and later memory) topologies as a set of"},{"line_number":31,"context_line":"resource providers arranged in a tree, and just directly allocating a specific"},{"line_number":32,"context_line":"instance to a NUMA node if possible."},{"line_number":33,"context_line":""},{"line_number":34,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_99a08944","line":31,"range":{"start_line":31,"start_character":57,"end_line":31,"end_character":67},"in_reply_to":"bf659307_5dc4595f","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":29,"context_line":""},{"line_number":30,"context_line":"Instead, we can model the host CPU (and later memory) topologies as a set of"},{"line_number":31,"context_line":"resource providers arranged in a tree, and just directly allocating a specific"},{"line_number":32,"context_line":"instance to a NUMA node if possible."},{"line_number":33,"context_line":""},{"line_number":34,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":35,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_7d94bd44","line":32,"range":{"start_line":32,"start_character":9,"end_line":32,"end_character":24},"updated":"2018-04-10 15:15:59.000000000","message":"s/to a NUMA node if possible/from a resource provider representing a NUMA node/","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":29,"context_line":""},{"line_number":30,"context_line":"Instead, we can model the host CPU (and later memory) topologies as a set of"},{"line_number":31,"context_line":"resource providers arranged in a tree, and just directly allocating a specific"},{"line_number":32,"context_line":"instance to a NUMA node if possible."},{"line_number":33,"context_line":""},{"line_number":34,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":35,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_d99a8112","line":32,"range":{"start_line":32,"start_character":9,"end_line":32,"end_character":24},"in_reply_to":"bf659307_7d94bd44","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":38,"context_line":"like disk and RAM."},{"line_number":39,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":40,"context_line":"within a NUMA node for a vCPU`_ or `customizing the instance CPU topology`_)"},{"line_number":41,"context_line":"would still be only done by the virt driver, and are not covered by that spec."},{"line_number":42,"context_line":""},{"line_number":43,"context_line":"Use Cases"},{"line_number":44,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_b7b9027a","line":41,"range":{"start_line":41,"start_character":68,"end_line":41,"end_character":72},"updated":"2018-04-11 04:00:58.000000000","message":"this","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":38,"context_line":"like disk and RAM."},{"line_number":39,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":40,"context_line":"within a NUMA node for a vCPU`_ or `customizing the instance CPU topology`_)"},{"line_number":41,"context_line":"would still be only done by the virt driver, and are not covered by that spec."},{"line_number":42,"context_line":""},{"line_number":43,"context_line":"Use Cases"},{"line_number":44,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_598671bb","line":41,"range":{"start_line":41,"start_character":68,"end_line":41,"end_character":72},"in_reply_to":"bf659307_b7b9027a","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":43,"context_line":"Use Cases"},{"line_number":44,"context_line":"---------"},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"#1 : As a user, I\u0027d like to get fast access to memory for my 2-core instance"},{"line_number":47,"context_line":"----------------------------------------------------------------------------"},{"line_number":48,"context_line":""},{"line_number":49,"context_line":"Consider the following NUMA topology for a \"2-sockets, 4 cores\" host with no"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_fd61ad3a","line":46,"range":{"start_line":46,"start_character":61,"end_line":46,"end_character":67},"updated":"2018-04-10 15:15:59.000000000","message":"it\u0027s not a 2-core instance. It\u0027s an instance with 2 VCPU.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":43,"context_line":"Use Cases"},{"line_number":44,"context_line":"---------"},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"#1 : As a user, I\u0027d like to get fast access to memory for my 2-core instance"},{"line_number":47,"context_line":"----------------------------------------------------------------------------"},{"line_number":48,"context_line":""},{"line_number":49,"context_line":"Consider the following NUMA topology for a \"2-sockets, 4 cores\" host with no"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_f98e65cf","line":46,"range":{"start_line":46,"start_character":61,"end_line":46,"end_character":67},"in_reply_to":"bf659307_fd61ad3a","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":62,"context_line":"Here, CPU1 and CPU2 would share the same memory through a common memory"},{"line_number":63,"context_line":"controller, while CPU3 and CPU4 would share their own memory."},{"line_number":64,"context_line":""},{"line_number":65,"context_line":"Ideally, applications that require intensive CPU resources would like to ensure"},{"line_number":66,"context_line":"that those CPU resources are provided by the same NUMA node, or some"},{"line_number":67,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":68,"context_line":"I/O-bound of course)."}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_9dcbd148","line":65,"range":{"start_line":65,"start_character":45,"end_line":65,"end_character":58},"updated":"2018-04-10 15:15:59.000000000","message":"these applications don\u0027t necessarily require intensive CPU resources. They require low-latency memory access, which is why being on the same NUMA node as their vCPU threads are running on is important.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"9935b7ced5b3ce273ab14da6672aff153032c669","unresolved":false,"context_lines":[{"line_number":62,"context_line":"Here, CPU1 and CPU2 would share the same memory through a common memory"},{"line_number":63,"context_line":"controller, while CPU3 and CPU4 would share their own memory."},{"line_number":64,"context_line":""},{"line_number":65,"context_line":"Ideally, applications that require intensive CPU resources would like to ensure"},{"line_number":66,"context_line":"that those CPU resources are provided by the same NUMA node, or some"},{"line_number":67,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":68,"context_line":"I/O-bound of course)."}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_a3679651","line":65,"range":{"start_line":65,"start_character":45,"end_line":65,"end_character":58},"in_reply_to":"bf659307_9dcbd148","updated":"2018-04-10 16:13:30.000000000","message":"Specifically, low latency memory access from multiple vCPUs within the same instance.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":62,"context_line":"Here, CPU1 and CPU2 would share the same memory through a common memory"},{"line_number":63,"context_line":"controller, while CPU3 and CPU4 would share their own memory."},{"line_number":64,"context_line":""},{"line_number":65,"context_line":"Ideally, applications that require intensive CPU resources would like to ensure"},{"line_number":66,"context_line":"that those CPU resources are provided by the same NUMA node, or some"},{"line_number":67,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":68,"context_line":"I/O-bound of course)."}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_fc55f31e","line":65,"range":{"start_line":65,"start_character":45,"end_line":65,"end_character":58},"in_reply_to":"bf659307_a3679651","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":66,"context_line":"that those CPU resources are provided by the same NUMA node, or some"},{"line_number":67,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":68,"context_line":"I/O-bound of course)."},{"line_number":69,"context_line":"For the moment, if you\u0027re an operator, you can provide a flavor for"},{"line_number":70,"context_line":"asking that by using an extra spec like:"},{"line_number":71,"context_line":""},{"line_number":72,"context_line":".. code::"},{"line_number":73,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_bd0d7591","line":70,"range":{"start_line":69,"start_character":16,"end_line":70,"end_character":39},"updated":"2018-04-10 15:15:59.000000000","message":"s/you can provide a flavor for asking that by using an extra spec like/you can use an extra spec to indicate NUMA topology/","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":66,"context_line":"that those CPU resources are provided by the same NUMA node, or some"},{"line_number":67,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":68,"context_line":"I/O-bound of course)."},{"line_number":69,"context_line":"For the moment, if you\u0027re an operator, you can provide a flavor for"},{"line_number":70,"context_line":"asking that by using an extra spec like:"},{"line_number":71,"context_line":""},{"line_number":72,"context_line":".. code::"},{"line_number":73,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_3c604b01","line":70,"range":{"start_line":69,"start_character":16,"end_line":70,"end_character":39},"in_reply_to":"bf659307_bd0d7591","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":78,"context_line":""},{"line_number":79,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":80,"context_line":""},{"line_number":81,"context_line":"Now, imagine a world where you could ask for a guest NUMA topology by using"},{"line_number":82,"context_line":"flavors that make the scheduler using specific Placement API requests..."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"#2 : As a user, I\u0027d like to get a vGPU NUMA affinity"},{"line_number":85,"context_line":"----------------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_1d658145","line":82,"range":{"start_line":81,"start_character":0,"end_line":82,"end_character":72},"updated":"2018-04-10 15:15:59.000000000","message":"I would remove this sentence.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":78,"context_line":""},{"line_number":79,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":80,"context_line":""},{"line_number":81,"context_line":"Now, imagine a world where you could ask for a guest NUMA topology by using"},{"line_number":82,"context_line":"flavors that make the scheduler using specific Placement API requests..."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"#2 : As a user, I\u0027d like to get a vGPU NUMA affinity"},{"line_number":85,"context_line":"----------------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_9c471778","line":82,"range":{"start_line":81,"start_character":0,"end_line":82,"end_character":72},"in_reply_to":"bf659307_1d658145","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":81,"context_line":"Now, imagine a world where you could ask for a guest NUMA topology by using"},{"line_number":82,"context_line":"flavors that make the scheduler using specific Placement API requests..."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"#2 : As a user, I\u0027d like to get a vGPU NUMA affinity"},{"line_number":85,"context_line":"----------------------------------------------------"},{"line_number":86,"context_line":""},{"line_number":87,"context_line":"Say now that the above NUMA topology with a \"2-sockets, 4 cores no HT\" host"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_9d50b160","line":84,"range":{"start_line":84,"start_character":32,"end_line":84,"end_character":52},"updated":"2018-04-10 15:15:59.000000000","message":"s/a vGPU NUMA affinity/a vGPU that uses the same memory controller as the NUMA node running the guest\u0027s vCPU threads/","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":81,"context_line":"Now, imagine a world where you could ask for a guest NUMA topology by using"},{"line_number":82,"context_line":"flavors that make the scheduler using specific Placement API requests..."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"#2 : As a user, I\u0027d like to get a vGPU NUMA affinity"},{"line_number":85,"context_line":"----------------------------------------------------"},{"line_number":86,"context_line":""},{"line_number":87,"context_line":"Say now that the above NUMA topology with a \"2-sockets, 4 cores no HT\" host"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_5c22dfba","line":84,"range":{"start_line":84,"start_character":32,"end_line":84,"end_character":52},"in_reply_to":"bf659307_2365c645","updated":"2018-04-16 13:31:06.000000000","message":"Not really, since GPUs have their own memory controllers. It\u0027s more about max throughput for CPU/GPU intercommunication by preventing unnecessary QPI hops.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"9935b7ced5b3ce273ab14da6672aff153032c669","unresolved":false,"context_lines":[{"line_number":81,"context_line":"Now, imagine a world where you could ask for a guest NUMA topology by using"},{"line_number":82,"context_line":"flavors that make the scheduler using specific Placement API requests..."},{"line_number":83,"context_line":""},{"line_number":84,"context_line":"#2 : As a user, I\u0027d like to get a vGPU NUMA affinity"},{"line_number":85,"context_line":"----------------------------------------------------"},{"line_number":86,"context_line":""},{"line_number":87,"context_line":"Say now that the above NUMA topology with a \"2-sockets, 4 cores no HT\" host"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_2365c645","line":84,"range":{"start_line":84,"start_character":32,"end_line":84,"end_character":52},"in_reply_to":"bf659307_9d50b160","updated":"2018-04-10 16:13:30.000000000","message":"++","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":85,"context_line":"----------------------------------------------------"},{"line_number":86,"context_line":""},{"line_number":87,"context_line":"Say now that the above NUMA topology with a \"2-sockets, 4 cores no HT\" host"},{"line_number":88,"context_line":"is having some PCI devices, like a NVidia Tesla M10 device. That PCI device"},{"line_number":89,"context_line":"will then have some NUMA affinity to one of the two memory controllers, like :"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_1dca6119","line":88,"range":{"start_line":88,"start_character":0,"end_line":88,"end_character":14},"updated":"2018-04-10 15:15:59.000000000","message":"s/is having some/has some/","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":85,"context_line":"----------------------------------------------------"},{"line_number":86,"context_line":""},{"line_number":87,"context_line":"Say now that the above NUMA topology with a \"2-sockets, 4 cores no HT\" host"},{"line_number":88,"context_line":"is having some PCI devices, like a NVidia Tesla M10 device. That PCI device"},{"line_number":89,"context_line":"will then have some NUMA affinity to one of the two memory controllers, like :"},{"line_number":90,"context_line":""},{"line_number":91,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":7,"id":"9f6a8fd7_7418c180","line":88,"range":{"start_line":88,"start_character":0,"end_line":88,"end_character":14},"in_reply_to":"bf659307_1dca6119","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":102,"context_line":"CPU1 and CPU2, but not with CPU3 and CPU4 which have a separate memory mapping"},{"line_number":103,"context_line":"region."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"In that case, imagine I\u0027d like to run a CUDA library like OpenACC for computing"},{"line_number":106,"context_line":"things like Artificial Intelligence calculations, or coin mining (heh), I\u0027d"},{"line_number":107,"context_line":"love to make sure my GPU devices are affinitized by NUMA nodes for maximum"},{"line_number":108,"context_line":"throughput reasons."},{"line_number":109,"context_line":""},{"line_number":110,"context_line":"Proposed change"},{"line_number":111,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_9d5af14d","line":108,"range":{"start_line":105,"start_character":0,"end_line":108,"end_character":19},"updated":"2018-04-10 15:15:59.000000000","message":"I actually don\u0027t think the vGPU affinity case is all that important. The GPU itself has processing power and memory that libraries like OpenACC will utilize. It\u0027s the results of those CUDA functions that may need to be passed to a CPU\u0027s L2 or L3 caches.\n\nI think the SR-IOV PCI device requiring NUMA affinity is a more solid use case, since the amount of memory the NIC has is tiny compared to a GPU.\n\nBut, whatever, I can go with this...","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":102,"context_line":"CPU1 and CPU2, but not with CPU3 and CPU4 which have a separate memory mapping"},{"line_number":103,"context_line":"region."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"In that case, imagine I\u0027d like to run a CUDA library like OpenACC for computing"},{"line_number":106,"context_line":"things like Artificial Intelligence calculations, or coin mining (heh), I\u0027d"},{"line_number":107,"context_line":"love to make sure my GPU devices are affinitized by NUMA nodes for maximum"},{"line_number":108,"context_line":"throughput reasons."},{"line_number":109,"context_line":""},{"line_number":110,"context_line":"Proposed change"},{"line_number":111,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":7,"id":"9f6a8fd7_340a4926","line":108,"range":{"start_line":105,"start_character":0,"end_line":108,"end_character":19},"in_reply_to":"bf659307_2f958b33","updated":"2018-04-16 13:31:06.000000000","message":"Just changed that to be clearer. HTH.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"d46d1913f7e9e70dc6a411041f29e8057103d208","unresolved":false,"context_lines":[{"line_number":102,"context_line":"CPU1 and CPU2, but not with CPU3 and CPU4 which have a separate memory mapping"},{"line_number":103,"context_line":"region."},{"line_number":104,"context_line":""},{"line_number":105,"context_line":"In that case, imagine I\u0027d like to run a CUDA library like OpenACC for computing"},{"line_number":106,"context_line":"things like Artificial Intelligence calculations, or coin mining (heh), I\u0027d"},{"line_number":107,"context_line":"love to make sure my GPU devices are affinitized by NUMA nodes for maximum"},{"line_number":108,"context_line":"throughput reasons."},{"line_number":109,"context_line":""},{"line_number":110,"context_line":"Proposed change"},{"line_number":111,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_2f958b33","line":108,"range":{"start_line":105,"start_character":0,"end_line":108,"end_character":19},"in_reply_to":"bf659307_9d5af14d","updated":"2018-04-11 15:15:09.000000000","message":"I have many internal customers that are actually very interested in having NUMA affinity for having the GPU memory close as possible to the NUMA memory controller used by the virtual CPU, just because they\u0027d like to run OpenACC.\n\nSee https://devblogs.nvidia.com/performance-portability-gpus-cpus-openacc/","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":116,"context_line":"NUMA nodes being nested Resource Providers - take #1"},{"line_number":117,"context_line":"----------------------------------------------------"},{"line_number":118,"context_line":""},{"line_number":119,"context_line":"Given virt drivers can pass a resource providers tree to the compute service,"},{"line_number":120,"context_line":"for example the libvirt driver could create a tree for a (2-socket, 16 cores)"},{"line_number":121,"context_line":"NUMA topology having one GPU device and one SRIOV PF like this:"},{"line_number":122,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_7d24bdc6","line":119,"range":{"start_line":119,"start_character":30,"end_line":119,"end_character":54},"updated":"2018-04-10 15:15:59.000000000","message":"s/resource providers tree/provider tree/","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":116,"context_line":"NUMA nodes being nested Resource Providers - take #1"},{"line_number":117,"context_line":"----------------------------------------------------"},{"line_number":118,"context_line":""},{"line_number":119,"context_line":"Given virt drivers can pass a resource providers tree to the compute service,"},{"line_number":120,"context_line":"for example the libvirt driver could create a tree for a (2-socket, 16 cores)"},{"line_number":121,"context_line":"NUMA topology having one GPU device and one SRIOV PF like this:"},{"line_number":122,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"9f6a8fd7_f403710e","line":119,"range":{"start_line":119,"start_character":30,"end_line":119,"end_character":54},"in_reply_to":"bf659307_7d24bdc6","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":148,"context_line":"* VCPU: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":149,"context_line":"* MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":150,"context_line":""},{"line_number":151,"context_line":"A third resource class that we call ``PCPU`` could be there for CPU pinning."},{"line_number":152,"context_line":"See the `CPU resources`_ spec for more details about that specific resource"},{"line_number":153,"context_line":"type."},{"line_number":154,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_e8248db1","line":151,"range":{"start_line":151,"start_character":65,"end_line":151,"end_character":75},"updated":"2018-04-10 15:15:59.000000000","message":"s/CPU pinning/dedicated CPU resources/. Pinning refers to assignment, which placement does not do. Dedicated CPU resources refer to the simple integer amount of host processors to use for dedicated guest CPU resources.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":148,"context_line":"* VCPU: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":149,"context_line":"* MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":150,"context_line":""},{"line_number":151,"context_line":"A third resource class that we call ``PCPU`` could be there for CPU pinning."},{"line_number":152,"context_line":"See the `CPU resources`_ spec for more details about that specific resource"},{"line_number":153,"context_line":"type."},{"line_number":154,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"9f6a8fd7_af03640d","line":151,"range":{"start_line":151,"start_character":65,"end_line":151,"end_character":75},"in_reply_to":"bf659307_e8248db1","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":152,"context_line":"See the `CPU resources`_ spec for more details about that specific resource"},{"line_number":153,"context_line":"type."},{"line_number":154,"context_line":""},{"line_number":155,"context_line":"The root Resource Provider (ie. the compute node) would then stop to provide"},{"line_number":156,"context_line":"the existing resources that NUMA nodes have, but would still be providing"},{"line_number":157,"context_line":"resources for classes that are not NUMA-related."},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_a83bf5c5","line":156,"range":{"start_line":155,"start_character":56,"end_line":156,"end_character":73},"updated":"2018-04-10 15:15:59.000000000","message":"s/then stop to provide the existing resources that NUMA nodes have, but would still be providing/would only provide/","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":152,"context_line":"See the `CPU resources`_ spec for more details about that specific resource"},{"line_number":153,"context_line":"type."},{"line_number":154,"context_line":""},{"line_number":155,"context_line":"The root Resource Provider (ie. the compute node) would then stop to provide"},{"line_number":156,"context_line":"the existing resources that NUMA nodes have, but would still be providing"},{"line_number":157,"context_line":"resources for classes that are not NUMA-related."},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"}],"source_content_type":"text/x-rst","patch_set":7,"id":"9f6a8fd7_4f088827","line":156,"range":{"start_line":155,"start_character":56,"end_line":156,"end_character":73},"in_reply_to":"bf659307_a83bf5c5","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":156,"context_line":"the existing resources that NUMA nodes have, but would still be providing"},{"line_number":157,"context_line":"resources for classes that are not NUMA-related."},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"},{"line_number":160,"context_line":"each one would have specific resource classes. For example:"},{"line_number":161,"context_line":""},{"line_number":162,"context_line":"* GPU devices are having a specific VGPU resource class that counts the number"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_486339dd","line":159,"range":{"start_line":159,"start_character":61,"end_line":159,"end_character":73},"updated":"2018-04-10 15:15:59.000000000","message":"s/nested child/child/","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":156,"context_line":"the existing resources that NUMA nodes have, but would still be providing"},{"line_number":157,"context_line":"resources for classes that are not NUMA-related."},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"},{"line_number":160,"context_line":"each one would have specific resource classes. For example:"},{"line_number":161,"context_line":""},{"line_number":162,"context_line":"* GPU devices are having a specific VGPU resource class that counts the number"}],"source_content_type":"text/x-rst","patch_set":7,"id":"9f6a8fd7_6f0d4c36","line":159,"range":{"start_line":159,"start_character":61,"end_line":159,"end_character":73},"in_reply_to":"bf659307_486339dd","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":159,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"},{"line_number":160,"context_line":"each one would have specific resource classes. For example:"},{"line_number":161,"context_line":""},{"line_number":162,"context_line":"* GPU devices are having a specific VGPU resource class that counts the number"},{"line_number":163,"context_line":"  of virtual GPUs it can create."},{"line_number":164,"context_line":"* a SR-IOV physical function could count the number of virtual functions"},{"line_number":165,"context_line":"  it can create."}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_770b6a29","line":162,"range":{"start_line":162,"start_character":14,"end_line":162,"end_character":24},"updated":"2018-04-11 04:00:58.000000000","message":"have","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"097d26867a1256b52707df1247ab2c591aeefef6","unresolved":false,"context_lines":[{"line_number":159,"context_line":"Each PCI device (like a physical GPU device) would then be a nested child where"},{"line_number":160,"context_line":"each one would have specific resource classes. For example:"},{"line_number":161,"context_line":""},{"line_number":162,"context_line":"* GPU devices are having a specific VGPU resource class that counts the number"},{"line_number":163,"context_line":"  of virtual GPUs it can create."},{"line_number":164,"context_line":"* a SR-IOV physical function could count the number of virtual functions"},{"line_number":165,"context_line":"  it can create."}],"source_content_type":"text/x-rst","patch_set":7,"id":"9f6a8fd7_0f129059","line":162,"range":{"start_line":162,"start_character":14,"end_line":162,"end_character":24},"in_reply_to":"bf659307_770b6a29","updated":"2018-04-16 13:31:06.000000000","message":"Done","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"94c5880954d454f7adf56795858a8dfc60765eb9","unresolved":false,"context_lines":[{"line_number":182,"context_line":""},{"line_number":183,"context_line":"  ``resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1``"},{"line_number":184,"context_line":""},{"line_number":185,"context_line":"where vCPU_count / N would be *equally* sharding the vCPUs."},{"line_number":186,"context_line":""},{"line_number":187,"context_line":""},{"line_number":188,"context_line":"Eg.:"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_a8a9b5dd","line":185,"updated":"2018-04-10 15:15:59.000000000","message":"As noted by efried, the above is not actually how the granular request groups works. Granular request groups do not guarantee that separate resource providers fulfill each different request group.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":182,"context_line":""},{"line_number":183,"context_line":"  ``resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1``"},{"line_number":184,"context_line":""},{"line_number":185,"context_line":"where vCPU_count / N would be *equally* sharding the vCPUs."},{"line_number":186,"context_line":""},{"line_number":187,"context_line":""},{"line_number":188,"context_line":"Eg.:"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_823efeb5","line":185,"in_reply_to":"bf659307_a8a9b5dd","updated":"2018-04-11 04:00:58.000000000","message":"Correct. The above is a useful way to maximize my chances that my instance will land *somewhere* when I don\u0027t care about NUMA affinity.  I can end up with any number of my VCPUs provided by any number of resource providers in any combination.\n\n...which I would have thought is a more common use case than deliberate explicit sharding.  Not so?\n\n...but this example doesn\u0027t represent what we should/would do with hw:numa_nodes\u003dN because N doesn\u0027t figure into the math at all.  The example represents what we should do when we get a flavor with just VCPU:\u003cnumber\u003e (which doesn\u0027t care about NUMA affinity) and we want to allow it to land on hosts with or without their RPs set up in NUMA nodes.\n\nFor deliberate sharding, we have two choices (not counting explicit traits like NUMA_NODE_0).\n\n- Placement returns all permutations; and then the NUMATopologyFilter winnows the results to just the ones that shard as desired;\n- Implement forced separation of providers in placement.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":187,"context_line":""},{"line_number":188,"context_line":"Eg.:"},{"line_number":189,"context_line":""},{"line_number":190,"context_line":"* for a flavor of 8 VCPUs and hw:numa_nodes\u003d2, it would translate into:"},{"line_number":191,"context_line":""},{"line_number":192,"context_line":"    ``resources1:VCPU\u003d4\u0026resources2:VCPU\u003d4``"},{"line_number":193,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_42b9c612","line":190,"updated":"2018-04-11 04:00:58.000000000","message":"These examples *do* gel with the formula on L185.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":193,"context_line":""},{"line_number":194,"context_line":"* for a flavor of 8 VCPUs and hw:numa_nodes\u003d1, it would translate into:"},{"line_number":195,"context_line":""},{"line_number":196,"context_line":"    ``resources:VCPU\u003d8``"},{"line_number":197,"context_line":""},{"line_number":198,"context_line":"For the other related property ``hw:numa_cpus.N\u003dY``, it would just"},{"line_number":199,"context_line":"not calculate the number of VCPUs to ask for the specific numbered request"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_a256a2cd","line":196,"range":{"start_line":196,"start_character":6,"end_line":196,"end_character":15},"updated":"2018-04-11 04:00:58.000000000","message":"Both for consistency and because you\u0027ll probably want other resources in your unnumbered request group, this should be resources1.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":206,"context_line":"  ``resources1:VCPU\u003d2\u0026resources2:VCPU\u003d6``"},{"line_number":207,"context_line":""},{"line_number":208,"context_line":"For the last property about memory splitting across NUMA nodes, that would"},{"line_number":209,"context_line":"also be a number request group query specifying which resource group for which"},{"line_number":210,"context_line":"MEMORY_MB resource class."},{"line_number":211,"context_line":"Eg.:"},{"line_number":212,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_82cd9e86","line":209,"range":{"start_line":209,"start_character":10,"end_line":209,"end_character":16},"updated":"2018-04-11 04:00:58.000000000","message":"numbered","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":225,"context_line":"nodes:"},{"line_number":226,"context_line":""},{"line_number":227,"context_line":"* If I just want to provide ``N`` vGPUs for an instance, I would just amend"},{"line_number":228,"context_line":"  a flavor and ask for ``resources:VGPU\u003dN``. No NUMA relationship would be"},{"line_number":229,"context_line":"  verified, exactly like the current behaviour."},{"line_number":230,"context_line":""},{"line_number":231,"context_line":"* If I want ``N`` vGPUs *and* those vGPUs be in the exact same NUMA node than"},{"line_number":232,"context_line":"  my ``M`` vCPUs, then I\u0027d amend my flavor by ``resources1:VGPU\u003dN;VCPU\u003dM``"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_627e4a1a","line":229,"range":{"start_line":228,"start_character":45,"end_line":229,"end_character":47},"updated":"2018-04-11 04:00:58.000000000","message":"Not sure what you mean by \"the current behavior\", but saying VGPU:N will land all N of the VGPUs on the same provider.  That\u0027s regardless of whether you use a numbered or unnumbered request group.\n\nWe never split up resources you request in a single \u003cRC\u003e:\u003cAMOUNT\u003e (because think it through for DISK_GB).","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":228,"context_line":"  a flavor and ask for ``resources:VGPU\u003dN``. No NUMA relationship would be"},{"line_number":229,"context_line":"  verified, exactly like the current behaviour."},{"line_number":230,"context_line":""},{"line_number":231,"context_line":"* If I want ``N`` vGPUs *and* those vGPUs be in the exact same NUMA node than"},{"line_number":232,"context_line":"  my ``M`` vCPUs, then I\u0027d amend my flavor by ``resources1:VGPU\u003dN;VCPU\u003dM``"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"* If I just want ``N`` vGPUs *close* to my vCPUs but not specifically in the"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_827b5e26","line":231,"range":{"start_line":231,"start_character":73,"end_line":231,"end_character":77},"updated":"2018-04-11 04:00:58.000000000","message":"as","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":229,"context_line":"  verified, exactly like the current behaviour."},{"line_number":230,"context_line":""},{"line_number":231,"context_line":"* If I want ``N`` vGPUs *and* those vGPUs be in the exact same NUMA node than"},{"line_number":232,"context_line":"  my ``M`` vCPUs, then I\u0027d amend my flavor by ``resources1:VGPU\u003dN;VCPU\u003dM``"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"* If I just want ``N`` vGPUs *close* to my vCPUs but not specifically in the"},{"line_number":235,"context_line":"  same NUMA node, then I\u0027d be writing my flavor with"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_62896ac8","line":232,"range":{"start_line":232,"start_character":46,"end_line":232,"end_character":74},"updated":"2018-04-11 04:00:58.000000000","message":"But this doesn\u0027t work, because the VGPUs and VCPUs are not in the same resource provider.\n\nIn order to satisfy this use case, we would need to invent something like the \"common subtree\" semantic we\u0027ve been bandying about.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":231,"context_line":"* If I want ``N`` vGPUs *and* those vGPUs be in the exact same NUMA node than"},{"line_number":232,"context_line":"  my ``M`` vCPUs, then I\u0027d amend my flavor by ``resources1:VGPU\u003dN;VCPU\u003dM``"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"* If I just want ``N`` vGPUs *close* to my vCPUs but not specifically in the"},{"line_number":235,"context_line":"  same NUMA node, then I\u0027d be writing my flavor with"},{"line_number":236,"context_line":"  ``resources:VCPU\u003dM\u0026resources1:VGPU\u003dN`` (note the fact that we only use a"},{"line_number":237,"context_line":"  numbered request group for VGPU)"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_62a3aa9f","line":234,"range":{"start_line":234,"start_character":29,"end_line":234,"end_character":36},"updated":"2018-04-11 04:00:58.000000000","message":"The only meaning of \"close\" here is \"on the same host\".  Saying \"close\" implies some kind of control over \"distance\", which we don\u0027t have.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":233,"context_line":""},{"line_number":234,"context_line":"* If I just want ``N`` vGPUs *close* to my vCPUs but not specifically in the"},{"line_number":235,"context_line":"  same NUMA node, then I\u0027d be writing my flavor with"},{"line_number":236,"context_line":"  ``resources:VCPU\u003dM\u0026resources1:VGPU\u003dN`` (note the fact that we only use a"},{"line_number":237,"context_line":"  numbered request group for VGPU)"},{"line_number":238,"context_line":""},{"line_number":239,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_82c91e5a","line":236,"range":{"start_line":236,"start_character":2,"end_line":236,"end_character":40},"updated":"2018-04-11 04:00:58.000000000","message":"You actually might as well put them both in the unnumbered group for purposes of this example.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":234,"context_line":"* If I just want ``N`` vGPUs *close* to my vCPUs but not specifically in the"},{"line_number":235,"context_line":"  same NUMA node, then I\u0027d be writing my flavor with"},{"line_number":236,"context_line":"  ``resources:VCPU\u003dM\u0026resources1:VGPU\u003dN`` (note the fact that we only use a"},{"line_number":237,"context_line":"  numbered request group for VGPU)"},{"line_number":238,"context_line":""},{"line_number":239,"context_line":""},{"line_number":240,"context_line":"Optionally configured NUMA resources"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_82cefe57","line":237,"range":{"start_line":237,"start_character":2,"end_line":237,"end_character":33},"updated":"2018-04-11 04:00:58.000000000","message":"no reason for that.  You could have put them both in the unnumbered, or both in separate groups, to the same effect.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":292,"context_line":"      +----------+"},{"line_number":293,"context_line":""},{"line_number":294,"context_line":""},{"line_number":295,"context_line":".. note:: Given host NUMA topologies are not virt-specific, the definition of"},{"line_number":296,"context_line":"          the provider tree where each NUMA node is a child RP and where the"},{"line_number":297,"context_line":"          resource classes should be set (either on the root RP or on the"},{"line_number":298,"context_line":"          children) should ideally be done outside of the virt driver codes."}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_c201566c","line":295,"range":{"start_line":295,"start_character":10,"end_line":295,"end_character":58},"updated":"2018-04-11 04:00:58.000000000","message":"WRONG","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":295,"context_line":".. note:: Given host NUMA topologies are not virt-specific, the definition of"},{"line_number":296,"context_line":"          the provider tree where each NUMA node is a child RP and where the"},{"line_number":297,"context_line":"          resource classes should be set (either on the root RP or on the"},{"line_number":298,"context_line":"          children) should ideally be done outside of the virt driver codes."},{"line_number":299,"context_line":"          That said, it\u0027s acceptable for a first start to define those directly"},{"line_number":300,"context_line":"          in the libvirt module."},{"line_number":301,"context_line":""}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_62faca75","line":298,"range":{"start_line":298,"start_character":43,"end_line":298,"end_character":75},"updated":"2018-04-11 04:00:58.000000000","message":"NO","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":346,"context_line":"Developer impact"},{"line_number":347,"context_line":"----------------"},{"line_number":348,"context_line":""},{"line_number":349,"context_line":"None, except virt driver maintainers."},{"line_number":350,"context_line":""},{"line_number":351,"context_line":"Upgrade impact"},{"line_number":352,"context_line":"--------------"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_823cbe2f","line":349,"range":{"start_line":349,"start_character":13,"end_line":349,"end_character":37},"updated":"2018-04-11 04:00:58.000000000","message":"okay, but this is pretty huge.  You\u0027ve somewhat explained it above (about how virt drivers need to model their RPs), which is fine, but this section deserves something less dismissive than this.","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":370,"context_line":"Dependencies"},{"line_number":371,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":372,"context_line":""},{"line_number":373,"context_line":"This work builds on reapproval and completion of the `Nested Resource"},{"line_number":374,"context_line":"Providers`_ effort."},{"line_number":375,"context_line":""},{"line_number":376,"context_line":"As it\u0027s said earlier, expressing a NUMA relationship requires `Granular"},{"line_number":377,"context_line":"Resource Request`_ spec."}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_421b06c9","line":374,"range":{"start_line":373,"start_character":0,"end_line":374,"end_character":19},"updated":"2018-04-11 04:00:58.000000000","message":"which at this point is the rocky nested-resource-providers-allocation-candidates blueprint.\n\nhttps://review.openstack.org/#/q/branch:master+topic:bp/nested-resource-providers-allocation-candidates","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"4be5dab69ff69056474bcec79eec8aef45f39517","unresolved":false,"context_lines":[{"line_number":376,"context_line":"As it\u0027s said earlier, expressing a NUMA relationship requires `Granular"},{"line_number":377,"context_line":"Resource Request`_ spec."},{"line_number":378,"context_line":""},{"line_number":379,"context_line":"While we\u0027re commenting the use of a ``PCPU`` resource class, that spec doesn\u0027t"},{"line_number":380,"context_line":"formally depend on `CPU resources`_ spec."},{"line_number":381,"context_line":""},{"line_number":382,"context_line":"Testing"}],"source_content_type":"text/x-rst","patch_set":7,"id":"bf659307_e279fafb","line":379,"range":{"start_line":379,"start_character":61,"end_line":379,"end_character":65},"updated":"2018-04-11 04:00:58.000000000","message":"this","commit_id":"3ec980e9d2a14e3f41eb3bd150f94896418894fe"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"1f662c4003dc9c736949b3a27ec70f80a0c7584d","unresolved":false,"context_lines":[{"line_number":192,"context_line":""},{"line_number":193,"context_line":""},{"line_number":194,"context_line":"where ``sharded\u003dresources1,resources2`` parameter would make sure that the"},{"line_number":195,"context_line":"resources for the both groups would be in separate providers."},{"line_number":196,"context_line":""},{"line_number":197,"context_line":"Eg.:"},{"line_number":198,"context_line":""}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_37b686fc","line":195,"updated":"2018-04-18 14:03:25.000000000","message":"You can track these proposed deltas to the granular spec:\n\nhttps://review.openstack.org/#/c/561717/\n\nis as you\u0027ve described above.  (At the moment, it\u0027s called `separate_providers` rather than `sharded`.)\n\nhttps://review.openstack.org/#/c/560974/\n\nhas \"sharded\" as the default behavior, and you have to do \"any-fit\" spreading by putting the spreadable resources into the un-numbered group and listing their resource classes in a `can_split` queryparam.","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"1f662c4003dc9c736949b3a27ec70f80a0c7584d","unresolved":false,"context_lines":[{"line_number":233,"context_line":"The same logic could be done for associated devices that are children of NUMA"},{"line_number":234,"context_line":"nodes:"},{"line_number":235,"context_line":""},{"line_number":236,"context_line":"* If I just want to provide ``N`` vGPUs for an instance, I would just amend"},{"line_number":237,"context_line":"  a flavor and ask for ``resources:VGPU\u003dN``. No NUMA relationship would be"},{"line_number":238,"context_line":"  verified, exactly like the behaviour in Queens where you\u0027re only asking for a"},{"line_number":239,"context_line":"  number of vGPUs."},{"line_number":240,"context_line":""},{"line_number":241,"context_line":"* If I want ``N`` vGPUs that would *possibly* be in the same NUMA node than my"},{"line_number":242,"context_line":"  vCPUs but not that being mandatory, then I\u0027d be writing my flavor with"}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_17cb4272","line":239,"range":{"start_line":236,"start_character":0,"end_line":239,"end_character":18},"updated":"2018-04-18 14:03:25.000000000","message":"Again, this will get all the VGPUs from the same provider.","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"1f662c4003dc9c736949b3a27ec70f80a0c7584d","unresolved":false,"context_lines":[{"line_number":245,"context_line":""},{"line_number":246,"context_line":" * If I want ``N`` vGPUs *and* those vGPUs be in the *exact* same NUMA node as"},{"line_number":247,"context_line":"   my ``M`` vCPUs, then I\u0027d pass a new parameter to the Placement API by saying"},{"line_number":248,"context_line":"   ``in_tree:resources1,resources2\u0026resources1:VCPU\u003dM\u0026resources2:VGPU\u003dN``"},{"line_number":249,"context_line":""},{"line_number":250,"context_line":""},{"line_number":251,"context_line":"Optionally configured NUMA resources"}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_d7c4ea61","line":248,"range":{"start_line":248,"start_character":5,"end_line":248,"end_character":12},"updated":"2018-04-18 14:03:25.000000000","message":"I assume this is referring to the \"common subtree\" or \"common ancestor\" idea we\u0027ve discussed.\n\nWe can\u0027t use `in_tree` as we\u0027re already using that to indicate *entire* trees when requesting GET /resource_providers.\n\nAlso, it\u0027s a little more complicated than just listing the request groups that need to have a common ancestor.  We need some way to indicate *where* that common ancestor should be.  (Otherwise, everything in a tree has a common ancestor - the root RP.)  We wanted to do this via a common trait.  So for this example, we might mark each NUMA node with trait NUMA_ROOT, and then use a request like:\n\n  resources1\u003dVCPU:M\n \u0026resources2\u003dVGPU:N\n \u0026common_ancestor_by_trait\u003dgroups:resources1,resources2;trait:NUMA_ROOT\n\nThat syntax is ugly, and will surely be bikeshedded to death before it ever emerges, but it conveys the idea.","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":11564,"name":"Chris Dent","email":"cdent@anticdent.org","username":"chdent"},"change_message_id":"4d4a4351693457ecf166af6fd3458e81a33bd590","unresolved":false,"context_lines":[{"line_number":249,"context_line":""},{"line_number":250,"context_line":""},{"line_number":251,"context_line":"Optionally configured NUMA resources"},{"line_number":252,"context_line":"------------------------------------"},{"line_number":253,"context_line":""},{"line_number":254,"context_line":"Given there are NUMA workloads but also non-NUMA workloads, it\u0027s also important"},{"line_number":255,"context_line":"for operators to just have compute nodes accepting the latter."}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_5a7eba6b","line":252,"updated":"2018-04-18 17:08:54.000000000","message":"Glad to see this section. As I was reading through above I was wondering \"what about a numa capable host that we don\u0027t want to use in a numa style?\"","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"1f662c4003dc9c736949b3a27ec70f80a0c7584d","unresolved":false,"context_lines":[{"line_number":303,"context_line":"      +----------+"},{"line_number":304,"context_line":""},{"line_number":305,"context_line":""},{"line_number":306,"context_line":".. note:: Given host NUMA topologies are not virt-specific, the definition of"},{"line_number":307,"context_line":"          the provider tree where each NUMA node is a child RP and where the"},{"line_number":308,"context_line":"          resource classes should be set (either on the root RP or on the"},{"line_number":309,"context_line":"          children) should ideally be done outside of the virt driver codes."}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_57ae9a98","line":306,"range":{"start_line":306,"start_character":16,"end_line":306,"end_character":58},"updated":"2018-04-18 14:03:25.000000000","message":"Still WRONG","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"1f662c4003dc9c736949b3a27ec70f80a0c7584d","unresolved":false,"context_lines":[{"line_number":306,"context_line":".. note:: Given host NUMA topologies are not virt-specific, the definition of"},{"line_number":307,"context_line":"          the provider tree where each NUMA node is a child RP and where the"},{"line_number":308,"context_line":"          resource classes should be set (either on the root RP or on the"},{"line_number":309,"context_line":"          children) should ideally be done outside of the virt driver codes."},{"line_number":310,"context_line":"          That said, it\u0027s acceptable for a first start to define those directly"},{"line_number":311,"context_line":"          in the libvirt module."},{"line_number":312,"context_line":""}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_1790224e","line":309,"range":{"start_line":309,"start_character":38,"end_line":309,"end_character":75},"updated":"2018-04-18 14:03:25.000000000","message":"Still NO","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":11564,"name":"Chris Dent","email":"cdent@anticdent.org","username":"chdent"},"change_message_id":"4d4a4351693457ecf166af6fd3458e81a33bd590","unresolved":false,"context_lines":[{"line_number":308,"context_line":"          resource classes should be set (either on the root RP or on the"},{"line_number":309,"context_line":"          children) should ideally be done outside of the virt driver codes."},{"line_number":310,"context_line":"          That said, it\u0027s acceptable for a first start to define those directly"},{"line_number":311,"context_line":"          in the libvirt module."},{"line_number":312,"context_line":""},{"line_number":313,"context_line":"Alternatives"},{"line_number":314,"context_line":"------------"}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_7acffeaf","line":311,"updated":"2018-04-18 17:08:54.000000000","message":"Yeah, have to agree with Eric here. The discovery and structure NUMA stuff is very much dependent on the virt driver. And even if that were not already the case, we would want to preserve the possibility to be able to have flexible handling in future virt drivers.\n\nSo even if there are some virt drivers that might like to share some code for cooking the numa provider tree, it should happen within the confines of the virt driver.","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"1f662c4003dc9c736949b3a27ec70f80a0c7584d","unresolved":false,"context_lines":[{"line_number":337,"context_line":"  parameter would accept a list of strings that are the names of the numbered"},{"line_number":338,"context_line":"  request groups to shard with."},{"line_number":339,"context_line":""},{"line_number":340,"context_line":"  A Placement API microversion would accept both of the parameters."},{"line_number":341,"context_line":""},{"line_number":342,"context_line":""},{"line_number":343,"context_line":"Security impact"}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_179ee253","line":340,"updated":"2018-04-18 14:03:25.000000000","message":"Yeah, these should be done via separate blueprints, upon which this one should depend.","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":11564,"name":"Chris Dent","email":"cdent@anticdent.org","username":"chdent"},"change_message_id":"4d4a4351693457ecf166af6fd3458e81a33bd590","unresolved":false,"context_lines":[{"line_number":337,"context_line":"  parameter would accept a list of strings that are the names of the numbered"},{"line_number":338,"context_line":"  request groups to shard with."},{"line_number":339,"context_line":""},{"line_number":340,"context_line":"  A Placement API microversion would accept both of the parameters."},{"line_number":341,"context_line":""},{"line_number":342,"context_line":""},{"line_number":343,"context_line":"Security impact"}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_ba3a7695","line":340,"in_reply_to":"9f6a8fd7_179ee253","updated":"2018-04-18 17:08:54.000000000","message":"Agree, especially as there are multiple similar topics in flight.","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":6873,"name":"Matt Riedemann","email":"mriedem.os@gmail.com","username":"mriedem"},"change_message_id":"bbbb3063e351af31bf7bf53d4aa757b9c27b3025","unresolved":false,"context_lines":[{"line_number":394,"context_line":"* Scheduler translating flavor extra specs for NUMA properties into Placement"},{"line_number":395,"context_line":"  queries."},{"line_number":396,"context_line":""},{"line_number":397,"context_line":"Dependencies"},{"line_number":398,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":399,"context_line":""},{"line_number":400,"context_line":"This work builds on reapproval and completion of the `Nested Resource"}],"source_content_type":"text/x-rst","patch_set":9,"id":"9f6a8fd7_8d624cad","line":397,"updated":"2018-04-25 14:24:38.000000000","message":"Does this depend on https://review.openstack.org/#/c/559466/ or does that depend on this? Because that spec refers to things mentioned in this spec, like the config [devices]numa_resource_classes config option.","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"ed0dd400db85b474b33b0bc2ef1fb3f7cd6c290a","unresolved":false,"context_lines":[{"line_number":394,"context_line":"* Scheduler translating flavor extra specs for NUMA properties into Placement"},{"line_number":395,"context_line":"  queries."},{"line_number":396,"context_line":""},{"line_number":397,"context_line":"Dependencies"},{"line_number":398,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":399,"context_line":""},{"line_number":400,"context_line":"This work builds on reapproval and completion of the `Nested Resource"}],"source_content_type":"text/x-rst","patch_set":9,"id":"5f7c97a3_4e7f13ab","line":397,"in_reply_to":"9f6a8fd7_8d624cad","updated":"2018-05-18 09:10:55.000000000","message":"Shit, probably. I mean, this spec is dependent on the above. I haven\u0027t seen the spec you gave before today, so I\u0027ll try to understand the problem statement and if it needs the above for this spec.","commit_id":"3f3c3cceae107ba70de79aa5e641eabcf6e7d93c"}],"specs/stein/approved/numa-topology-with-rps.rst":[{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":15,"context_line":"the relationship between a root Resource Provider (root RP) ie. a compute node,"},{"line_number":16,"context_line":"and one or more Non-Uniform Memory Access (NUMA) nodes (aka. cells), each of"},{"line_number":17,"context_line":"them having separate resources, like memory or PCI devices."},{"line_number":18,"context_line":""},{"line_number":19,"context_line":"Problem description"},{"line_number":20,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":21,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_2bd8f47c","line":18,"updated":"2018-09-19 10:18:12.000000000","message":"Far as I can see, this is going to depend on the CPU resources spec [1]. Could you either (a) add a note here stating as much or (b) add a note anywhere you reference PCPUs stating that this won\u0027t be available until said spec is completed.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":15,"context_line":"the relationship between a root Resource Provider (root RP) ie. a compute node,"},{"line_number":16,"context_line":"and one or more Non-Uniform Memory Access (NUMA) nodes (aka. cells), each of"},{"line_number":17,"context_line":"them having separate resources, like memory or PCI devices."},{"line_number":18,"context_line":""},{"line_number":19,"context_line":"Problem description"},{"line_number":20,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":21,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_9278a62c","line":18,"in_reply_to":"3f79a3b5_2bd8f47c","updated":"2018-11-16 00:03:26.000000000","message":"originally i think the cpu resouce spec had a dependcy on this one. but in any case yes there is a tight coupleing in both directions. so we have to choose one way to break the cycle.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"43d30827ffe8584d3a435023b5e84345d4e7bbfc","unresolved":false,"context_lines":[{"line_number":21,"context_line":""},{"line_number":22,"context_line":"The NUMATopologyFilter checks a number of resources, including emulator threads"},{"line_number":23,"context_line":"policies, CPU pinned instances and memory page sizes. While we would like to"},{"line_number":24,"context_line":"eventually get rid of this filter, the problem is that it\u0027s coupling multiple"},{"line_number":25,"context_line":"high performance features that are not really related to NUMA topologies (like"},{"line_number":26,"context_line":"for example CPU pinning, emulator threads or memory pages) and also checks both"},{"line_number":27,"context_line":"*inventories* of the host with *usage* of CPU and memory page resources by each"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_2bbc9bb9","line":24,"range":{"start_line":24,"start_character":11,"end_line":24,"end_character":33},"updated":"2018-09-17 20:43:22.000000000","message":"So my first thought was (and you mention this a couple of paragraphs down) - can we get rid of it entirely?\n\nWe would still need it for CPU pinning, for instance, because we need to know which specific pCPUs are pinned, not just how many.\n\nBut then thinking about it some more, I don\u0027t think it matters. If an instance has 2 pinned vCPUs in NUMA node 0 (which has, say, 4 CPUs), and a new instance needs 2 pinned vCPUs, it can go in NUMA node 0, and placement doesn\u0027t need to know whether the first instance is using pCPUs 0 and 1, or 2 and 3, or 1 and 3, etc.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":21,"context_line":""},{"line_number":22,"context_line":"The NUMATopologyFilter checks a number of resources, including emulator threads"},{"line_number":23,"context_line":"policies, CPU pinned instances and memory page sizes. While we would like to"},{"line_number":24,"context_line":"eventually get rid of this filter, the problem is that it\u0027s coupling multiple"},{"line_number":25,"context_line":"high performance features that are not really related to NUMA topologies (like"},{"line_number":26,"context_line":"for example CPU pinning, emulator threads or memory pages) and also checks both"},{"line_number":27,"context_line":"*inventories* of the host with *usage* of CPU and memory page resources by each"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_0ea3d19c","line":24,"range":{"start_line":24,"start_character":11,"end_line":24,"end_character":33},"in_reply_to":"3f79a3b5_28980c67","updated":"2018-10-01 21:37:23.000000000","message":"\u003e I think you could make it work with a model where only the\n \u003e hypervisor cares about *which* resources are being used, and\n \u003e placement (and the scheduler) only care about *can it fit*.\n\nPrecisely.\n\n \u003e This would be different from what we have today, where the\n \u003e NUMATopologyFilter calculates *which ones* as a way to figure out\n \u003e *can it fit*.\n\nCorrect, and the NUMATopologyFilter promptly throws away the assignment decision it made, relying on the compute node\u0027s Claim object to re-perform that assignment decision and actually store the InstanceNUMATopology object that contains the pinning decision.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"66a66f0bbed67db369257311894d8d9de3456778","unresolved":false,"context_lines":[{"line_number":21,"context_line":""},{"line_number":22,"context_line":"The NUMATopologyFilter checks a number of resources, including emulator threads"},{"line_number":23,"context_line":"policies, CPU pinned instances and memory page sizes. While we would like to"},{"line_number":24,"context_line":"eventually get rid of this filter, the problem is that it\u0027s coupling multiple"},{"line_number":25,"context_line":"high performance features that are not really related to NUMA topologies (like"},{"line_number":26,"context_line":"for example CPU pinning, emulator threads or memory pages) and also checks both"},{"line_number":27,"context_line":"*inventories* of the host with *usage* of CPU and memory page resources by each"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_28980c67","line":24,"range":{"start_line":24,"start_character":11,"end_line":24,"end_character":33},"in_reply_to":"3f79a3b5_2bbc9bb9","updated":"2018-09-18 15:29:55.000000000","message":"I think you could make it work with a model where only the hypervisor cares about *which* resources are being used, and placement (and the scheduler) only care about *can it fit*.\n\nThis would be different from what we have today, where the NUMATopologyFilter calculates *which ones* as a way to figure out *can it fit*.\n\nOn the other hand, I\u0027m not sure how much we\u0027d save, because we\u0027d still want the scheduler to know about all the rules that might prevent an instance from fitting a guest NUMA node onto a given host NUMA node.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":21,"context_line":""},{"line_number":22,"context_line":"The NUMATopologyFilter checks a number of resources, including emulator threads"},{"line_number":23,"context_line":"policies, CPU pinned instances and memory page sizes. While we would like to"},{"line_number":24,"context_line":"eventually get rid of this filter, the problem is that it\u0027s coupling multiple"},{"line_number":25,"context_line":"high performance features that are not really related to NUMA topologies (like"},{"line_number":26,"context_line":"for example CPU pinning, emulator threads or memory pages) and also checks both"},{"line_number":27,"context_line":"*inventories* of the host with *usage* of CPU and memory page resources by each"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_703a57e6","line":24,"range":{"start_line":24,"start_character":11,"end_line":24,"end_character":33},"in_reply_to":"3f79a3b5_2bbc9bb9","updated":"2018-09-19 10:18:12.000000000","message":"You\u0027ve also got a lot more than just CPU pinning going on here. Anything NUMA\u0027y is handled by this filter, including: NUMA-aware vSwitches, PCI (w/ NUMA policies), hugepages, handling of NUMA topology extra specs, handling of CPU topology extra specs...\n\nI don\u0027t think it\u0027s viable to move all of these to placement yet which means this has to be an iterative thing.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"78eac928b8553451a97f5cc9e2e037612511a70d","unresolved":false,"context_lines":[{"line_number":21,"context_line":""},{"line_number":22,"context_line":"The NUMATopologyFilter checks a number of resources, including emulator threads"},{"line_number":23,"context_line":"policies, CPU pinned instances and memory page sizes. While we would like to"},{"line_number":24,"context_line":"eventually get rid of this filter, the problem is that it\u0027s coupling multiple"},{"line_number":25,"context_line":"high performance features that are not really related to NUMA topologies (like"},{"line_number":26,"context_line":"for example CPU pinning, emulator threads or memory pages) and also checks both"},{"line_number":27,"context_line":"*inventories* of the host with *usage* of CPU and memory page resources by each"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_b28a821b","line":24,"range":{"start_line":24,"start_character":11,"end_line":24,"end_character":33},"in_reply_to":"3f79a3b5_4ecb0949","updated":"2018-10-23 01:24:50.000000000","message":"this is on my review list for tomorrow/today but yes as jay said we will always have the numa filter.\n\nplacement will never do cpu assignment.\nas such the numa toplogy filter and the associated blob in the db will always be needed to keep track of which core on each numa node is free.\n\nthat since we dont need to do asignment of hugepages or mempages ingeneral and jsut need a tally of the avalaible mempages of each size per numa node that can be entirley offloaded to placemnet.\n\nit would be nice to merge the pci and numa topology fileter in to a singel filter at some point but ya this wont be going away.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":21,"context_line":""},{"line_number":22,"context_line":"The NUMATopologyFilter checks a number of resources, including emulator threads"},{"line_number":23,"context_line":"policies, CPU pinned instances and memory page sizes. While we would like to"},{"line_number":24,"context_line":"eventually get rid of this filter, the problem is that it\u0027s coupling multiple"},{"line_number":25,"context_line":"high performance features that are not really related to NUMA topologies (like"},{"line_number":26,"context_line":"for example CPU pinning, emulator threads or memory pages) and also checks both"},{"line_number":27,"context_line":"*inventories* of the host with *usage* of CPU and memory page resources by each"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_4ecb0949","line":24,"range":{"start_line":24,"start_character":11,"end_line":24,"end_character":33},"in_reply_to":"3f79a3b5_703a57e6","updated":"2018-10-01 21:37:23.000000000","message":"\u003e I don\u0027t think it\u0027s viable to move all of these to placement yet\n \u003e which means this has to be an iterative thing.\n\nCorrect. We\u0027d only be removing the small pieces from NUMATopologyFilter that could be satisfied by a placement query -- and in fact, the NUMATopologyFilter would be able to look at the provider_summaries and allocation_requests parts of the allocation candidates response to determine which NUMA node (resource provider) should be used to satisfy a request.\n\nIn other words, we\u0027ll need to live with the NUMATopologyFilter for the rest of eternity. We shouldn\u0027t even talk about deprecating it, since that ain\u0027t gonna happen.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":46,"context_line":"#1 : As a user, I\u0027d like to get fast access to memory for my 2-vCPU instance"},{"line_number":47,"context_line":"----------------------------------------------------------------------------"},{"line_number":48,"context_line":""},{"line_number":49,"context_line":"Consider the following NUMA topology for a \"2-sockets, 4 cores\" host with no"},{"line_number":50,"context_line":"Hyper-Threading:"},{"line_number":51,"context_line":""},{"line_number":52,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_d0280b99","line":49,"range":{"start_line":49,"start_character":44,"end_line":49,"end_character":62},"updated":"2018-09-19 10:18:12.000000000","message":"nit: \"two socket, four core\"","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":47,"context_line":"----------------------------------------------------------------------------"},{"line_number":48,"context_line":""},{"line_number":49,"context_line":"Consider the following NUMA topology for a \"2-sockets, 4 cores\" host with no"},{"line_number":50,"context_line":"Hyper-Threading:"},{"line_number":51,"context_line":""},{"line_number":52,"context_line":".. code::"},{"line_number":53,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_b01d4f74","line":50,"range":{"start_line":50,"start_character":0,"end_line":50,"end_character":16},"updated":"2018-09-19 10:18:12.000000000","message":"Wonder if it\u0027s worth adding a glossary to this spec too, to ensure everyone\u0027s on the same page. Hyper Threading is just Intel\u0027s lingo for SMP, after all","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f411d171c272e83c92dc73270db2bbc6c29501fa","unresolved":false,"context_lines":[{"line_number":47,"context_line":"----------------------------------------------------------------------------"},{"line_number":48,"context_line":""},{"line_number":49,"context_line":"Consider the following NUMA topology for a \"2-sockets, 4 cores\" host with no"},{"line_number":50,"context_line":"Hyper-Threading:"},{"line_number":51,"context_line":""},{"line_number":52,"context_line":".. code::"},{"line_number":53,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_38953a3e","line":50,"range":{"start_line":50,"start_character":0,"end_line":50,"end_character":16},"in_reply_to":"3f79a3b5_b01d4f74","updated":"2018-10-24 15:15:47.000000000","message":"Technically, that\u0027s not exactly the same, right?","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":68,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":69,"context_line":"I/O-bound of course)."},{"line_number":70,"context_line":"For the moment, if you\u0027re an operator, you can use flavor extra specs to"},{"line_number":71,"context_line":"indicate a wanted NUMA topology for your instance like:"},{"line_number":72,"context_line":""},{"line_number":73,"context_line":".. code::"},{"line_number":74,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_70133766","line":71,"range":{"start_line":71,"start_character":11,"end_line":71,"end_character":17},"updated":"2018-09-19 10:18:12.000000000","message":"desired","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":80,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":81,"context_line":""},{"line_number":82,"context_line":""},{"line_number":83,"context_line":"#2 : As a user, I\u0027d like max throughput for a vCPU/vGPU communication"},{"line_number":84,"context_line":"---------------------------------------------------------------------"},{"line_number":85,"context_line":""},{"line_number":86,"context_line":"Say now that the above NUMA topology with a \"2-sockets, 4 cores no HT\" host"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_4874689c","line":83,"range":{"start_line":83,"start_character":40,"end_line":83,"end_character":46},"updated":"2018-10-01 21:37:23.000000000","message":"s/for a/for/","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fc95d3adfb598e7e5019655f469093290611a2b1","unresolved":false,"context_lines":[{"line_number":83,"context_line":"#2 : As a user, I\u0027d like max throughput for a vCPU/vGPU communication"},{"line_number":84,"context_line":"---------------------------------------------------------------------"},{"line_number":85,"context_line":""},{"line_number":86,"context_line":"Say now that the above NUMA topology with a \"2-sockets, 4 cores no HT\" host"},{"line_number":87,"context_line":"has some PCI devices, like a NVidia Tesla M10 device."},{"line_number":88,"context_line":"If that architecture also has a dual-IOH controller for PCIe devices, then"},{"line_number":89,"context_line":"a specific PCIe device will have some NUMA affinity to one of the two memory"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_adbfd1fa","line":86,"range":{"start_line":86,"start_character":47,"end_line":86,"end_character":54},"updated":"2018-09-11 20:37:03.000000000","message":"nit:numa nodes","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":83,"context_line":"#2 : As a user, I\u0027d like max throughput for a vCPU/vGPU communication"},{"line_number":84,"context_line":"---------------------------------------------------------------------"},{"line_number":85,"context_line":""},{"line_number":86,"context_line":"Say now that the above NUMA topology with a \"2-sockets, 4 cores no HT\" host"},{"line_number":87,"context_line":"has some PCI devices, like a NVidia Tesla M10 device."},{"line_number":88,"context_line":"If that architecture also has a dual-IOH controller for PCIe devices, then"},{"line_number":89,"context_line":"a specific PCIe device will have some NUMA affinity to one of the two memory"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_0b117853","line":86,"range":{"start_line":86,"start_character":47,"end_line":86,"end_character":54},"in_reply_to":"3f79a3b5_adbfd1fa","updated":"2018-09-19 10:18:12.000000000","message":"This is fine as is as the host presumably has two physical sockets. Might call out socket vs. node in the aforementioned glossary though.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":85,"context_line":""},{"line_number":86,"context_line":"Say now that the above NUMA topology with a \"2-sockets, 4 cores no HT\" host"},{"line_number":87,"context_line":"has some PCI devices, like a NVidia Tesla M10 device."},{"line_number":88,"context_line":"If that architecture also has a dual-IOH controller for PCIe devices, then"},{"line_number":89,"context_line":"a specific PCIe device will have some NUMA affinity to one of the two memory"},{"line_number":90,"context_line":"controllers, like :"},{"line_number":91,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_6b56ec07","line":88,"range":{"start_line":88,"start_character":37,"end_line":88,"end_character":40},"updated":"2018-09-19 10:18:12.000000000","message":"First time I\u0027d ever seen this initialism. Seems to be I/O Hub [1]. Maybe call that out here or in the glossary?\n\n[1] http://www.qdpma.com/SystemArchitecture/SystemArchitecture_QPI.html","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":103,"context_line":"CPU1 and CPU2, but not with CPU3 and CPU4 which have a separate memory mapping"},{"line_number":104,"context_line":"region."},{"line_number":105,"context_line":""},{"line_number":106,"context_line":"In that case, imagine I\u0027d like to use a CUDA library by running parallel"},{"line_number":107,"context_line":"threads in my instance for computing things like Artificial Intelligence"},{"line_number":108,"context_line":"calculations, or coin mining (heh), I\u0027d love to make sure my GPU devices are"},{"line_number":109,"context_line":"affinitized by NUMA nodes for maximum throughput reasons."}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_cb44e04a","line":106,"range":{"start_line":106,"start_character":22,"end_line":106,"end_character":25},"updated":"2018-09-19 10:18:12.000000000","message":"the user would","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":104,"context_line":"region."},{"line_number":105,"context_line":""},{"line_number":106,"context_line":"In that case, imagine I\u0027d like to use a CUDA library by running parallel"},{"line_number":107,"context_line":"threads in my instance for computing things like Artificial Intelligence"},{"line_number":108,"context_line":"calculations, or coin mining (heh), I\u0027d love to make sure my GPU devices are"},{"line_number":109,"context_line":"affinitized by NUMA nodes for maximum throughput reasons."},{"line_number":110,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_eb419c37","line":107,"range":{"start_line":107,"start_character":11,"end_line":107,"end_character":13},"updated":"2018-09-19 10:18:12.000000000","message":"their","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":105,"context_line":""},{"line_number":106,"context_line":"In that case, imagine I\u0027d like to use a CUDA library by running parallel"},{"line_number":107,"context_line":"threads in my instance for computing things like Artificial Intelligence"},{"line_number":108,"context_line":"calculations, or coin mining (heh), I\u0027d love to make sure my GPU devices are"},{"line_number":109,"context_line":"affinitized by NUMA nodes for maximum throughput reasons."},{"line_number":110,"context_line":""},{"line_number":111,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_e8a9b4dc","line":108,"range":{"start_line":108,"start_character":12,"end_line":108,"end_character":34},"updated":"2018-10-01 21:37:23.000000000","message":"I would just remove this.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":105,"context_line":""},{"line_number":106,"context_line":"In that case, imagine I\u0027d like to use a CUDA library by running parallel"},{"line_number":107,"context_line":"threads in my instance for computing things like Artificial Intelligence"},{"line_number":108,"context_line":"calculations, or coin mining (heh), I\u0027d love to make sure my GPU devices are"},{"line_number":109,"context_line":"affinitized by NUMA nodes for maximum throughput reasons."},{"line_number":110,"context_line":""},{"line_number":111,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_ab796493","line":108,"range":{"start_line":108,"start_character":36,"end_line":108,"end_character":39},"updated":"2018-09-19 10:18:12.000000000","message":"they\u0027d","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":117,"context_line":"NUMA nodes being nested Resource Providers - take #1"},{"line_number":118,"context_line":"----------------------------------------------------"},{"line_number":119,"context_line":""},{"line_number":120,"context_line":"Given virt drivers can pass a provider tree to the compute service, for example"},{"line_number":121,"context_line":"the libvirt driver could create a tree for a (2-socket, 16 cores) NUMA topology"},{"line_number":122,"context_line":"having one GPU device and one SRIOV PF like this:"},{"line_number":123,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_287d6c6c","line":120,"range":{"start_line":120,"start_character":0,"end_line":120,"end_character":43},"updated":"2018-10-01 21:37:23.000000000","message":"The resource tracker passes the provider tree (with the compute node provider as the root of the tree) to the virt driver, not the other way around. The virt driver may add child nodes to the provider tree passed to it by the resource tracker.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fc95d3adfb598e7e5019655f469093290611a2b1","unresolved":false,"context_lines":[{"line_number":118,"context_line":"----------------------------------------------------"},{"line_number":119,"context_line":""},{"line_number":120,"context_line":"Given virt drivers can pass a provider tree to the compute service, for example"},{"line_number":121,"context_line":"the libvirt driver could create a tree for a (2-socket, 16 cores) NUMA topology"},{"line_number":122,"context_line":"having one GPU device and one SRIOV PF like this:"},{"line_number":123,"context_line":""},{"line_number":124,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_4dff1dab","line":121,"range":{"start_line":121,"start_character":48,"end_line":121,"end_character":54},"updated":"2018-09-11 20:37:03.000000000","message":"numa node","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":118,"context_line":"----------------------------------------------------"},{"line_number":119,"context_line":""},{"line_number":120,"context_line":"Given virt drivers can pass a provider tree to the compute service, for example"},{"line_number":121,"context_line":"the libvirt driver could create a tree for a (2-socket, 16 cores) NUMA topology"},{"line_number":122,"context_line":"having one GPU device and one SRIOV PF like this:"},{"line_number":123,"context_line":""},{"line_number":124,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_946b6eee","line":121,"range":{"start_line":121,"start_character":25,"end_line":121,"end_character":39},"updated":"2018-10-01 21:37:23.000000000","message":"s/create a tree/create child providers for each of 2 sockets representing separate NUMA nodes/","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"c3b9e6f939d284daa8d222eae2b2a22adcd5f59d","unresolved":false,"context_lines":[{"line_number":129,"context_line":"                      +-----------------+"},{"line_number":130,"context_line":"                      /                 \\"},{"line_number":131,"context_line":"   +------------------+                 +-----------------+"},{"line_number":132,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":133,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":134,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":135,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_c5068886","line":132,"range":{"start_line":132,"start_character":4,"end_line":132,"end_character":18},"updated":"2018-09-12 20:14:03.000000000","message":"we just said at the PTG there will be a naming convention.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"43d30827ffe8584d3a435023b5e84345d4e7bbfc","unresolved":false,"context_lines":[{"line_number":131,"context_line":"   +------------------+                 +-----------------+"},{"line_number":132,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":133,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":134,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":135,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":136,"context_line":"   +------------------+                 +-----------------+"},{"line_number":137,"context_line":"            |                                 |"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_0b4bdf86","line":134,"range":{"start_line":134,"start_character":61,"end_line":134,"end_character":70},"updated":"2018-09-17 20:43:22.000000000","message":"Are we doing shared and dedicated CPUs on the same host/RP?","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":131,"context_line":"   +------------------+                 +-----------------+"},{"line_number":132,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":133,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":134,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":135,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":136,"context_line":"   +------------------+                 +-----------------+"},{"line_number":137,"context_line":"            |                                 |"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_0bc41891","line":134,"range":{"start_line":134,"start_character":61,"end_line":134,"end_character":70},"in_reply_to":"3f79a3b5_0b4bdf86","updated":"2018-09-19 10:18:12.000000000","message":"Yeah, it needs that spec [1]. I\u0027ve called out the need to highlight the need for this spec above.\n\n[1] https://review.openstack.org/#/c/555081/","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"1d7222c0ed660d8c7569c12429c2c939b518f01b","unresolved":false,"context_lines":[{"line_number":131,"context_line":"   +------------------+                 +-----------------+"},{"line_number":132,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":133,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":134,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":135,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":136,"context_line":"   +------------------+                 +-----------------+"},{"line_number":137,"context_line":"            |                                 |"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_53ed0923","line":134,"range":{"start_line":134,"start_character":61,"end_line":134,"end_character":70},"in_reply_to":"3f79a3b5_0b4bdf86","updated":"2018-09-17 22:53:58.000000000","message":"Yes, that\u0027s covered in Jay\u0027s spec.  There\u0027s a gotcha specifically around 4KB pages and floating vs numa-affined instances as highlighted in my bug mentioned immediately below.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"19b572a55a63b35c2f200300f0e321d73eaf489e","unresolved":false,"context_lines":[{"line_number":132,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":133,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":134,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":135,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":136,"context_line":"   +------------------+                 +-----------------+"},{"line_number":137,"context_line":"            |                                 |"},{"line_number":138,"context_line":"    +---------------+                     +--------------------+"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_7fb01141","line":135,"range":{"start_line":135,"start_character":0,"end_line":135,"end_character":59},"updated":"2018-09-17 16:55:12.000000000","message":"There is a problem with representing 4KB memory pages as purely per-NUMA-node resources.\n\nI\u0027ve written it up at https://bugs.launchpad.net/nova/+bug/1792985\n\nI think this implies that either we go back to restricting such instances to a single NUMA node, or else we must not use \"strict\" affinity for the default page size.  (And maybe the default page size (4KB on x86) and VCPU resources should be modeled as compute node resources rather than NUMA node resources.)","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"1d7222c0ed660d8c7569c12429c2c939b518f01b","unresolved":false,"context_lines":[{"line_number":132,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":133,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":134,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":135,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":136,"context_line":"   +------------------+                 +-----------------+"},{"line_number":137,"context_line":"            |                                 |"},{"line_number":138,"context_line":"    +---------------+                     +--------------------+"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_f3ed3524","line":135,"range":{"start_line":135,"start_character":0,"end_line":135,"end_character":59},"in_reply_to":"3f79a3b5_7fb01141","updated":"2018-09-17 22:53:58.000000000","message":"Sean-k-mooney mentioned one other option, don\u0027t allow numa-floating and numa-pinned instances on the same numa node.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":132,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":133,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":134,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":135,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":136,"context_line":"   +------------------+                 +-----------------+"},{"line_number":137,"context_line":"            |                                 |"},{"line_number":138,"context_line":"    +---------------+                     +--------------------+"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_ef2e8f8b","line":135,"range":{"start_line":135,"start_character":0,"end_line":135,"end_character":59},"in_reply_to":"3f79a3b5_abec0429","updated":"2018-10-01 21:37:23.000000000","message":"\u003e That goes against what\u0027s been outlined in Jay\u0027s spec [1], which\n \u003e would allow dedicated and shared CPU instances to like side by\n \u003e side. Perhaps we could enable strict only if \u0027cpu_shared_set\u0027 is\n \u003e not defined for the host, but that feels kind of magic.\n \u003e \n \u003e [1] https://review.openstack.org/#/c/555081/\n\nYeah, that\u0027s too magical IMHO.\n\nI\u0027d prefer to just leave a warning in the docs as we have always done, possibly adding something to the effect of: \"If you do choose to mix non-NUMA-pinned instances with NUMA-pinned instances on the same compute host, you might want to consider increasing the CONF.host_reserved_memory_mb setting\" or something like that?","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"eca9c899e1784e3d8f13b68a40a9d2620a91a18c","unresolved":false,"context_lines":[{"line_number":132,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":133,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":134,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":135,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":136,"context_line":"   +------------------+                 +-----------------+"},{"line_number":137,"context_line":"            |                                 |"},{"line_number":138,"context_line":"    +---------------+                     +--------------------+"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_13a3e300","line":135,"range":{"start_line":135,"start_character":0,"end_line":135,"end_character":59},"in_reply_to":"3f79a3b5_ef2e8f8b","updated":"2018-10-02 15:08:27.000000000","message":"To be totally reliable, you\u0027d need to reserve an good bit of memory.\n\nThere are deterministic solutions, and they\u0027re not too complicated.  Some possible solutions are:\n1) Remove the \"strict\" memory tuning in the libvirt XML.  This would only affect 4K pages, the hugepages would still be strictly affined.  Memory would still largely be affined but if it can\u0027t be allocated on the local node we don\u0027t fail to boot the instance.\n2) Restrict floating instances to a single NUMA node rather than let them float across the whole host.\n3) Add a warning saying that all the host CPUs on a given host numa node should be in either \"cpu_shared_set\" or \"cpu_dedicated_set\", but not split across both.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":132,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":133,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":134,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":135,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":136,"context_line":"   +------------------+                 +-----------------+"},{"line_number":137,"context_line":"            |                                 |"},{"line_number":138,"context_line":"    +---------------+                     +--------------------+"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_abec0429","line":135,"range":{"start_line":135,"start_character":0,"end_line":135,"end_character":59},"in_reply_to":"3f79a3b5_f3ed3524","updated":"2018-09-19 10:18:12.000000000","message":"That goes against what\u0027s been outlined in Jay\u0027s spec [1], which would allow dedicated and shared CPU instances to like side by side. Perhaps we could enable strict only if \u0027cpu_shared_set\u0027 is not defined for the host, but that feels kind of magic.\n\n[1] https://review.openstack.org/#/c/555081/","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"c3b9e6f939d284daa8d222eae2b2a22adcd5f59d","unresolved":false,"context_lines":[{"line_number":136,"context_line":"   +------------------+                 +-----------------+"},{"line_number":137,"context_line":"            |                                 |"},{"line_number":138,"context_line":"    +---------------+                     +--------------------+"},{"line_number":139,"context_line":"    | \u003cGPU_TYPE\u003e    |                     | \u003cPHYS_FUNC_PCI_ID\u003e |"},{"line_number":140,"context_line":"    | VGPU: 8       |                     | SRIOV_NET_VF: 8    |"},{"line_number":141,"context_line":"    +---------------+                     +--------------------+"},{"line_number":142,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_c51448a2","line":139,"range":{"start_line":139,"start_character":6,"end_line":139,"end_character":16},"updated":"2018-09-12 20:14:03.000000000","message":"ditto","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fc95d3adfb598e7e5019655f469093290611a2b1","unresolved":false,"context_lines":[{"line_number":140,"context_line":"    | VGPU: 8       |                     | SRIOV_NET_VF: 8    |"},{"line_number":141,"context_line":"    +---------------+                     +--------------------+"},{"line_number":142,"context_line":""},{"line_number":143,"context_line":"The Resource Providers would be named by the virt driver, so they could be"},{"line_number":144,"context_line":"different between for example libvirt or Xen. Above is just an example for"},{"line_number":145,"context_line":"libvirt, but let\u0027s not rat hole here about those names for the moment."},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_2dbe61d4","line":144,"range":{"start_line":143,"start_character":58,"end_line":144,"end_character":44},"updated":"2018-09-11 20:37:03.000000000","message":"i think this will be problematic if an external service like cyborg or neutron needs t create child resouce providers beneth the numa nodes created by the virt driver.\n\ncan we define a standard nameing convetion that will be used for all services.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":140,"context_line":"    | VGPU: 8       |                     | SRIOV_NET_VF: 8    |"},{"line_number":141,"context_line":"    +---------------+                     +--------------------+"},{"line_number":142,"context_line":""},{"line_number":143,"context_line":"The Resource Providers would be named by the virt driver, so they could be"},{"line_number":144,"context_line":"different between for example libvirt or Xen. Above is just an example for"},{"line_number":145,"context_line":"libvirt, but let\u0027s not rat hole here about those names for the moment."},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_ab3ac494","line":144,"range":{"start_line":143,"start_character":58,"end_line":144,"end_character":44},"in_reply_to":"3f79a3b5_2dbe61d4","updated":"2018-09-19 10:18:12.000000000","message":"+1 I think we _do_ need to rat hole here :) Let\u0027s just make sure said hole is not allowed to get too deep","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":140,"context_line":"    | VGPU: 8       |                     | SRIOV_NET_VF: 8    |"},{"line_number":141,"context_line":"    +---------------+                     +--------------------+"},{"line_number":142,"context_line":""},{"line_number":143,"context_line":"The Resource Providers would be named by the virt driver, so they could be"},{"line_number":144,"context_line":"different between for example libvirt or Xen. Above is just an example for"},{"line_number":145,"context_line":"libvirt, but let\u0027s not rat hole here about those names for the moment."},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_0f5aeb1c","line":144,"range":{"start_line":143,"start_character":58,"end_line":144,"end_character":44},"in_reply_to":"3f79a3b5_ab3ac494","updated":"2018-10-01 21:37:23.000000000","message":"My suggestion was to have the following naming convention:\n\n \u003cPARENT_RP_NAME\u003e_\u003cDEVICE_ADDRESS\u003e\n\nwhich is unambiguous and predictable.\n\nThe trick is what should go in \u003cDEVICE_ADDRESS\u003e for a NUMA node? NUMA nodes don\u0027t have traditional addresses like PCI devices do, so we\u0027d need to \"fake\" a device address for each NUMA node.\n\nI think something like \"NUMA{NODE_ID}\" should be fine (which is I believe what Sylvain suggested.\n\nThat would mean that, if we had a 2-NUMA-node compute host with hypervisor_hostname of \"cn1\" and 2 SR-IOV PFs, each affined to one NUMA node, we might have 5 providers arranged in a tree with the following unambiguous and predictable names:\n\ncn1\n cn1_NUMA0\n  cn1_NUMA0_0000:3f:00.1\n cn1_NUMA1\n  cn1_NUMA1_0000:3a:00.1\n\nThat said, all an external system needs to do is:\n\n GET /resource_providers?in_tree\u003dcn1\n\nand they will get all the child providers under the root compute node provider. If all the external agent needs to do is add a child resource provider to the appropriate child provider of the root compute node, it is easy enough to figure out where to add that child to by examining the traits and inventories of the provider...","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":145,"context_line":"libvirt, but let\u0027s not rat hole here about those names for the moment."},{"line_number":146,"context_line":""},{"line_number":147,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":148,"context_line":"classes :"},{"line_number":149,"context_line":"* VCPU: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":150,"context_line":"* MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":151,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_8b5ee8e6","line":148,"range":{"start_line":148,"start_character":0,"end_line":148,"end_character":9},"updated":"2018-09-19 10:18:12.000000000","message":"nit: You need a line break after this","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":146,"context_line":""},{"line_number":147,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":148,"context_line":"classes :"},{"line_number":149,"context_line":"* VCPU: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":150,"context_line":"* MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":151,"context_line":""},{"line_number":152,"context_line":"A third resource class that we call ``PCPU`` could be there for dedicated CPU"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_0fc0ab49","line":149,"range":{"start_line":149,"start_character":8,"end_line":149,"end_character":53},"updated":"2018-10-01 21:37:23.000000000","message":"Might be worth including PCPU here when discussing dedicated guest processor resources and VCPU for discussing shared guest processor resources.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"43d30827ffe8584d3a435023b5e84345d4e7bbfc","unresolved":false,"context_lines":[{"line_number":146,"context_line":""},{"line_number":147,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":148,"context_line":"classes :"},{"line_number":149,"context_line":"* VCPU: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":150,"context_line":"* MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":151,"context_line":""},{"line_number":152,"context_line":"A third resource class that we call ``PCPU`` could be there for dedicated CPU"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_4b547722","line":149,"range":{"start_line":149,"start_character":54,"end_line":149,"end_character":62},"updated":"2018-09-17 20:43:22.000000000","message":"This may be going too deep too fast, but wouldn\u0027t we need to model thread siblings so that we can support the \u0027require\u0027 thread policy?","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":146,"context_line":""},{"line_number":147,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":148,"context_line":"classes :"},{"line_number":149,"context_line":"* VCPU: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":150,"context_line":"* MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":151,"context_line":""},{"line_number":152,"context_line":"A third resource class that we call ``PCPU`` could be there for dedicated CPU"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_ab63a4b0","line":149,"range":{"start_line":149,"start_character":2,"end_line":149,"end_character":6},"updated":"2018-09-19 10:18:12.000000000","message":"``literal``","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"19b572a55a63b35c2f200300f0e321d73eaf489e","unresolved":false,"context_lines":[{"line_number":147,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":148,"context_line":"classes :"},{"line_number":149,"context_line":"* VCPU: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":150,"context_line":"* MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":151,"context_line":""},{"line_number":152,"context_line":"A third resource class that we call ``PCPU`` could be there for dedicated CPU"},{"line_number":153,"context_line":"resources. See the `CPU resources`_ spec for more details about that"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_dce4d3aa","line":150,"range":{"start_line":150,"start_character":0,"end_line":150,"end_character":59},"updated":"2018-09-17 16:55:12.000000000","message":"Do we have a plan for representing the various page sizes?","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":147,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":148,"context_line":"classes :"},{"line_number":149,"context_line":"* VCPU: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":150,"context_line":"* MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":151,"context_line":""},{"line_number":152,"context_line":"A third resource class that we call ``PCPU`` could be there for dedicated CPU"},{"line_number":153,"context_line":"resources. See the `CPU resources`_ spec for more details about that"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_6fd6ff28","line":150,"range":{"start_line":150,"start_character":0,"end_line":150,"end_character":59},"in_reply_to":"3f79a3b5_dce4d3aa","updated":"2018-10-01 21:37:23.000000000","message":"Good question. Way back when I\u0027d proposed adding resource classes for various page sizes:\n\nhttps://review.openstack.org/#/c/442718/\n\nMight be worth resurrecting that corpse.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":149,"context_line":"* VCPU: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":150,"context_line":"* MEMORY_MB: for telling how much memory the NUMA node has."},{"line_number":151,"context_line":""},{"line_number":152,"context_line":"A third resource class that we call ``PCPU`` could be there for dedicated CPU"},{"line_number":153,"context_line":"resources. See the `CPU resources`_ spec for more details about that"},{"line_number":154,"context_line":"specific resource type."},{"line_number":155,"context_line":""},{"line_number":156,"context_line":"The root Resource Provider (ie. the compute node) would only provide resources"},{"line_number":157,"context_line":"for classes that are not NUMA-related."}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_2b6f9492","line":154,"range":{"start_line":152,"start_character":0,"end_line":154,"end_character":23},"updated":"2018-09-19 10:18:12.000000000","message":"Move this into a \u0027.. note\u0027, like Jay has done for dependencies to this spec in his [1]\n\n[1] https://review.openstack.org/#/c/555081/","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":156,"context_line":"The root Resource Provider (ie. the compute node) would only provide resources"},{"line_number":157,"context_line":"for classes that are not NUMA-related."},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Each PCI device (like a physical GPU device) would then be a child where each"},{"line_number":160,"context_line":"one would have specific resource classes. For example:"},{"line_number":161,"context_line":""},{"line_number":162,"context_line":"* GPU devices have a specific VGPU resource class that counts the number of"},{"line_number":163,"context_line":"  virtual GPUs it can create."}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_2f924759","line":160,"range":{"start_line":159,"start_character":0,"end_line":160,"end_character":41},"updated":"2018-10-01 21:37:23.000000000","message":"This is only for NUMA-affined resources, right? Might be worth clarifying you are referring here only to NUMA-affined things?","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":161,"context_line":""},{"line_number":162,"context_line":"* GPU devices have a specific VGPU resource class that counts the number of"},{"line_number":163,"context_line":"  virtual GPUs it can create."},{"line_number":164,"context_line":"* a SR-IOV physical function could count the number of virtual functions"},{"line_number":165,"context_line":"  it can create."},{"line_number":166,"context_line":""},{"line_number":167,"context_line":".. note:: `huge pages`_ (or specific memory page size) are a separate feature"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_ab0884d8","line":164,"range":{"start_line":164,"start_character":2,"end_line":164,"end_character":28},"updated":"2018-09-19 10:18:12.000000000","message":"s/a/An/\n\nor\n\ns/a SR-IOV physical function/SR-IOV physical functions/","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"43d30827ffe8584d3a435023b5e84345d4e7bbfc","unresolved":false,"context_lines":[{"line_number":164,"context_line":"* a SR-IOV physical function could count the number of virtual functions"},{"line_number":165,"context_line":"  it can create."},{"line_number":166,"context_line":""},{"line_number":167,"context_line":".. note:: `huge pages`_ (or specific memory page size) are a separate feature"},{"line_number":168,"context_line":"          that needs a separate discussion on how to provide that feature using"},{"line_number":169,"context_line":"          Placement resource traits or classes hence being out of this spec."},{"line_number":170,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_ab098b31","line":167,"updated":"2018-09-17 20:43:22.000000000","message":"Yeah, fair enough, but we need to at least keep that in the back of our heads, because it\u0027s heavily related. I like the previously discussed idea of modelling them as pages (of varying sizes if needed, but I suspect any one host will only have one size), counting units of those, and removing MEMORY_MB from NUMA nodes that have hugepages.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"1d7222c0ed660d8c7569c12429c2c939b518f01b","unresolved":false,"context_lines":[{"line_number":164,"context_line":"* a SR-IOV physical function could count the number of virtual functions"},{"line_number":165,"context_line":"  it can create."},{"line_number":166,"context_line":""},{"line_number":167,"context_line":".. note:: `huge pages`_ (or specific memory page size) are a separate feature"},{"line_number":168,"context_line":"          that needs a separate discussion on how to provide that feature using"},{"line_number":169,"context_line":"          Placement resource traits or classes hence being out of this spec."},{"line_number":170,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_d36fd98f","line":167,"in_reply_to":"3f79a3b5_ab098b31","updated":"2018-09-17 22:53:58.000000000","message":"As a counter-example, we currently have hosts with a mix of 4K, 2MB, and 1GB pages.  (Mostly useful for small clusters.)  But we can hash it out in that separate discussion.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":164,"context_line":"* a SR-IOV physical function could count the number of virtual functions"},{"line_number":165,"context_line":"  it can create."},{"line_number":166,"context_line":""},{"line_number":167,"context_line":".. note:: `huge pages`_ (or specific memory page size) are a separate feature"},{"line_number":168,"context_line":"          that needs a separate discussion on how to provide that feature using"},{"line_number":169,"context_line":"          Placement resource traits or classes hence being out of this spec."},{"line_number":170,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_8b2ca84a","line":167,"in_reply_to":"3f79a3b5_d36fd98f","updated":"2018-09-19 10:18:12.000000000","message":"I personally don\u0027t think these should be out of scope as the effort to migrate afterwards seems like it would be non-trivial, but I won\u0027t block on that. However, what I would like to see is rather than us migrating the MEMORY_MB option and continuing referring to memory in terms of MB, we start referring to memory in terms of page sizes. Chris mentions 4K, 2MB and 1GB, but I\u0027m sure other platforms support other page sizes so this would need to be generic.\n\nI might go draft a spec on this.\n\nUPDATE: Yup, Power supports (supported?) 4 KB, 64 KB, 16 MB, and 16 GB [1]. Definitely needs some work here.\n\n[1] https://www.ibm.com/developerworks/community/blogs/RedbooksVetter/entry/the_advantage_of_the_correct_power_systems_page_sizes?lang\u003den","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":169,"context_line":"          Placement resource traits or classes hence being out of this spec."},{"line_number":170,"context_line":""},{"line_number":171,"context_line":""},{"line_number":172,"context_line":"Asking for NUMA resources tied to a NUMA node or not"},{"line_number":173,"context_line":"----------------------------------------------------"},{"line_number":174,"context_line":""},{"line_number":175,"context_line":"Back to the usecase #1, it would then be possible to ask for specific NUMA"},{"line_number":176,"context_line":"resources using existing flavors that would be translated into specific"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_abe084cd","line":173,"range":{"start_line":172,"start_character":0,"end_line":173,"end_character":52},"updated":"2018-09-19 10:18:12.000000000","message":"As noted below, I would much rather we drop the older \u0027hw:numa_nodes\u0027 extra spec and related extra specs. In line with that, I think the order of all of these examples should be reversed. Instead of saying,\n\n  If you had extra spec ``--property hw:numa_nodes\u003dN``,\n  you would get N NUMA nodes\n  which would look like ``resources1:VCPU:1\u0026...``\n\nsay\n\n  If you wanted N NUMA nodes\n  you would request ``resources1:VCPU:1\u0026...`\n  which would be analogous to the old ``hw:numa_nodes\u003dN``\n\nIf that makes sense?\n\nAlso, either drop ``--property`` and refer to just the extra specs or use it consistently, meaning ``resources1\u003dVCPU:1`` would become ``--property resources1:VCPU:1`` and so forth (right?)","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"d7f13a8e92454890a60836b5b41e7989c82b68e1","unresolved":false,"context_lines":[{"line_number":169,"context_line":"          Placement resource traits or classes hence being out of this spec."},{"line_number":170,"context_line":""},{"line_number":171,"context_line":""},{"line_number":172,"context_line":"Asking for NUMA resources tied to a NUMA node or not"},{"line_number":173,"context_line":"----------------------------------------------------"},{"line_number":174,"context_line":""},{"line_number":175,"context_line":"Back to the usecase #1, it would then be possible to ask for specific NUMA"},{"line_number":176,"context_line":"resources using existing flavors that would be translated into specific"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_6f7d9f1c","line":173,"range":{"start_line":172,"start_character":0,"end_line":173,"end_character":52},"in_reply_to":"3f79a3b5_abe084cd","updated":"2018-10-01 21:37:23.000000000","message":"++ to what stephenfin writes.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":176,"context_line":"resources using existing flavors that would be translated into specific"},{"line_number":177,"context_line":"Placement resource queries."},{"line_number":178,"context_line":""},{"line_number":179,"context_line":"Since the extra spec property ``--property hw:numa_nodes\u003dN`` (where N is an"},{"line_number":180,"context_line":"integer) asks for sharding the vCPUs threads between N NUMA nodes, then"},{"line_number":181,"context_line":"the scheduler would translate that into a numbered request group query like:"},{"line_number":182,"context_line":""},{"line_number":183,"context_line":"  ``resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1``"},{"line_number":184,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_6b834c04","line":181,"range":{"start_line":179,"start_character":0,"end_line":181,"end_character":76},"updated":"2018-09-19 10:18:12.000000000","message":"Personally, I think this extra spec should be deprecated and we should migrate away from it in the future, similar to what we\u0027re planning to do with \u0027hw:cpu_policy\u0027 in Jay\u0027s spec [1]. I\u0027m OK with providing this shim for a release though, allowing us time to use online migrations to migrate the flavor extra specs to the new coolness.\n\n[1] https://review.openstack.org/#/c/555081/18/specs/stein/approved/cpu-resources.rst@321","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":180,"context_line":"integer) asks for sharding the vCPUs threads between N NUMA nodes, then"},{"line_number":181,"context_line":"the scheduler would translate that into a numbered request group query like:"},{"line_number":182,"context_line":""},{"line_number":183,"context_line":"  ``resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1``"},{"line_number":184,"context_line":""},{"line_number":185,"context_line":"As the Placement API now supports since 1.25 microversion the ask to shard"},{"line_number":186,"context_line":"resources, we would amend the above by :"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_8bd10832","line":183,"range":{"start_line":183,"start_character":2,"end_line":183,"end_character":4},"updated":"2018-09-19 10:18:12.000000000","message":"nit: Drop these and replace the \u0027:\u0027 above with \u0027::\u0027. Ditto for the rest of the examples below.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":182,"context_line":""},{"line_number":183,"context_line":"  ``resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1``"},{"line_number":184,"context_line":""},{"line_number":185,"context_line":"As the Placement API now supports since 1.25 microversion the ask to shard"},{"line_number":186,"context_line":"resources, we would amend the above by :"},{"line_number":187,"context_line":""},{"line_number":188,"context_line":"  ``group_policy\u003disolate``"},{"line_number":189,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_2bcc745f","line":186,"range":{"start_line":185,"start_character":21,"end_line":186,"end_character":9},"updated":"2018-09-19 10:18:12.000000000","message":"nit: added the ability to shard resources in the 1.25 microversion","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":187,"context_line":""},{"line_number":188,"context_line":"  ``group_policy\u003disolate``"},{"line_number":189,"context_line":""},{"line_number":190,"context_line":"Eg.:"},{"line_number":191,"context_line":""},{"line_number":192,"context_line":"* for a flavor of 8 VCPUs and hw:numa_nodes\u003d2, it would translate into:"},{"line_number":193,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_abbf24ce","line":190,"range":{"start_line":190,"start_character":0,"end_line":190,"end_character":4},"updated":"2018-09-19 10:18:12.000000000","message":"For example:","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":189,"context_line":""},{"line_number":190,"context_line":"Eg.:"},{"line_number":191,"context_line":""},{"line_number":192,"context_line":"* for a flavor of 8 VCPUs and hw:numa_nodes\u003d2, it would translate into:"},{"line_number":193,"context_line":""},{"line_number":194,"context_line":"    ``group_policy\u003disolate\u0026resources1:VCPU\u003d4\u0026resources2:VCPU\u003d4``"},{"line_number":195,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_cbc38057","line":192,"range":{"start_line":192,"start_character":30,"end_line":192,"end_character":45},"updated":"2018-09-19 10:18:12.000000000","message":"``literal``","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":197,"context_line":""},{"line_number":198,"context_line":"    ``resources1:VCPU\u003d8``"},{"line_number":199,"context_line":""},{"line_number":200,"context_line":"For the other related property ``hw:numa_cpus.N\u003dY``, it would just"},{"line_number":201,"context_line":"not calculate the number of VCPUs to ask for the specific numbered request"},{"line_number":202,"context_line":"group, but rather just directly use that value (here, Y)."},{"line_number":203,"context_line":"Eg.:"},{"line_number":204,"context_line":""},{"line_number":205,"context_line":"* for a flavor of 8 VCPUs with extra specs set with"},{"line_number":206,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_cpus.1\u003d2,3,4,5,6,7``,"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_8b04a87f","line":203,"range":{"start_line":200,"start_character":0,"end_line":203,"end_character":4},"updated":"2018-09-19 10:18:12.000000000","message":"Again, let\u0027s just deprecate this and provide a minimal shim for a single release","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":212,"context_line":"which MEMORY_MB resource class."},{"line_number":213,"context_line":"Eg.:"},{"line_number":214,"context_line":""},{"line_number":215,"context_line":"* for a flavor of 4GB RAM with extra specs set to"},{"line_number":216,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_mem:0\u003d1024\u0026hw:numa_mem:1\u003d3072``, then it would"},{"line_number":217,"context_line":"  translate the Placement query into"},{"line_number":218,"context_line":"  ``group_policy\u003disolate\u0026resources1:MEMORY_MB\u003d1024\u0026resources2:MEMORY_MB\u003d3072``"},{"line_number":219,"context_line":""},{"line_number":220,"context_line":""},{"line_number":221,"context_line":"Asking for NUMA-associated devices"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_cbedc0b8","line":218,"range":{"start_line":215,"start_character":0,"end_line":218,"end_character":78},"updated":"2018-09-19 10:18:12.000000000","message":"Again, flip the order of this. Instead of\n\n  old world\n  expected result\n  new world equivalent\n\nsay\n\n  expected result\n  new world\n  old world equivalent","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"818b172a26998448150d02297040f787f0696376","unresolved":false,"context_lines":[{"line_number":218,"context_line":"  ``group_policy\u003disolate\u0026resources1:MEMORY_MB\u003d1024\u0026resources2:MEMORY_MB\u003d3072``"},{"line_number":219,"context_line":""},{"line_number":220,"context_line":""},{"line_number":221,"context_line":"Asking for NUMA-associated devices"},{"line_number":222,"context_line":"----------------------------------"},{"line_number":223,"context_line":""},{"line_number":224,"context_line":"The same logic could be done for associated devices that are children of NUMA"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_bd06dc20","line":221,"updated":"2018-09-14 15:59:27.000000000","message":"we said at the PTG we\u0027re going to split the spec into two with the NUMA affinity be a separate spec.\n\nAdd also a note about PCPU resource class that would be on children.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"76594c6ac7823fc79bece4f005761844ef96afa2","unresolved":false,"context_lines":[{"line_number":218,"context_line":"  ``group_policy\u003disolate\u0026resources1:MEMORY_MB\u003d1024\u0026resources2:MEMORY_MB\u003d3072``"},{"line_number":219,"context_line":""},{"line_number":220,"context_line":""},{"line_number":221,"context_line":"Asking for NUMA-associated devices"},{"line_number":222,"context_line":"----------------------------------"},{"line_number":223,"context_line":""},{"line_number":224,"context_line":"The same logic could be done for associated devices that are children of NUMA"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_6b34ac49","line":221,"in_reply_to":"3f79a3b5_bd06dc20","updated":"2018-09-19 10:18:12.000000000","message":"(I stopped reading here as it looks like a lot of the below needs rework)","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"e68229abd357f667327d1215a3cb8544cdafaf15","unresolved":false,"context_lines":[{"line_number":229,"context_line":"  behaviour we have as those resources would necessarly be satisfied by a"},{"line_number":230,"context_line":"  single Resource Provider (here, the GPU type). That said, Placement API would"},{"line_number":231,"context_line":"  only return child RPs that would have both vGPUs and enough VCPUs (not"},{"line_number":232,"context_line":"  balancing over multiple NUMA nodes)"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"* If I want ``N`` vGPUs that would *possibly* be in the same NUMA node than my"},{"line_number":235,"context_line":"  vCPUs but not that being mandatory, then I\u0027d be writing my flavor with"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_4bc4c941","line":232,"updated":"2018-09-12 21:30:39.000000000","message":"this is wrong : the right syntax has to be ``group_policy\u003dnone\u0026resources1:VCPU\u003dM\u0026resources2:VGPU\u003dN``.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"e68229abd357f667327d1215a3cb8544cdafaf15","unresolved":false,"context_lines":[{"line_number":234,"context_line":"* If I want ``N`` vGPUs that would *possibly* be in the same NUMA node than my"},{"line_number":235,"context_line":"  vCPUs but not that being mandatory, then I\u0027d be writing my flavor with"},{"line_number":236,"context_line":"  ``group_policy\u003dnone\u0026resources1:VCPU\u003dM\u0026resources2:VGPU\u003dN``."},{"line_number":237,"context_line":""},{"line_number":238,"context_line":""},{"line_number":239,"context_line":"Optionally configured NUMA resources"},{"line_number":240,"context_line":"------------------------------------"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_cbdd3914","line":237,"updated":"2018-09-12 21:30:39.000000000","message":"that is incorrect, we need a subtree thingy","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"e68229abd357f667327d1215a3cb8544cdafaf15","unresolved":false,"context_lines":[{"line_number":235,"context_line":"  vCPUs but not that being mandatory, then I\u0027d be writing my flavor with"},{"line_number":236,"context_line":"  ``group_policy\u003dnone\u0026resources1:VCPU\u003dM\u0026resources2:VGPU\u003dN``."},{"line_number":237,"context_line":""},{"line_number":238,"context_line":""},{"line_number":239,"context_line":"Optionally configured NUMA resources"},{"line_number":240,"context_line":"------------------------------------"},{"line_number":241,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_6be425e1","line":238,"updated":"2018-09-12 21:30:39.000000000","message":"missing the third case : I want on the same NUMA node","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8768,"name":"Chris Friesen","email":"chris.friesen@windriver.com","username":"cbf123"},"change_message_id":"15e10edf3cff7498e90bb2dc7743ec622635ba96","unresolved":false,"context_lines":[{"line_number":235,"context_line":"  vCPUs but not that being mandatory, then I\u0027d be writing my flavor with"},{"line_number":236,"context_line":"  ``group_policy\u003dnone\u0026resources1:VCPU\u003dM\u0026resources2:VGPU\u003dN``."},{"line_number":237,"context_line":""},{"line_number":238,"context_line":""},{"line_number":239,"context_line":"Optionally configured NUMA resources"},{"line_number":240,"context_line":"------------------------------------"},{"line_number":241,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_f174a238","line":238,"in_reply_to":"3f79a3b5_6be425e1","updated":"2018-09-12 22:58:56.000000000","message":"agreed...good catch.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fc95d3adfb598e7e5019655f469093290611a2b1","unresolved":false,"context_lines":[{"line_number":254,"context_line":""},{"line_number":255,"context_line":".. code::"},{"line_number":256,"context_line":""},{"line_number":257,"context_line":"  [devices]"},{"line_number":258,"context_line":"  numa_resource_classes \u003d [VCPU, MEMORY_MB, VGPU]"},{"line_number":259,"context_line":""},{"line_number":260,"context_line":"Each of the items in the ListOpt would be a resource class. If operator says"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_ed9aa960","line":257,"range":{"start_line":257,"start_character":3,"end_line":257,"end_character":10},"updated":"2018-09-11 20:37:03.000000000","message":"numa\n\ndevices could look weird given we have pci passthough","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"fc95d3adfb598e7e5019655f469093290611a2b1","unresolved":false,"context_lines":[{"line_number":255,"context_line":".. code::"},{"line_number":256,"context_line":""},{"line_number":257,"context_line":"  [devices]"},{"line_number":258,"context_line":"  numa_resource_classes \u003d [VCPU, MEMORY_MB, VGPU]"},{"line_number":259,"context_line":""},{"line_number":260,"context_line":"Each of the items in the ListOpt would be a resource class. If operator says"},{"line_number":261,"context_line":"for that specific compute node nova.conf which resources classes to use, then"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_8d9f7550","line":258,"range":{"start_line":258,"start_character":1,"end_line":258,"end_character":7},"updated":"2018-09-11 20:37:03.000000000","message":"delete the numa_prefix","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"43d30827ffe8584d3a435023b5e84345d4e7bbfc","unresolved":false,"context_lines":[{"line_number":299,"context_line":"          the virt drivers. Of course, a shared module could be imagined for"},{"line_number":300,"context_line":"          the sake of consistency between drivers, but this is an"},{"line_number":301,"context_line":"          implementation detail."},{"line_number":302,"context_line":""},{"line_number":303,"context_line":"Alternatives"},{"line_number":304,"context_line":"------------"},{"line_number":305,"context_line":""}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_9db73222","line":302,"updated":"2018-09-17 20:43:22.000000000","message":"Do we need all this complexity? Does it make sense to expose NUMA affinity for GPUs but then float CPUs and memory all over the host? How about a single numa_resources \u003d [flat,nested] conf option for virt drivers?","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"4af83c6a1784608158b89ad595732e9b591fe81c","unresolved":false,"context_lines":[{"line_number":352,"context_line":""},{"line_number":353,"context_line":"Upgrade impact"},{"line_number":354,"context_line":"--------------"},{"line_number":355,"context_line":"None"},{"line_number":356,"context_line":""},{"line_number":357,"context_line":"Implementation"},{"line_number":358,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_c53768af","line":355,"updated":"2018-09-12 19:59:39.000000000","message":"*WRONG* We need to reshape.","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"b9c06dba87800858f8daeb30cff2e67f5e57e1b0","unresolved":false,"context_lines":[{"line_number":352,"context_line":""},{"line_number":353,"context_line":"Upgrade impact"},{"line_number":354,"context_line":"--------------"},{"line_number":355,"context_line":"None"},{"line_number":356,"context_line":""},{"line_number":357,"context_line":"Implementation"},{"line_number":358,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":10,"id":"3f79a3b5_506c749c","line":355,"in_reply_to":"3f79a3b5_c53768af","updated":"2018-09-12 20:38:23.000000000","message":"FFU impact as well has to be documented","commit_id":"f9b9c51845e713af88a269f0c78e7dac3feba301"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":19,"context_line":".. note::"},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"  This spec only targets to model resource capabilities for NUMA nodes in some"},{"line_number":22,"context_line":"  general and quite abstact manner. Specific resource capabilities like pinned"},{"line_number":23,"context_line":"  CPUs will be described in other `CPU resources`_ spec. To the same extent,"},{"line_number":24,"context_line":"  how this model can be queried for specific grouped request tied to a certain"},{"line_number":25,"context_line":"  NUMA node (where the main usecase is NUMA affinity) will also be discussed"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_336bc21c","line":22,"range":{"start_line":22,"start_character":20,"end_line":22,"end_character":27},"updated":"2018-10-23 19:33:30.000000000","message":"abstract","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":167,"context_line":"the virt driver) and where NUMA# would litterally be a string made of \u0027NUMA\u0027"},{"line_number":168,"context_line":"postfixed by the NUMA cell ID which is provided by the virt driver."},{"line_number":169,"context_line":"For PCI devices, we propose the convention to be ``nodename_PCIaddr`` where"},{"line_number":170,"context_line":"PCIaddr would be the (formatted) PCI address of the device."},{"line_number":171,"context_line":""},{"line_number":172,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":173,"context_line":"classes :"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_1319e6d0","line":170,"updated":"2018-10-23 19:33:30.000000000","message":"As discussed in Denver, by convention we should prefix the name of each provider with the name of its parent. And I think we may have also said we would use double underscore to separate the prefix (to ensure disambiguation when the names themselves contain underscores). So assuming the PCI device is itself a child of a numa provider, its name would actually be three-tiered, something like: $nodename__NUMA$N__PCI$DBSF","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f411d171c272e83c92dc73270db2bbc6c29501fa","unresolved":false,"context_lines":[{"line_number":167,"context_line":"the virt driver) and where NUMA# would litterally be a string made of \u0027NUMA\u0027"},{"line_number":168,"context_line":"postfixed by the NUMA cell ID which is provided by the virt driver."},{"line_number":169,"context_line":"For PCI devices, we propose the convention to be ``nodename_PCIaddr`` where"},{"line_number":170,"context_line":"PCIaddr would be the (formatted) PCI address of the device."},{"line_number":171,"context_line":""},{"line_number":172,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":173,"context_line":"classes :"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_5d619aa7","line":170,"in_reply_to":"3f79a3b5_1319e6d0","updated":"2018-10-24 15:15:47.000000000","message":"I don\u0027t like having more than $compute__$pciaddr\nWhy ? because we could provide some PCI devices as child RPs while they\u0027re not NUMA related.\nAlso I think we said during the PTG that in case you\u0027d like to know whether it\u0027s a NUMA related child, you should just look at the parent.\n\nAlso, saying \"PCI%s\" is bizarre, because I don\u0027t understand why we should know whether it\u0027s a PCI device or not. If we need to know this, why not having a \"PCI\" trait ?","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"2c3fdffcc10e87f5bcf6ab4222265751e431ee2f","unresolved":false,"context_lines":[{"line_number":167,"context_line":"the virt driver) and where NUMA# would litterally be a string made of \u0027NUMA\u0027"},{"line_number":168,"context_line":"postfixed by the NUMA cell ID which is provided by the virt driver."},{"line_number":169,"context_line":"For PCI devices, we propose the convention to be ``nodename_PCIaddr`` where"},{"line_number":170,"context_line":"PCIaddr would be the (formatted) PCI address of the device."},{"line_number":171,"context_line":""},{"line_number":172,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":173,"context_line":"classes :"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_f1d44d20","line":170,"in_reply_to":"3f79a3b5_5d619aa7","updated":"2018-10-24 15:40:11.000000000","message":"\u003e I don\u0027t like having more than $compute__$pciaddr\n \u003e Why ? because we could provide some PCI devices as child RPs while\n \u003e they\u0027re not NUMA related.\n\nI get that, but future us will be much happer if we make a standard format and stick to it universally. PCI address should be unique on a compute node, but maybe there\u0027s something else that\u0027s not, where we would wind up with the same name if we didn\u0027t interpose the $numa bit.\n\nAre you trying to make it easier to \"find\" PCI resource providers? Assuming we want to use the name to do that, name.endswith is as easy as anything. (Better yet, name.rsplit(\u0027__\u0027, 1)[-1] works even if the PCI device is somehow a root provider.)\n\n \u003e Also I think we said during the PTG that in case you\u0027d like to know\n \u003e whether it\u0027s a NUMA related child, you should just look at the\n \u003e parent.\n\nAgree that we generally shouldn\u0027t go around parsing names to discover properties of providers. (That said, if we don\u0027t get to use traits for e.g. PCI addresses, not sure what other options we have. Is encoding metadata into provider names any better than doing the same with traits?) It\u0027s more about guaranteeing uniqueness (more below) and having a standard, predictable convention.\n\n \u003e Also, saying \"PCI%s\" is bizarre, because I don\u0027t understand why we\n \u003e should know whether it\u0027s a PCI device or not. If we need to know\n \u003e this, why not having a \"PCI\" trait ?\n\nI would love to have a PCI trait, but that\u0027s a separate issue.\n\nThe reasoning to include some kind of type designation is to guarantee name uniqueness. I don\u0027t have a concrete example right now (which is kind of the point - solving for specific examples still leaves holes for all the things we didn\u0027t specifically solve for) but what if there\u0027s some other kind of provider (non-PCI) whose normalized name also happens to be eight hex digits? Then we could end up with a name conflict in the field that would be pretty tough to deal with.\n\nUnlikely, maybe; but coming up with a standard, predictable, universal, unambiguous naming convention isn\u0027t that hard, so we should do it.","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":199,"context_line":"          yet a consensus, that\u0027s why we prefer to just provide the above."},{"line_number":200,"context_line":""},{"line_number":201,"context_line":""},{"line_number":202,"context_line":"Asking for vCPUs splitted evenly between NUMA nodes"},{"line_number":203,"context_line":"---------------------------------------------------"},{"line_number":204,"context_line":""},{"line_number":205,"context_line":"The existing feature of dividing all your vCPUs between NUMA nodes will be"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_d347ee79","line":202,"range":{"start_line":202,"start_character":17,"end_line":202,"end_character":25},"updated":"2018-10-23 19:33:30.000000000","message":"split","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":202,"context_line":"Asking for vCPUs splitted evenly between NUMA nodes"},{"line_number":203,"context_line":"---------------------------------------------------"},{"line_number":204,"context_line":""},{"line_number":205,"context_line":"The existing feature of dividing all your vCPUs between NUMA nodes will be"},{"line_number":206,"context_line":"asked by providing a numbered request group query like::"},{"line_number":207,"context_line":""},{"line_number":208,"context_line":"  resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_f3630a18","line":205,"range":{"start_line":205,"start_character":24,"end_line":205,"end_character":66},"updated":"2018-10-23 19:33:30.000000000","message":"This makes it sound like you\u0027ll be dividing *evenly*, which is not necessarily the case unless you have at least \u003cvCPU_count\u003e NUMA nodes and are using group_policy\u003disolate.","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":220,"context_line":""},{"line_number":221,"context_line":"* for a flavor of 8 VCPUs and ``hw:numa_nodes\u003d1``, it would translate into::"},{"line_number":222,"context_line":""},{"line_number":223,"context_line":"    resources1:VCPU\u003d8"},{"line_number":224,"context_line":""},{"line_number":225,"context_line":"Due to an upgrade concern, the existing extra spec property ``hw:numa_nodes\u003dN``"},{"line_number":226,"context_line":"(where N is an integer) will be supported but deprecated in Stein. The"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_fe4d1171","line":223,"range":{"start_line":223,"start_character":14,"end_line":223,"end_character":20},"updated":"2018-10-23 19:33:30.000000000","message":": and \u003d are backwards here (and in several of the other examples below)","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"2c3fdffcc10e87f5bcf6ab4222265751e431ee2f","unresolved":false,"context_lines":[{"line_number":220,"context_line":""},{"line_number":221,"context_line":"* for a flavor of 8 VCPUs and ``hw:numa_nodes\u003d1``, it would translate into::"},{"line_number":222,"context_line":""},{"line_number":223,"context_line":"    resources1:VCPU\u003d8"},{"line_number":224,"context_line":""},{"line_number":225,"context_line":"Due to an upgrade concern, the existing extra spec property ``hw:numa_nodes\u003dN``"},{"line_number":226,"context_line":"(where N is an integer) will be supported but deprecated in Stein. The"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_f17bed27","line":223,"range":{"start_line":223,"start_character":14,"end_line":223,"end_character":20},"in_reply_to":"3f79a3b5_9df732d2","updated":"2018-10-24 15:40:11.000000000","message":"It\u0027s not a concern, just a typo. The queryparam syntax is\n\n resources$N\u003d$RC:$AMOUNT,$RC:$AMOUNT,...","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f411d171c272e83c92dc73270db2bbc6c29501fa","unresolved":false,"context_lines":[{"line_number":220,"context_line":""},{"line_number":221,"context_line":"* for a flavor of 8 VCPUs and ``hw:numa_nodes\u003d1``, it would translate into::"},{"line_number":222,"context_line":""},{"line_number":223,"context_line":"    resources1:VCPU\u003d8"},{"line_number":224,"context_line":""},{"line_number":225,"context_line":"Due to an upgrade concern, the existing extra spec property ``hw:numa_nodes\u003dN``"},{"line_number":226,"context_line":"(where N is an integer) will be supported but deprecated in Stein. The"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_9df732d2","line":223,"range":{"start_line":223,"start_character":14,"end_line":223,"end_character":20},"in_reply_to":"3f79a3b5_fe4d1171","updated":"2018-10-24 15:15:47.000000000","message":"I don\u0027t understand the concern. Could you please explain me ?","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":227,"context_line":"scheduler service will actually translate this into the numbered request group"},{"line_number":228,"context_line":"query above for Stein, but we will remove the translation in the next cycle."},{"line_number":229,"context_line":""},{"line_number":230,"context_line":"Asking for vCPUs splitted unevenly between NUMA nodes"},{"line_number":231,"context_line":"-----------------------------------------------------"},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"The existing feature of asking a specific number of vCPUs for a NUMA node"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_f34c6a9e","line":230,"range":{"start_line":230,"start_character":17,"end_line":230,"end_character":25},"updated":"2018-10-23 19:33:30.000000000","message":"split","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":262,"context_line":"Again, we would provide a translation mechanism only for Stein in the scheduler"},{"line_number":263,"context_line":"service for the existing extra spec property ``hw:numa_mem`` that would be"},{"line_number":264,"context_line":"deprecated. For example, a flavor of 4GB RAM with extra specs set to"},{"line_number":265,"context_line":"``hw:numa_nodes\u003d2\u0026hw:numa_mem:0\u003d1024\u0026hw:numa_mem:1\u003d3072`` would be translated"},{"line_number":266,"context_line":"into"},{"line_number":267,"context_line":"``group_policy\u003disolate\u0026resources1:MEMORY_MB\u003d1024\u0026resources2:MEMORY_MB\u003d3072``"},{"line_number":268,"context_line":""}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_5e0e8533","line":265,"range":{"start_line":265,"start_character":29,"end_line":265,"end_character":30},"updated":"2018-10-23 19:33:30.000000000","message":".","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":262,"context_line":"Again, we would provide a translation mechanism only for Stein in the scheduler"},{"line_number":263,"context_line":"service for the existing extra spec property ``hw:numa_mem`` that would be"},{"line_number":264,"context_line":"deprecated. For example, a flavor of 4GB RAM with extra specs set to"},{"line_number":265,"context_line":"``hw:numa_nodes\u003d2\u0026hw:numa_mem:0\u003d1024\u0026hw:numa_mem:1\u003d3072`` would be translated"},{"line_number":266,"context_line":"into"},{"line_number":267,"context_line":"``group_policy\u003disolate\u0026resources1:MEMORY_MB\u003d1024\u0026resources2:MEMORY_MB\u003d3072``"},{"line_number":268,"context_line":""}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_7e0b4121","line":265,"range":{"start_line":265,"start_character":48,"end_line":265,"end_character":49},"updated":"2018-10-23 19:33:30.000000000","message":".","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":266,"context_line":"into"},{"line_number":267,"context_line":"``group_policy\u003disolate\u0026resources1:MEMORY_MB\u003d1024\u0026resources2:MEMORY_MB\u003d3072``"},{"line_number":268,"context_line":""},{"line_number":269,"context_line":""},{"line_number":270,"context_line":"Asking for affinity between hardware devices and NUMA nodes"},{"line_number":271,"context_line":"-----------------------------------------------------------"},{"line_number":272,"context_line":""}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_7ed58146","line":269,"updated":"2018-10-23 19:33:30.000000000","message":"Should add an example for requesting NUMA-affined CPU *and* memory asymmetrically. E.g.\n\n  hw:numa_nodes\u003d2\n \u0026hw:numa_cpus.0\u003d0,1\n \u0026hw:numa_mem.0\u003d1024\n \u0026hw:numa_cpus.1\u003d2,3,4,5,6,7\n \u0026hw:numa_mem.1\u003d3072\n\nneeds to translate into:\n\n  group_policy\u003disolate\n \u0026resources1\u003dVCPU:2,MEMORY_MB:1024\n \u0026resources2\u003dVCPU:6,MEMORY_MB:3072\n\nand *not*\n\n  group_policy\u003disolate\n \u0026resources1\u003dVCPU:2,MEMORY_MB:3072\n \u0026resources2\u003dVCPU:6,MEMORY_MB:1024\n\nTo wit, we should be explicit that for hw:numa_$R.$N, we guarantee that we\u0027ll ask for each $R in the same request group for a given $N. (It would be nice if we could just map hw:numa_$R.$N\u003d$X directly to resources$N\u003d$R:$X, but we can\u0027t rely on $N not being taken by some other request group already. So we\u0027ll have to use the gibi method and look for a free one.)","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f411d171c272e83c92dc73270db2bbc6c29501fa","unresolved":false,"context_lines":[{"line_number":266,"context_line":"into"},{"line_number":267,"context_line":"``group_policy\u003disolate\u0026resources1:MEMORY_MB\u003d1024\u0026resources2:MEMORY_MB\u003d3072``"},{"line_number":268,"context_line":""},{"line_number":269,"context_line":""},{"line_number":270,"context_line":"Asking for affinity between hardware devices and NUMA nodes"},{"line_number":271,"context_line":"-----------------------------------------------------------"},{"line_number":272,"context_line":""}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_3dcffe8b","line":269,"in_reply_to":"3f79a3b5_7ed58146","updated":"2018-10-24 15:15:47.000000000","message":"Good point","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":305,"context_line":"have a configuration for saying which resources should be nested."},{"line_number":306,"context_line":"To reinforce the above, that means a host would be either NUMA or non-NUMA,"},{"line_number":307,"context_line":"hence non-NUMA workloads being set on a specific NUMA node if host is set so."},{"line_number":308,"context_line":"The proposal we make here could be :"},{"line_number":309,"context_line":""},{"line_number":310,"context_line":".. code::"},{"line_number":311,"context_line":""}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_fedd1113","line":308,"range":{"start_line":308,"start_character":26,"end_line":308,"end_character":34},"updated":"2018-10-23 19:33:30.000000000","message":"So is this being proposed as part of the work covered by this spec, or not?\n\nIf so, we need to talk about exactly when it\u0027s legal for the operator to change this conf setting. Because doing so could get us into a situation where we need to do a reshape, which we\u0027ve said we only want to allow on upgrade boundaries. So what do we do if the operator randomly edits this setting and restarts the compute service?","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":324,"context_line":"   We reserve the choice to modify the default value to implicitly list all the"},{"line_number":325,"context_line":"   NUMA-related resource classes, but that would be done in a later cycle."},{"line_number":326,"context_line":""},{"line_number":327,"context_line":"E.g., a nova.conf having set ``[devices ]/numa_resource_classes \u003d VGPU`` would"},{"line_number":328,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"},{"line_number":329,"context_line":""},{"line_number":330,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_7e9b4163","line":327,"range":{"start_line":327,"start_character":29,"end_line":327,"end_character":72},"updated":"2018-10-23 19:33:30.000000000","message":"This doesn\u0027t match L312-3. Which is it? (I prefer L312-3)","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"93d0de2eebea7c236e67f61dd478296e007c7c8e","unresolved":false,"context_lines":[{"line_number":418,"context_line":"Since root provider inventories will have to change when upgrading from Rocky"},{"line_number":419,"context_line":"besides the existing allocations for the ``VCPU`` and ``MEMORY_MB`` resource"},{"line_number":420,"context_line":"classes, the virt drivers will be responsible for providing a reshape mechanism"},{"line_number":421,"context_line":"that will eventually call the Placement API ``/reshaper`` endpoint when"},{"line_number":422,"context_line":"starting the compute service."},{"line_number":423,"context_line":"This reshape implementation will absolutely need to consider the Fast Forward"},{"line_number":424,"context_line":"Upgrade (FFU) strategy where all controplane is down and should possibly"}],"source_content_type":"text/x-rst","patch_set":11,"id":"3f79a3b5_3e1629a3","line":421,"updated":"2018-10-23 19:33:30.000000000","message":"wouldn\u0027t hurt to add a reference to the reshaper spec.","commit_id":"5ac130dae30c195a01dc6a4a2d26250fbf58cd90"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"74d32e9911236eb38c162ee35a7c97a9061876d5","unresolved":false,"context_lines":[{"line_number":352,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":353,"context_line":"   than once after upgrading."},{"line_number":354,"context_line":""},{"line_number":355,"context_line":"E.g., a nova.conf having set ``[numa]/numa_resource_classes \u003d VGPU`` would"},{"line_number":356,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"},{"line_number":357,"context_line":""},{"line_number":358,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":12,"id":"3f79a3b5_73d28500","line":355,"range":{"start_line":355,"start_character":38,"end_line":355,"end_character":59},"updated":"2018-10-24 14:42:22.000000000","message":"resource_classes","commit_id":"662a4278c862190b94a37b769631c6fdf12f3790"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f411d171c272e83c92dc73270db2bbc6c29501fa","unresolved":false,"context_lines":[{"line_number":352,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":353,"context_line":"   than once after upgrading."},{"line_number":354,"context_line":""},{"line_number":355,"context_line":"E.g., a nova.conf having set ``[numa]/numa_resource_classes \u003d VGPU`` would"},{"line_number":356,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"},{"line_number":357,"context_line":""},{"line_number":358,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":12,"id":"3f79a3b5_56073782","line":355,"range":{"start_line":355,"start_character":38,"end_line":355,"end_character":59},"in_reply_to":"3f79a3b5_73d28500","updated":"2018-10-24 15:15:47.000000000","message":"Holy shit. /me facepalms","commit_id":"662a4278c862190b94a37b769631c6fdf12f3790"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"751a1f89c409db38ee036c4ae3ae412522b74e7d","unresolved":false,"context_lines":[{"line_number":38,"context_line":"- *which* resource(s) should be used for this query (eg. which pCPUs or NUMA"},{"line_number":39,"context_line":"  node)"},{"line_number":40,"context_line":""},{"line_number":41,"context_line":"While the latter verification *could* still be needed for this filter, the"},{"line_number":42,"context_line":"former verification (ie. the host fit) can be done by the Placement API."},{"line_number":43,"context_line":""},{"line_number":44,"context_line":".. note::"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_5d064487","line":41,"range":{"start_line":41,"start_character":31,"end_line":41,"end_character":36},"updated":"2018-10-26 13:41:35.000000000","message":"should. Jay has made it clear, with good justification, that placement will never have a role in generating CPU pin mapping","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":38,"context_line":"- *which* resource(s) should be used for this query (eg. which pCPUs or NUMA"},{"line_number":39,"context_line":"  node)"},{"line_number":40,"context_line":""},{"line_number":41,"context_line":"While the latter verification *could* still be needed for this filter, the"},{"line_number":42,"context_line":"former verification (ie. the host fit) can be done by the Placement API."},{"line_number":43,"context_line":""},{"line_number":44,"context_line":".. note::"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_07548ffc","line":41,"range":{"start_line":41,"start_character":31,"end_line":41,"end_character":36},"in_reply_to":"3f79a3b5_5d064487","updated":"2018-11-16 00:03:26.000000000","message":"could -\u003e will","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f59704a0475932ceab70dd54b7280c8fe190d9ea","unresolved":false,"context_lines":[{"line_number":39,"context_line":"  node)"},{"line_number":40,"context_line":""},{"line_number":41,"context_line":"While the latter verification *could* still be needed for this filter, the"},{"line_number":42,"context_line":"former verification (ie. the host fit) can be done by the Placement API."},{"line_number":43,"context_line":""},{"line_number":44,"context_line":".. note::"},{"line_number":45,"context_line":"  Eventually, we would like to make sure that we no longer check *which*"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_7428c8ec","line":42,"updated":"2018-10-30 12:59:21.000000000","message":"Some part of the *which* like, which NUMA node, will be decided by placement and nova together. When the nova scheduler selects one allocation candidate from the candidates returned by placement, nova at that point also selects *which* NUMA node the resources will be allocated from. However nova can only select a candidate if that was returned by placement in a first place, so placement does have a saying about the *which* decision as well.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"751a1f89c409db38ee036c4ae3ae412522b74e7d","unresolved":false,"context_lines":[{"line_number":42,"context_line":"former verification (ie. the host fit) can be done by the Placement API."},{"line_number":43,"context_line":""},{"line_number":44,"context_line":".. note::"},{"line_number":45,"context_line":"  Eventually, we would like to make sure that we no longer check *which*"},{"line_number":46,"context_line":"  resources shall be used in the scheduler filter, but rather somewhere done by"},{"line_number":47,"context_line":"  the compute service (eg. in the virt driver), but that\u0027s not something"},{"line_number":48,"context_line":"  related to this spec."},{"line_number":49,"context_line":""},{"line_number":50,"context_line":"Accordingly, we can model the host CPU (and later memory) topologies as a set"},{"line_number":51,"context_line":"of resource providers arranged in a tree, and just directly allocate resources"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_1dfc4c6f","line":48,"range":{"start_line":45,"start_character":0,"end_line":48,"end_character":23},"updated":"2018-10-26 13:41:35.000000000","message":"Huh?","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":42,"context_line":"former verification (ie. the host fit) can be done by the Placement API."},{"line_number":43,"context_line":""},{"line_number":44,"context_line":".. note::"},{"line_number":45,"context_line":"  Eventually, we would like to make sure that we no longer check *which*"},{"line_number":46,"context_line":"  resources shall be used in the scheduler filter, but rather somewhere done by"},{"line_number":47,"context_line":"  the compute service (eg. in the virt driver), but that\u0027s not something"},{"line_number":48,"context_line":"  related to this spec."},{"line_number":49,"context_line":""},{"line_number":50,"context_line":"Accordingly, we can model the host CPU (and later memory) topologies as a set"},{"line_number":51,"context_line":"of resource providers arranged in a tree, and just directly allocate resources"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_873f3fb1","line":48,"range":{"start_line":45,"start_character":0,"end_line":48,"end_character":23},"in_reply_to":"3f79a3b5_1dfc4c6f","updated":"2018-11-16 00:03:26.000000000","message":"am i did not think this is a goal.\n\ni would much prefer the schduler to continue  to calulate the pinnings and pass that to the compute node to use \nso that we can elimnate the pinning calulatetion on the compute node. i dont think we can ever really eliminate it from the schduler.\n\nideally we would claim the pinned core/mempages in the condoctor/schdueler also before we make the down call to the compute node to eliminate the late race on the compute node that exists today but as you said this is out of scope of this spec.\n\nmaybe we should just remove this note.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f59704a0475932ceab70dd54b7280c8fe190d9ea","unresolved":false,"context_lines":[{"line_number":48,"context_line":"  related to this spec."},{"line_number":49,"context_line":""},{"line_number":50,"context_line":"Accordingly, we can model the host CPU (and later memory) topologies as a set"},{"line_number":51,"context_line":"of resource providers arranged in a tree, and just directly allocate resources"},{"line_number":52,"context_line":"a specific instance from a resource provider representing a NUMA node."},{"line_number":53,"context_line":""},{"line_number":54,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":55,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_d41f5cb7","line":52,"range":{"start_line":51,"start_character":51,"end_line":52,"end_character":19},"updated":"2018-10-30 12:59:21.000000000","message":"nit: for a specific instance","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":53,"context_line":""},{"line_number":54,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":55,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"},{"line_number":56,"context_line":"will be able to accurately query usage information for dedicated CPUs and"},{"line_number":57,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":58,"context_line":"like disk and RAM."},{"line_number":59,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_07a24ffc","line":56,"range":{"start_line":56,"start_character":33,"end_line":56,"end_character":38},"updated":"2018-11-16 00:03:26.000000000","message":"its important to rememebr that placement keeps a tally count of the availabel inventory of a specific resouce.\n\nso while we can querry the amount of cpus availabe on a numa node we cannot query which cpus are free or used in placement and will need to continue to track that in the resouce tracker.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":57,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":58,"context_line":"like disk and RAM."},{"line_number":59,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":60,"context_line":"within a NUMA node for a vCPU`_ or `customizing the instance CPU topology`_)"},{"line_number":61,"context_line":"would still be only done by the virt driver, and are not covered by this spec."},{"line_number":62,"context_line":""},{"line_number":63,"context_line":"Use Cases"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_077b2f7a","line":60,"range":{"start_line":60,"start_character":34,"end_line":60,"end_character":75},"updated":"2018-11-16 00:03:26.000000000","message":"the instance cpu topology has nothing to do to with the host topology so i thas no interaction with either the resouce tracker or placement so it really should not be called out here.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"751a1f89c409db38ee036c4ae3ae412522b74e7d","unresolved":false,"context_lines":[{"line_number":153,"context_line":"   +------------------+                 +-----------------+"},{"line_number":154,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":155,"context_line":"   | VCPU: 8          |                 | VCPU: 8         | (shared CPUs)"},{"line_number":156,"context_line":"   | PCPU: 8          |                 | PCPU: 8         | (dedicated CPUs)"},{"line_number":157,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":158,"context_line":"   +------------------+                 +-----------------+"},{"line_number":159,"context_line":"            |                                 |"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_9d15bc97","line":156,"range":{"start_line":156,"start_character":42,"end_line":156,"end_character":49},"updated":"2018-10-26 13:41:35.000000000","message":"The inclusion of this makes Jay\u0027s spec a hard dependency of this one. You should call that out earlier or with a note immediately before/after this","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"751a1f89c409db38ee036c4ae3ae412522b74e7d","unresolved":false,"context_lines":[{"line_number":164,"context_line":""},{"line_number":165,"context_line":"Resource Provider names for NUMA nodes shall follow a convention of"},{"line_number":166,"context_line":"``nodename_NUMA#`` where nodename would be the hypervisor hostname (given by"},{"line_number":167,"context_line":"the virt driver) and where NUMA# would litterally be a string made of \u0027NUMA\u0027"},{"line_number":168,"context_line":"postfixed by the NUMA cell ID which is provided by the virt driver."},{"line_number":169,"context_line":"For PCI devices, we propose the convention to be ``nodename__PCIaddr`` where"},{"line_number":170,"context_line":"PCIaddr would be the (formatted) PCI address of the device. That said, it\u0027s an"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_bd699826","line":167,"range":{"start_line":167,"start_character":42,"end_line":167,"end_character":43},"updated":"2018-10-26 13:41:35.000000000","message":"literally","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f59704a0475932ceab70dd54b7280c8fe190d9ea","unresolved":false,"context_lines":[{"line_number":211,"context_line":"As the Placement API added the ability to shard resources in the 1.25"},{"line_number":212,"context_line":"microversion, we would amend the above by::"},{"line_number":213,"context_line":""},{"line_number":214,"context_line":"  group_policy\u003disolate"},{"line_number":215,"context_line":""},{"line_number":216,"context_line":"For example:"},{"line_number":217,"context_line":""}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_54bfcc78","line":214,"range":{"start_line":214,"start_character":2,"end_line":214,"end_character":22},"updated":"2018-10-30 12:59:21.000000000","message":"I think group_policy is directly settable via flavor extra_spec [1]. So we have to tell the admin to add that key when creating the flavor with hw:numa_nodes\u003dFLAVOR-NODES. Or in the other hand we have to make sure if the admin adds that for some other reason then the admin knows that it also affect the meaning of hw:numa_nodes\u003dFLAVOR-NODES as well.\n\n[1]https://github.com/openstack/nova/blob/a9a893ed76566d0f49c71412ad572b2305dd6897/nova/scheduler/utils.py#L150","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":211,"context_line":"As the Placement API added the ability to shard resources in the 1.25"},{"line_number":212,"context_line":"microversion, we would amend the above by::"},{"line_number":213,"context_line":""},{"line_number":214,"context_line":"  group_policy\u003disolate"},{"line_number":215,"context_line":""},{"line_number":216,"context_line":"For example:"},{"line_number":217,"context_line":""}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_e7eab30b","line":214,"range":{"start_line":214,"start_character":2,"end_line":214,"end_character":22},"in_reply_to":"3f79a3b5_54bfcc78","updated":"2018-11-16 00:03:26.000000000","message":"it is setable but for backwars compatiblity we cannot require the admin to set group_pliocy in the flavor\n\nas such if numa_nodes is set and group_policy is not we will need to infer group_policy\u003disolate.\n\nif the admin explcitly sets it then we will need to either raise an  exception if it conflicts with the derived value or allow the explcit value to override it.\n\ni would prefer the exception.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f2f9750c222282fb4e8b1d31949f8c36928c9c46","unresolved":false,"context_lines":[{"line_number":211,"context_line":"As the Placement API added the ability to shard resources in the 1.25"},{"line_number":212,"context_line":"microversion, we would amend the above by::"},{"line_number":213,"context_line":""},{"line_number":214,"context_line":"  group_policy\u003disolate"},{"line_number":215,"context_line":""},{"line_number":216,"context_line":"For example:"},{"line_number":217,"context_line":""}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_7ea53748","line":214,"range":{"start_line":214,"start_character":2,"end_line":214,"end_character":22},"in_reply_to":"3f79a3b5_e7eab30b","updated":"2020-01-29 10:27:17.000000000","message":"For backwards compatibility, see in the spec how I plan to manage it.\n\nWe could also nitpick on group_policy being used for other subresources but VCPU but then that\u0027s a Placement API semantics, right?","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":308,"context_line":"in terms of numbered request groups, we somehow need to amend the Placement API"},{"line_number":309,"context_line":"so that we could query some resources where some providers could be optionnally"},{"line_number":310,"context_line":"in the same subtree."},{"line_number":311,"context_line":"Since Placement API doesn\u0027t provide such expression for the moment, we also"},{"line_number":312,"context_line":"acknowledge the fact that NUMA affinity will be, at least for Stein, managed"},{"line_number":313,"context_line":"through other mechanisms but Placement queries."},{"line_number":314,"context_line":""}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_87b41f1f","line":311,"range":{"start_line":311,"start_character":0,"end_line":311,"end_character":66},"updated":"2018-11-16 00:03:26.000000000","message":"this is explitly a non goal of placement and will never be supported unless there is a significat change in direction so i woudld remove this sub paragraph.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f59704a0475932ceab70dd54b7280c8fe190d9ea","unresolved":false,"context_lines":[{"line_number":309,"context_line":"so that we could query some resources where some providers could be optionnally"},{"line_number":310,"context_line":"in the same subtree."},{"line_number":311,"context_line":"Since Placement API doesn\u0027t provide such expression for the moment, we also"},{"line_number":312,"context_line":"acknowledge the fact that NUMA affinity will be, at least for Stein, managed"},{"line_number":313,"context_line":"through other mechanisms but Placement queries."},{"line_number":314,"context_line":""},{"line_number":315,"context_line":""},{"line_number":316,"context_line":"Optionally configured NUMA resources"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_3743326e","line":313,"range":{"start_line":312,"start_character":26,"end_line":313,"end_character":47},"updated":"2018-10-30 12:59:21.000000000","message":"I guess this other mechanism should be the NUMATopologyFilter and the PciPassthroughFilter selecting the proper allocation candidate. BUT those filters only filtering compute hosts (i.e. root RP) so if there are multiple candidate per compute host (with different NUMA allocations having different affinities) then what mechanism will do the selection? \n\nIt cannot be in the nova-compute or in the virt driver as scheduler will allocation one candidate in placement that will determine the affinity. So it needs to be in the scheduler somehow. \n\nWill we extend the BaseFilter to be capable of filtering allocation candidates instead of filtering just host?\n\nOr do we want to build some logic that runs _after_ the filters and modifies the currently simple allocation candidate selection in [1]?\n\n[1] https://github.com/openstack/nova/blob/a9a893ed76566d0f49c71412ad572b2305dd6897/nova/scheduler/filter_scheduler.py#L224-L229","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f59704a0475932ceab70dd54b7280c8fe190d9ea","unresolved":false,"context_lines":[{"line_number":374,"context_line":"      +--------------+"},{"line_number":375,"context_line":""},{"line_number":376,"context_line":""},{"line_number":377,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":378,"context_line":"          makes the population of those nested Resource Providers to necessarly"},{"line_number":379,"context_line":"          be done by each virt driver. Consequently, while the above"},{"line_number":380,"context_line":"          configuration option is said to be generic, the use of this option"},{"line_number":381,"context_line":"          for populating the Resource Providers tree will only be done by"},{"line_number":382,"context_line":"          the virt drivers. Of course, a shared module could be imagined for"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_9a2d8d92","line":379,"range":{"start_line":377,"start_character":10,"end_line":379,"end_character":38},"updated":"2018-10-30 12:59:21.000000000","message":"Does it also means that some virt driver will ignore the [numa]/resource_classes \u003d VGPU config value? Or will the implementation of this spec cover every in tree driver?","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f2f9750c222282fb4e8b1d31949f8c36928c9c46","unresolved":false,"context_lines":[{"line_number":374,"context_line":"      +--------------+"},{"line_number":375,"context_line":""},{"line_number":376,"context_line":""},{"line_number":377,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":378,"context_line":"          makes the population of those nested Resource Providers to necessarly"},{"line_number":379,"context_line":"          be done by each virt driver. Consequently, while the above"},{"line_number":380,"context_line":"          configuration option is said to be generic, the use of this option"},{"line_number":381,"context_line":"          for populating the Resource Providers tree will only be done by"},{"line_number":382,"context_line":"          the virt drivers. Of course, a shared module could be imagined for"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_be3d0fe6","line":379,"range":{"start_line":377,"start_character":10,"end_line":379,"end_character":38},"in_reply_to":"3f79a3b5_674c6340","updated":"2020-01-29 10:27:17.000000000","message":"Sean, you\u0027re correct.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":374,"context_line":"      +--------------+"},{"line_number":375,"context_line":""},{"line_number":376,"context_line":""},{"line_number":377,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":378,"context_line":"          makes the population of those nested Resource Providers to necessarly"},{"line_number":379,"context_line":"          be done by each virt driver. Consequently, while the above"},{"line_number":380,"context_line":"          configuration option is said to be generic, the use of this option"},{"line_number":381,"context_line":"          for populating the Resource Providers tree will only be done by"},{"line_number":382,"context_line":"          the virt drivers. Of course, a shared module could be imagined for"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_674c6340","line":379,"range":{"start_line":377,"start_character":10,"end_line":379,"end_character":38},"in_reply_to":"3f79a3b5_9a2d8d92","updated":"2018-11-16 00:03:26.000000000","message":"well currently only the libvirt virt driver support vgpus.\n\nbut this basically state that as the virt drivers are responsible for discovering the available resources on a host they are also responsible for deciding to report them to placement.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":410,"context_line":"None"},{"line_number":411,"context_line":""},{"line_number":412,"context_line":"Other end user impact"},{"line_number":413,"context_line":"---------------------"},{"line_number":414,"context_line":"Operators will need to modify their flavors by using direct numbered request"},{"line_number":415,"context_line":"groups for exploiting the new functionality. That said, we will provide for"},{"line_number":416,"context_line":"Stein a translation mechanism which will avoid them to modify their flavors"},{"line_number":417,"context_line":"before upgrading."},{"line_number":418,"context_line":"See `Documentation Impact`_."},{"line_number":419,"context_line":""},{"line_number":420,"context_line":"There is no impact on the nova or openstack CLIs.  The existing CLI syntax is"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_070c8fe9","line":417,"range":{"start_line":413,"start_character":0,"end_line":417,"end_character":17},"updated":"2018-11-16 00:03:26.000000000","message":"am why can we not auto generate the numberd request groups as log as the operator has not specifed the resources dict manually in the extra specs.\n\ni really am not that comfortable with requrieing the operator to use the numbered syntax.\n\nthe numa extra specs are complicated enjough this would be significantly worse. we should be aiming to have no flavor change at all.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f2f9750c222282fb4e8b1d31949f8c36928c9c46","unresolved":false,"context_lines":[{"line_number":410,"context_line":"None"},{"line_number":411,"context_line":""},{"line_number":412,"context_line":"Other end user impact"},{"line_number":413,"context_line":"---------------------"},{"line_number":414,"context_line":"Operators will need to modify their flavors by using direct numbered request"},{"line_number":415,"context_line":"groups for exploiting the new functionality. That said, we will provide for"},{"line_number":416,"context_line":"Stein a translation mechanism which will avoid them to modify their flavors"},{"line_number":417,"context_line":"before upgrading."},{"line_number":418,"context_line":"See `Documentation Impact`_."},{"line_number":419,"context_line":""},{"line_number":420,"context_line":"There is no impact on the nova or openstack CLIs.  The existing CLI syntax is"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_de124b64","line":417,"range":{"start_line":413,"start_character":0,"end_line":417,"end_character":17},"in_reply_to":"3f79a3b5_070c8fe9","updated":"2020-01-29 10:27:17.000000000","message":"Keeping old flavor semantics and not asking operators to not directly provide numbered request groups was an option I was considering, but which would require a translation mechanism either way.\n\nGiven consensus I got when asking, asking flavors to be new-style would help us to remove this compat code for translating to Placement which will , I think, also help operators to better understand the queries.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f59704a0475932ceab70dd54b7280c8fe190d9ea","unresolved":false,"context_lines":[{"line_number":441,"context_line":"provide a translation mechanism in Stein only that will take the existing"},{"line_number":442,"context_line":"flavor extra spec properties and transform them into Placement numbered groups"},{"line_number":443,"context_line":"query. This translation mechanism is only planned to be temporary for the Stein"},{"line_number":444,"context_line":"release."},{"line_number":445,"context_line":""},{"line_number":446,"context_line":"Since root provider inventories will have to change when upgrading from Rocky"},{"line_number":447,"context_line":"besides the existing allocations for the ``VCPU`` and ``MEMORY_MB`` resource"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_badba978","line":444,"updated":"2018-10-30 12:59:21.000000000","message":"Does it mean that this translation mechanism will be used as a flavor data migration at the same time? I mean when in Train the translation mechanism is dropped will there be still flavors with old numa extra_spec keys?","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":5754,"name":"Alex Xu","email":"hejie.xu@intel.com","username":"xuhj"},"change_message_id":"dd43746a2e93a6c3a2cea4018217984ca848f1e9","unresolved":false,"context_lines":[{"line_number":441,"context_line":"provide a translation mechanism in Stein only that will take the existing"},{"line_number":442,"context_line":"flavor extra spec properties and transform them into Placement numbered groups"},{"line_number":443,"context_line":"query. This translation mechanism is only planned to be temporary for the Stein"},{"line_number":444,"context_line":"release."},{"line_number":445,"context_line":""},{"line_number":446,"context_line":"Since root provider inventories will have to change when upgrading from Rocky"},{"line_number":447,"context_line":"besides the existing allocations for the ``VCPU`` and ``MEMORY_MB`` resource"}],"source_content_type":"text/x-rst","patch_set":13,"id":"9fdfeff1_4192b4a1","line":444,"in_reply_to":"3f79a3b5_27266b6c","updated":"2019-02-19 07:59:32.000000000","message":"I prefer to keep it also. It is more clear for the end user understand the guest numa topology. Let the user get the numa topology from the request group, that is really pain.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f2f9750c222282fb4e8b1d31949f8c36928c9c46","unresolved":false,"context_lines":[{"line_number":441,"context_line":"provide a translation mechanism in Stein only that will take the existing"},{"line_number":442,"context_line":"flavor extra spec properties and transform them into Placement numbered groups"},{"line_number":443,"context_line":"query. This translation mechanism is only planned to be temporary for the Stein"},{"line_number":444,"context_line":"release."},{"line_number":445,"context_line":""},{"line_number":446,"context_line":"Since root provider inventories will have to change when upgrading from Rocky"},{"line_number":447,"context_line":"besides the existing allocations for the ``VCPU`` and ``MEMORY_MB`` resource"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_be0bcf29","line":444,"in_reply_to":"3f79a3b5_27266b6c","updated":"2020-01-29 10:27:17.000000000","message":"gibi, excellent point about nested flavors in the instances. We somehow need to translate those in some data migration or old instances could be broken in Train if operators ask to move them.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e4983f5b3a3cc7b333fbf9fd6a8b4c2575f04123","unresolved":false,"context_lines":[{"line_number":441,"context_line":"provide a translation mechanism in Stein only that will take the existing"},{"line_number":442,"context_line":"flavor extra spec properties and transform them into Placement numbered groups"},{"line_number":443,"context_line":"query. This translation mechanism is only planned to be temporary for the Stein"},{"line_number":444,"context_line":"release."},{"line_number":445,"context_line":""},{"line_number":446,"context_line":"Since root provider inventories will have to change when upgrading from Rocky"},{"line_number":447,"context_line":"besides the existing allocations for the ``VCPU`` and ``MEMORY_MB`` resource"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_27266b6c","line":444,"in_reply_to":"3f79a3b5_badba978","updated":"2018-11-16 00:03:26.000000000","message":"i think this is really the wrong direction.\nwe should preserve the current numa extra spec keys and generage teh placemnt groups dynamicaly.","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f59704a0475932ceab70dd54b7280c8fe190d9ea","unresolved":false,"context_lines":[{"line_number":449,"context_line":"that will eventually call the `Placement API /reshaper endpoint`_ when"},{"line_number":450,"context_line":"starting the compute service."},{"line_number":451,"context_line":"This reshape implementation will absolutely need to consider the Fast Forward"},{"line_number":452,"context_line":"Upgrade (FFU) strategy where all controplane is down and should possibly"},{"line_number":453,"context_line":"document any extra step required for FFU."},{"line_number":454,"context_line":""},{"line_number":455,"context_line":"Implementation"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_3ac9d9aa","line":452,"range":{"start_line":452,"start_character":33,"end_line":452,"end_character":44},"updated":"2018-10-30 12:59:21.000000000","message":"nit: control plane","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"f59704a0475932ceab70dd54b7280c8fe190d9ea","unresolved":false,"context_lines":[{"line_number":463,"context_line":"Work Items"},{"line_number":464,"context_line":"----------"},{"line_number":465,"context_line":""},{"line_number":466,"context_line":"* Virt drivers passing NUMA topology through ```update_provider_tree()`` API."},{"line_number":467,"context_line":"* Scheduler translating flavor extra specs for NUMA properties into Placement"},{"line_number":468,"context_line":"  queries."},{"line_number":469,"context_line":"* nova-status check command for looking up existing flavors and see whether"}],"source_content_type":"text/x-rst","patch_set":13,"id":"3f79a3b5_7a3431ac","line":466,"range":{"start_line":466,"start_character":2,"end_line":466,"end_character":22},"updated":"2018-10-30 12:59:21.000000000","message":"every in tree virt drivers?","commit_id":"c4ae8fe22f51dde94b2147203941b83d3f4bdf04"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[],"source_content_type":"","patch_set":14,"id":"3fce034c_0b879dd9","updated":"2019-04-16 16:45:55.000000000","message":"This needs to be moved to specs/train/approved","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":20,"context_line":""},{"line_number":21,"context_line":"  This spec only targets to model resource capabilities for NUMA nodes in some"},{"line_number":22,"context_line":"  general and quite abstract manner. Specific resource capabilities like pinned"},{"line_number":23,"context_line":"  CPUs will be described in other `CPU resources`_ spec. To the same extent,"},{"line_number":24,"context_line":"  how this model can be queried for specific grouped request tied to a certain"},{"line_number":25,"context_line":"  NUMA node (where the main usecase is NUMA affinity) will also be discussed"},{"line_number":26,"context_line":"  in other spec (yet to be proposed)."},{"line_number":27,"context_line":""},{"line_number":28,"context_line":""},{"line_number":29,"context_line":"Problem description"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_4d0e398e","line":26,"range":{"start_line":23,"start_character":57,"end_line":26,"end_character":37},"updated":"2019-04-15 13:27:18.000000000","message":"we state this in the note but we sepend a resonable amount of this spec discussing this later.\n\ni would remove this or just state that how the query is constructed while related is not the primary focuse of this spec.\n\nto that end however if we are to cover that in this spec my personal proposal would be to take a module approch where we do not update any flavor and instead add a nova prefilters or transform function per resouce class.\n\neach transform function will take a request spec + inial placement query and transform it to numa aware version.\n\nthe intent being that as an operator elect to enable reporting of a resouce class via a numa toplogy they \nwoudl also enable the corresponding prefilter/transform but would not need to modify any flavor or images.\n\nthis could be a temproy solution or perment. \n\nanyway that is not the focus of this spec just how i would approch it to minims the upgrade impact.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":42,"context_line":"former verification (ie. the host fit) can be done by the Placement API and"},{"line_number":43,"context_line":"the Nova scheduler (by allocation candidate)."},{"line_number":44,"context_line":""},{"line_number":45,"context_line":"Accordingly, we can model the host CPU (and later memory) topologies as a set"},{"line_number":46,"context_line":"of resource providers arranged in a tree, and just directly allocate resources"},{"line_number":47,"context_line":"for a specific instance from a resource provider representing a NUMA node."},{"line_number":48,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_6dd455be","line":45,"range":{"start_line":45,"start_character":30,"end_line":45,"end_character":57},"updated":"2019-04-15 13:27:18.000000000","message":"nit: i woudl do this the other way around personal as it decouples this futher form the cpu in plamcent spec.\n\nNUMA after all stand for non uniform memory architecture\nthe empsice being that its an effect of memory access not cpus.\n\nif you have an application that runs entirely within l3 cache it will not experience numa effects as the memory access time will be the constant latency of l3 cache. ( yes i know im simplifying as there are cache region locality effects)","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"171f0bf04e07101f3c7ec1701e5e92cedcca4c1f","unresolved":false,"context_lines":[{"line_number":48,"context_line":""},{"line_number":49,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":50,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"},{"line_number":51,"context_line":"will be able to accurately query amount information for dedicated CPUs and"},{"line_number":52,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":53,"context_line":"like disk and RAM."},{"line_number":54,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_e80fb7de","line":51,"range":{"start_line":51,"start_character":33,"end_line":51,"end_character":39},"updated":"2019-04-16 15:11:07.000000000","message":"an amount","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":51,"context_line":"will be able to accurately query amount information for dedicated CPUs and"},{"line_number":52,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":53,"context_line":"like disk and RAM."},{"line_number":54,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":55,"context_line":"within a NUMA node for a vCPU`_) would still be only done by the virt driver,"},{"line_number":56,"context_line":"and are not covered by this spec."},{"line_number":57,"context_line":""},{"line_number":58,"context_line":"Use Cases"},{"line_number":59,"context_line":"---------"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_adc1dde6","line":56,"range":{"start_line":54,"start_character":0,"end_line":56,"end_character":33},"updated":"2019-04-15 13:27:18.000000000","message":"this is the other reason it make more sense to start with memory and specifcally mempages.\n\nfor memory we dont need a second level of assignment\n\nwe just need to corralate the RP uuid to the host numa cell and ensure we pin the guest memroy to the correct numa node.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":5754,"name":"Alex Xu","email":"hejie.xu@intel.com","username":"xuhj"},"change_message_id":"dd43746a2e93a6c3a2cea4018217984ca848f1e9","unresolved":false,"context_lines":[{"line_number":58,"context_line":"Use Cases"},{"line_number":59,"context_line":"---------"},{"line_number":60,"context_line":""},{"line_number":61,"context_line":"#1 : As a user, I\u0027d like to get fast access to memory for my 2-vCPU instance"},{"line_number":62,"context_line":"----------------------------------------------------------------------------"},{"line_number":63,"context_line":""},{"line_number":64,"context_line":"Consider the following NUMA topology for a \"2-sockets, 4 cores\" host with no"}],"source_content_type":"text/x-rst","patch_set":14,"id":"9fdfeff1_1d788e29","line":61,"range":{"start_line":61,"start_character":61,"end_line":61,"end_character":76},"updated":"2019-02-19 07:59:32.000000000","message":"2-vCPUs instance probably isn\u0027t good example here. Since only 2 vcpus, we probably only want one numa node. Even without guest numa node, the linux scheduler will probably ensure those 2 vcpus in the same numa node. So we probably should say a large instance which larger than single numa node at here.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":58,"context_line":"Use Cases"},{"line_number":59,"context_line":"---------"},{"line_number":60,"context_line":""},{"line_number":61,"context_line":"#1 : As a user, I\u0027d like to get fast access to memory for my 2-vCPU instance"},{"line_number":62,"context_line":"----------------------------------------------------------------------------"},{"line_number":63,"context_line":""},{"line_number":64,"context_line":"Consider the following NUMA topology for a \"2-sockets, 4 cores\" host with no"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_2d9e4d54","line":61,"range":{"start_line":61,"start_character":61,"end_line":61,"end_character":76},"in_reply_to":"9fdfeff1_1d788e29","updated":"2019-04-15 13:27:18.000000000","message":"not nessacally \n\nif you have a workload that is heavily memory bound and had very little cpu  work then you might want it to have 2 numa nodes even with only 2 cpus.\n\nbut i agree teh usecase could be improved. i personally woudl remove the reference to the number of cpus and simpley state\n\n\"as a tenant, i\u0027d like my instances memory allocations to be optimized for low latency.\"\n\nthat can be true of a vm of any size and convays the fact that memory latency ( which is a direct result of NUMA affinity) is the important aspect.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":61,"context_line":"#1 : As a user, I\u0027d like to get fast access to memory for my 2-vCPU instance"},{"line_number":62,"context_line":"----------------------------------------------------------------------------"},{"line_number":63,"context_line":""},{"line_number":64,"context_line":"Consider the following NUMA topology for a \"2-sockets, 4 cores\" host with no"},{"line_number":65,"context_line":"Hyper-Threading:"},{"line_number":66,"context_line":""},{"line_number":67,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_4d46397f","line":64,"range":{"start_line":64,"start_character":46,"end_line":64,"end_character":53},"updated":"2019-04-15 13:27:18.000000000","message":"pleas use the therm numa node\n\nmost server chps have more then 1 numa node in a singel socket.\n\neven on consumer desktop this is the case on intel i9 or amd thread ripper cpus.\n\nwe really should not conflate sockets with numa nodes.\nthat has been incorrect for at least 6 years on high core count systems as of the release of the ivybridge architecture in 2012","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":69,"context_line":"    +--------------------------------------+"},{"line_number":70,"context_line":"    |                  CN1                 |"},{"line_number":71,"context_line":"    +-+---------------+--+---------------+-+"},{"line_number":72,"context_line":"      |     NUMA1     |  |     NUMA2     |"},{"line_number":73,"context_line":"      +-+----+-+----+-+  +-+----+-+----+-+"},{"line_number":74,"context_line":"        |CPU1| |CPU2|      |CPU3| |CPU4|"},{"line_number":75,"context_line":"        +----+ +----+      +----+ +----+"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_cd59c95a","line":72,"range":{"start_line":72,"start_character":12,"end_line":72,"end_character":16},"updated":"2019-04-15 13:27:18.000000000","message":"also by using the term numa node above instead of socket it make it consitent with this diagram :)","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":76,"context_line":""},{"line_number":77,"context_line":"Here, CPU1 and CPU2 would share the same memory through a common memory"},{"line_number":78,"context_line":"controller, while CPU3 and CPU4 would share their own memory."},{"line_number":79,"context_line":""},{"line_number":80,"context_line":"Ideally, applications that require low-latency memory access from multiple"},{"line_number":81,"context_line":"vCPUs on the same instance (for parallel computing reasons) would like to"},{"line_number":82,"context_line":"ensure that those CPU resources are provided by the same NUMA node, or some"},{"line_number":83,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":84,"context_line":"I/O-bound of course)."},{"line_number":85,"context_line":"For the moment, if you\u0027re an operator, you can use flavor extra specs to"},{"line_number":86,"context_line":"indicate a desired NUMA topology for your instance like:"},{"line_number":87,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_f8680188","line":84,"range":{"start_line":79,"start_character":0,"end_line":84,"end_character":21},"updated":"2019-04-15 13:27:18.000000000","message":"yes although if the application required high memory bandwith for grater throughput they actully want multiple numa nodes.\n\n\nnot the rest of this comment is related but off topic.\n\nhow would people feel about adding a new\nhw:numa_afinity\u003dlatency|bandwith|Any|legacy flavor extraspec to contol this bevhvior?\n\nlatency would perfer to assign vms virtual numa nodes to the minium number of host numa nodes.\n\nbandwidth woudl perfer to assign the vms virtual numa nodes to the maxium number of host numa nodes (max 1:1).\n\nany woudl ignore numa affintiy\n\nand legacy woudl do what we do today which is map each\nvirtual numa node to a seperate host numa node. this would be the default.\n\nthe difference between legacy and bandwith is bandwith is best effort as is latency.\n\nthe proposal woudl be that for latency,bandwith and any we would allow all host to pass in the numa toplogy filter and would use a weighter to select a host.\n\nfrom a plamcenet perspective we would also use group_policy\u003dnone to not put any restiction on the RPs the resrouce groups corresponding to the numa nodes come form alther each numa nodes resouce would still be a sperate resouce group.\n\ni have wanted to make this change for a few years but there has never been a good time to discuss it, this would be a different spec so i dont want to rathole on it here.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":76,"context_line":""},{"line_number":77,"context_line":"Here, CPU1 and CPU2 would share the same memory through a common memory"},{"line_number":78,"context_line":"controller, while CPU3 and CPU4 would share their own memory."},{"line_number":79,"context_line":""},{"line_number":80,"context_line":"Ideally, applications that require low-latency memory access from multiple"},{"line_number":81,"context_line":"vCPUs on the same instance (for parallel computing reasons) would like to"},{"line_number":82,"context_line":"ensure that those CPU resources are provided by the same NUMA node, or some"},{"line_number":83,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":84,"context_line":"I/O-bound of course)."},{"line_number":85,"context_line":"For the moment, if you\u0027re an operator, you can use flavor extra specs to"},{"line_number":86,"context_line":"indicate a desired NUMA topology for your instance like:"},{"line_number":87,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_47e161fe","line":84,"range":{"start_line":79,"start_character":0,"end_line":84,"end_character":21},"in_reply_to":"3fce034c_f8680188","updated":"2019-04-21 16:46:30.000000000","message":"\u003e the difference between legacy and bandwith is bandwith is best\n \u003e effort as is latency.\n\n\nThe above sentence does not make sense to me.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"8dfadd7421c1453aae589a21382ebc3a235f485e","unresolved":false,"context_lines":[{"line_number":76,"context_line":""},{"line_number":77,"context_line":"Here, CPU1 and CPU2 would share the same memory through a common memory"},{"line_number":78,"context_line":"controller, while CPU3 and CPU4 would share their own memory."},{"line_number":79,"context_line":""},{"line_number":80,"context_line":"Ideally, applications that require low-latency memory access from multiple"},{"line_number":81,"context_line":"vCPUs on the same instance (for parallel computing reasons) would like to"},{"line_number":82,"context_line":"ensure that those CPU resources are provided by the same NUMA node, or some"},{"line_number":83,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":84,"context_line":"I/O-bound of course)."},{"line_number":85,"context_line":"For the moment, if you\u0027re an operator, you can use flavor extra specs to"},{"line_number":86,"context_line":"indicate a desired NUMA topology for your instance like:"},{"line_number":87,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fa7e38b_942c3e94","line":84,"range":{"start_line":79,"start_character":0,"end_line":84,"end_character":21},"in_reply_to":"ffb9cba7_47e161fe","updated":"2020-01-15 12:48:57.000000000","message":"oh i ment to say\n\nthe difference between legacy and bandwith is bandwith is best effort and legacy requires each virtual numa node to be mapped to a differnt host numa node.\n\nthe latency policy would also be best effort and if we could not fix on one numa node we would spread the n vitual numa node across up to n host numa node.\n\nso the bandwith policy tries to spread and the latency policy tries to pack. they would be best effort because they would be dont by weighing the allocation candiates returned. the legacy policy however is strict in that its implemented as a filter (the numa toplogy filter) not a weigher so it will only pass host that fulfil the requirement.\n\nwhen useing latency or bandwidth you are only guartee that the amount of numa nodes and the number or core/memory per numa node are correct and that a virtual numa node will not be split across host numa nodes. The weighing they do would be best effort so if no host in the cloud could spread of pack with the policy you asked for but a host could fit the vm while not breaking the requirement that virtual numa nodes must map to at most 1 host numa node, then the vm would boot on that host.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":89,"context_line":""},{"line_number":90,"context_line":"  $ openstack flavor set FLAVOR-NAME \\"},{"line_number":91,"context_line":"      --property hw:numa_nodes\u003dFLAVOR-NODES \\"},{"line_number":92,"context_line":"      --property hw:numa_cpus.N\u003dFLAVOR-CORES \\"},{"line_number":93,"context_line":"      --property hw:numa_mem.N\u003dFLAVOR-MEMORY"},{"line_number":94,"context_line":""},{"line_number":95,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":96,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_b8bfd9ce","line":93,"range":{"start_line":92,"start_character":6,"end_line":93,"end_character":44},"updated":"2019-04-15 13:27:18.000000000","message":"you only need to set these if the cpus and memory cannot\nbe evenly devided by the number of numa nodes.\n\nwhen people set this manually for other reason it is generally a bad idea.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":89,"context_line":""},{"line_number":90,"context_line":"  $ openstack flavor set FLAVOR-NAME \\"},{"line_number":91,"context_line":"      --property hw:numa_nodes\u003dFLAVOR-NODES \\"},{"line_number":92,"context_line":"      --property hw:numa_cpus.N\u003dFLAVOR-CORES \\"},{"line_number":93,"context_line":"      --property hw:numa_mem.N\u003dFLAVOR-MEMORY"},{"line_number":94,"context_line":""},{"line_number":95,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":96,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_e7ce5582","line":93,"range":{"start_line":92,"start_character":6,"end_line":93,"end_character":44},"in_reply_to":"3fce034c_b8bfd9ce","updated":"2019-04-21 16:46:30.000000000","message":"++\n\nI wish we\u0027d just get rid of this over-complexity, frankly.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"171f0bf04e07101f3c7ec1701e5e92cedcca4c1f","unresolved":false,"context_lines":[{"line_number":120,"context_line":""},{"line_number":121,"context_line":"In that case, imagine the user would like to use a CUDA library by running"},{"line_number":122,"context_line":"parallel threads in their instance for computing things like Artificial"},{"line_number":123,"context_line":"Intelligence calculations, they\u0027d love to make sure my GPU devices are"},{"line_number":124,"context_line":"affinitized by NUMA nodes for maximum throughput reasons."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_a8d0cf20","line":123,"range":{"start_line":123,"start_character":52,"end_line":123,"end_character":54},"updated":"2019-04-16 15:11:07.000000000","message":"their?","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":5754,"name":"Alex Xu","email":"hejie.xu@intel.com","username":"xuhj"},"change_message_id":"dd43746a2e93a6c3a2cea4018217984ca848f1e9","unresolved":false,"context_lines":[{"line_number":118,"context_line":"CPU1 and CPU2, but not with CPU3 and CPU4 which have a separate memory mapping"},{"line_number":119,"context_line":"region."},{"line_number":120,"context_line":""},{"line_number":121,"context_line":"In that case, imagine the user would like to use a CUDA library by running"},{"line_number":122,"context_line":"parallel threads in their instance for computing things like Artificial"},{"line_number":123,"context_line":"Intelligence calculations, they\u0027d love to make sure my GPU devices are"},{"line_number":124,"context_line":"affinitized by NUMA nodes for maximum throughput reasons."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":""},{"line_number":127,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":14,"id":"9fdfeff1_bdd702ef","line":124,"range":{"start_line":121,"start_character":0,"end_line":124,"end_character":57},"updated":"2019-02-19 07:59:32.000000000","message":"just a question, so In this case, we only talk about one numa node guest? Since I didn\u0027t found qemu/KVM has any controlling on the pci device exposed on which guest numa node. That means, if guest has mulitple numa node, the guest don\u0027t know the pci device is attach to which guest numa node.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":118,"context_line":"CPU1 and CPU2, but not with CPU3 and CPU4 which have a separate memory mapping"},{"line_number":119,"context_line":"region."},{"line_number":120,"context_line":""},{"line_number":121,"context_line":"In that case, imagine the user would like to use a CUDA library by running"},{"line_number":122,"context_line":"parallel threads in their instance for computing things like Artificial"},{"line_number":123,"context_line":"Intelligence calculations, they\u0027d love to make sure my GPU devices are"},{"line_number":124,"context_line":"affinitized by NUMA nodes for maximum throughput reasons."},{"line_number":125,"context_line":""},{"line_number":126,"context_line":""},{"line_number":127,"context_line":"Proposed change"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_5884b50c","line":124,"range":{"start_line":121,"start_character":0,"end_line":124,"end_character":57},"in_reply_to":"9fdfeff1_bdd702ef","updated":"2019-04-15 13:27:18.000000000","message":"actully qemu can handel this mapping if you use the q35 chipset.\n\nwhat happens today is that in a multi numa node guest all pci devices are exposed via a singel root pci complex so all pci device are effectly associcate via numa node 0 in the guset.\n\ni have proplsed creating 1 pci route complex per numa node going forward and associating the devices correctly but i have not had time to work on that yet.\n\nso in the future guest will know what numa node the deice is on and can optimise coreectly.\n\nas a workaroudn operators can define hw:numa_nodes\u003d1\n\n\nthis is yet another spec i need to write...","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7eac80960e415301c7eb4e2562de57ca8d4d883e","unresolved":false,"context_lines":[{"line_number":152,"context_line":"   +------------------+                 +-----------------+"},{"line_number":153,"context_line":"            |                                 |"},{"line_number":154,"context_line":"    +---------------+                     +--------------------+"},{"line_number":155,"context_line":"    | \u003cPGPU_ID\u003e     |                     | \u003cPHYS_FUNC_PCI_ID\u003e |"},{"line_number":156,"context_line":"    | VGPU: 8       |                     | SRIOV_NET_VF: 8    |"},{"line_number":157,"context_line":"    +---------------+                     +--------------------+"},{"line_number":158,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_42a2694d","line":155,"range":{"start_line":155,"start_character":43,"end_line":155,"end_character":62},"updated":"2019-04-16 14:52:14.000000000","message":"For bandwidth we already introduced a RP that represents an SRIOV PF. See the figure in [1]. I think we should not create another RP for the VF inventory. But re-use the existing PF RP from [1]. Of course this means that that PF RP needs to be moved under a different parent. However now neutron creates the RP under the SRIOV agent RP. \n\nSo if this spec want to solve PCI modelling then it\n* needs to sync the model with the existing bandwidth model[1]\n* if such model needs to be changed then this spec (or another RFE) needs to describe the resulting neutron impact.\n\n[1]https://specs.openstack.org/openstack/neutron-specs/specs/rocky/minimum-bandwidth-allocation-placement-api.html#reporting-available-resources","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7eac80960e415301c7eb4e2562de57ca8d4d883e","unresolved":false,"context_lines":[{"line_number":155,"context_line":"    | \u003cPGPU_ID\u003e     |                     | \u003cPHYS_FUNC_PCI_ID\u003e |"},{"line_number":156,"context_line":"    | VGPU: 8       |                     | SRIOV_NET_VF: 8    |"},{"line_number":157,"context_line":"    +---------------+                     +--------------------+"},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Resource Provider names for NUMA nodes shall follow a convention of"},{"line_number":160,"context_line":"``nodename_NUMA#`` where nodename would be the hypervisor hostname (given by"},{"line_number":161,"context_line":"the virt driver) and where NUMA# would literally be a string made of \u0027NUMA\u0027"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_487a8b88","line":158,"updated":"2019-04-16 14:52:14.000000000","message":"Will the TRAITS that are currently on the compute RP will also be moved to the NUMA RPS? E.g.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":155,"context_line":"    | \u003cPGPU_ID\u003e     |                     | \u003cPHYS_FUNC_PCI_ID\u003e |"},{"line_number":156,"context_line":"    | VGPU: 8       |                     | SRIOV_NET_VF: 8    |"},{"line_number":157,"context_line":"    +---------------+                     +--------------------+"},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Resource Provider names for NUMA nodes shall follow a convention of"},{"line_number":160,"context_line":"``nodename_NUMA#`` where nodename would be the hypervisor hostname (given by"},{"line_number":161,"context_line":"the virt driver) and where NUMA# would literally be a string made of \u0027NUMA\u0027"}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_079fe950","line":158,"in_reply_to":"3fce034c_48328ba9","updated":"2019-04-21 16:46:30.000000000","message":"If different traits can potentially be assigned to different NUMA node resource providers, then yes. If not, then I don\u0027t see a reason to move them out of the compute node provider.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"9c3853a42c5eac6623df144beeddb2a99584c405","unresolved":false,"context_lines":[{"line_number":155,"context_line":"    | \u003cPGPU_ID\u003e     |                     | \u003cPHYS_FUNC_PCI_ID\u003e |"},{"line_number":156,"context_line":"    | VGPU: 8       |                     | SRIOV_NET_VF: 8    |"},{"line_number":157,"context_line":"    +---------------+                     +--------------------+"},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Resource Provider names for NUMA nodes shall follow a convention of"},{"line_number":160,"context_line":"``nodename_NUMA#`` where nodename would be the hypervisor hostname (given by"},{"line_number":161,"context_line":"the virt driver) and where NUMA# would literally be a string made of \u0027NUMA\u0027"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_48328ba9","line":158,"in_reply_to":"3fce034c_487a8b88","updated":"2019-04-16 14:58:05.000000000","message":"I mean E.g. HW_CPU_X86_SSE","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":157,"context_line":"    +---------------+                     +--------------------+"},{"line_number":158,"context_line":""},{"line_number":159,"context_line":"Resource Provider names for NUMA nodes shall follow a convention of"},{"line_number":160,"context_line":"``nodename_NUMA#`` where nodename would be the hypervisor hostname (given by"},{"line_number":161,"context_line":"the virt driver) and where NUMA# would literally be a string made of \u0027NUMA\u0027"},{"line_number":162,"context_line":"postfixed by the NUMA cell ID which is provided by the virt driver."},{"line_number":163,"context_line":"For PCI devices, we propose the convention to be ``nodename__PCIaddr`` where"},{"line_number":164,"context_line":"PCIaddr would be the (formatted) PCI address of the device. That said, it\u0027s an"}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_a7ab7d73","line":161,"range":{"start_line":160,"start_character":47,"end_line":161,"end_character":16},"updated":"2019-04-21 16:46:30.000000000","message":"and what happens when the hypervisor_hostname changes? we\u0027ve seen a bunch of bugs and issues when this happens, particularly for Ironic nodes. :( Not sure this is a huge problem since all of these changes describe non-Ironic compute node providers, but just thought I\u0027d mention it. If you\u0027re going to \"looking up\" providers in a provider tree by this naming convention, you should be aware of what happens when hypervisor_hostname changes suddenly and placement isn\u0027t aware of the changes.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":160,"context_line":"``nodename_NUMA#`` where nodename would be the hypervisor hostname (given by"},{"line_number":161,"context_line":"the virt driver) and where NUMA# would literally be a string made of \u0027NUMA\u0027"},{"line_number":162,"context_line":"postfixed by the NUMA cell ID which is provided by the virt driver."},{"line_number":163,"context_line":"For PCI devices, we propose the convention to be ``nodename__PCIaddr`` where"},{"line_number":164,"context_line":"PCIaddr would be the (formatted) PCI address of the device. That said, it\u0027s an"},{"line_number":165,"context_line":"implementation detail that will be discussed during the code review."},{"line_number":166,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_87bc39bc","line":163,"range":{"start_line":163,"start_character":0,"end_line":163,"end_character":70},"updated":"2019-04-21 16:46:30.000000000","message":"See Gibi\u0027s comment above. The neutron SR-IOV agent is already creating resource providers representing the \"backend\" (OVS vs. direct, etc) along with child providers representing the physical PCI devices assigned to those backends.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":167,"context_line":"Each NUMA node would be then a child Resource Provider, having two resource"},{"line_number":168,"context_line":"classes :"},{"line_number":169,"context_line":""},{"line_number":170,"context_line":"* ``VCPU``: for telling how many cores the NUMA node has (threaded or not)."},{"line_number":171,"context_line":"* ``MEMORY_MB``: for telling how much memory the NUMA node has."},{"line_number":172,"context_line":""},{"line_number":173,"context_line":".. note::"}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_27b1cde2","line":170,"range":{"start_line":170,"start_character":57,"end_line":170,"end_character":74},"updated":"2019-04-21 16:46:30.000000000","message":"remove this, IMHO. it\u0027s not necessary.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":178,"context_line":"The root Resource Provider (ie. the compute node) would only provide resources"},{"line_number":179,"context_line":"for classes that are not NUMA-related."},{"line_number":180,"context_line":""},{"line_number":181,"context_line":"Each PCI device (like a physical GPU device) having a defined NUMA locality"},{"line_number":182,"context_line":"would then be a child where each one would have specific resource classes."},{"line_number":183,"context_line":"For example:"},{"line_number":184,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_07cda95c","line":181,"range":{"start_line":181,"start_character":67,"end_line":181,"end_character":75},"updated":"2019-04-21 16:46:30.000000000","message":"affinity","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":179,"context_line":"for classes that are not NUMA-related."},{"line_number":180,"context_line":""},{"line_number":181,"context_line":"Each PCI device (like a physical GPU device) having a defined NUMA locality"},{"line_number":182,"context_line":"would then be a child where each one would have specific resource classes."},{"line_number":183,"context_line":"For example:"},{"line_number":184,"context_line":""},{"line_number":185,"context_line":"* GPU devices have a specific VGPU resource class that counts the number of"}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_c7d6b1f2","line":182,"range":{"start_line":182,"start_character":22,"end_line":182,"end_character":74},"updated":"2019-04-21 16:46:30.000000000","message":"delete this and replace with \"provider of the resource provider representing the associated NUMA cell\"","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7eac80960e415301c7eb4e2562de57ca8d4d883e","unresolved":false,"context_lines":[{"line_number":184,"context_line":""},{"line_number":185,"context_line":"* GPU devices have a specific VGPU resource class that counts the number of"},{"line_number":186,"context_line":"  virtual GPUs it can create."},{"line_number":187,"context_line":"* An SR-IOV physical function could count the number of virtual functions"},{"line_number":188,"context_line":"  it can create."},{"line_number":189,"context_line":""},{"line_number":190,"context_line":".. note:: `huge pages`_ (or specific memory page size) are a separate feature"},{"line_number":191,"context_line":"          that needs a separate discussion on how to provide that feature using"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_c2a65954","line":188,"range":{"start_line":187,"start_character":0,"end_line":188,"end_character":16},"updated":"2019-04-16 14:52:14.000000000","message":"SRIOV PF are already modeled (see above). So that model needs to be evolved instead of a new model for SRIOV PFs.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":184,"context_line":""},{"line_number":185,"context_line":"* GPU devices have a specific VGPU resource class that counts the number of"},{"line_number":186,"context_line":"  virtual GPUs it can create."},{"line_number":187,"context_line":"* An SR-IOV physical function could count the number of virtual functions"},{"line_number":188,"context_line":"  it can create."},{"line_number":189,"context_line":""},{"line_number":190,"context_line":".. note:: `huge pages`_ (or specific memory page size) are a separate feature"},{"line_number":191,"context_line":"          that needs a separate discussion on how to provide that feature using"}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_4704017c","line":188,"range":{"start_line":187,"start_character":0,"end_line":188,"end_character":16},"in_reply_to":"3fce034c_c2a65954","updated":"2019-04-21 16:46:30.000000000","message":"Gibi, AFAIK, the SRIOV_NET_VF existing resource class isn\u0027t actually being used yet, though, correct? Only the NET_BW_[IN|E]GR_KILOBIT_PER_SEC resource classes are currently being tracked as inventory by the SR-IOV Neutron agent, yeah?","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":197,"context_line":"Asking for vCPUs split evenly between NUMA nodes"},{"line_number":198,"context_line":"------------------------------------------------"},{"line_number":199,"context_line":""},{"line_number":200,"context_line":"The existing feature of separating all your vCPUs between NUMA nodes will be"},{"line_number":201,"context_line":"asked by providing a numbered request group query like::"},{"line_number":202,"context_line":""},{"line_number":203,"context_line":"  resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1"}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_e7f8957d","line":200,"range":{"start_line":200,"start_character":44,"end_line":200,"end_character":49},"updated":"2019-04-21 16:46:30.000000000","message":"I really don\u0027t see the point of doing CPU pinning with the vCPU resource class. The PCPU resource class is the thing that should be used for pinning/dedicated guest CPUs.\n\nIf this proposal goes forward as-is, we\u0027ll just want another major reshape from VCPU -\u003e PCPU which will make things ever more complicated than they already are.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7eac80960e415301c7eb4e2562de57ca8d4d883e","unresolved":false,"context_lines":[{"line_number":202,"context_line":""},{"line_number":203,"context_line":"  resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1"},{"line_number":204,"context_line":""},{"line_number":205,"context_line":"As the Placement API added the ability to shard resources in the 1.25"},{"line_number":206,"context_line":"microversion, we would amend the above by::"},{"line_number":207,"context_line":""},{"line_number":208,"context_line":"  group_policy\u003disolate"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_027221e4","line":205,"range":{"start_line":205,"start_character":42,"end_line":205,"end_character":47},"updated":"2019-04-16 14:52:14.000000000","message":"Is shard a good term here?","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"171f0bf04e07101f3c7ec1701e5e92cedcca4c1f","unresolved":false,"context_lines":[{"line_number":202,"context_line":""},{"line_number":203,"context_line":"  resources1\u003dVCPU:1\u0026resources2\u003dVCPU:1\u0026...\u0026resources\u003cvCPU_count\u003e\u003dVCPU:1"},{"line_number":204,"context_line":""},{"line_number":205,"context_line":"As the Placement API added the ability to shard resources in the 1.25"},{"line_number":206,"context_line":"microversion, we would amend the above by::"},{"line_number":207,"context_line":""},{"line_number":208,"context_line":"  group_policy\u003disolate"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_8873934d","line":205,"range":{"start_line":205,"start_character":42,"end_line":205,"end_character":47},"in_reply_to":"3fce034c_027221e4","updated":"2019-04-16 15:11:07.000000000","message":"No, I think shard includes other connotation that we don\u0027t want to imply here. I think just the following would be better:\n\n \"added the ability to separate resource requests into relates groups in the...\"","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":205,"context_line":"As the Placement API added the ability to shard resources in the 1.25"},{"line_number":206,"context_line":"microversion, we would amend the above by::"},{"line_number":207,"context_line":""},{"line_number":208,"context_line":"  group_policy\u003disolate"},{"line_number":209,"context_line":""},{"line_number":210,"context_line":"For example:"},{"line_number":211,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_d8e425af","line":208,"range":{"start_line":208,"start_character":2,"end_line":208,"end_character":22},"updated":"2019-04-15 13:27:18.000000000","message":"i have noted this on other spec but i will state it here also for clarity.\n\nas group_policy is global it is not safe to use it in general as it can break other resouce requests.\n\n\nhere are two exampels that break today if you use group_policy\u003disolate.\n\nfirst is i have 1 cinder backend and i therefor have 1 sharing resouce provider. if you use group_policy\u003disolate you cannot boot a vm with two cinder volumes as each volume would be its own numbered request group and with the isoalte policy both cant come form the same RP.\n\nthe second case is with bandwith based schduling. \nif you use group_policy\u003disolate you cannot have 2 network interface form the same RP that request bandwith. that means\nif you are using bandwith based schduling with ovs for example you are limited to 1 port with a bandwith request\n\nso without the ablity to specfiy relationships between resouce groups the global isolate policy is too agressive and will break lots of usecase as a result the only safe default is \ngroup_policy\u003dnone.\n\nhttps://review.openstack.org/#/c/650476/\n\ntrys to fix this in a way by intoducing subtree affintiy and i propsoed a posably solution like this\n\ngroup_policy\u003dnone;isolate:2,3;subtree:4,5;\nwhere we coudl have a global policy and override it for specific groups but without something like that\ni think we should always be useing\ngroup_policy\u003dNone.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":205,"context_line":"As the Placement API added the ability to shard resources in the 1.25"},{"line_number":206,"context_line":"microversion, we would amend the above by::"},{"line_number":207,"context_line":""},{"line_number":208,"context_line":"  group_policy\u003disolate"},{"line_number":209,"context_line":""},{"line_number":210,"context_line":"For example:"},{"line_number":211,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_8e463b4f","line":208,"range":{"start_line":208,"start_character":2,"end_line":208,"end_character":22},"in_reply_to":"3fce034c_d8e425af","updated":"2019-04-16 16:45:55.000000000","message":"I agree with Sean, we need to make group_policy more... ahem, \"granular\".\n\nAs written, this example will only land on hosts with at least as many NUMA cells as VCPUs requested, and with at least one VCPU available in each.\n\nI suspect what you were going for here was the \"any fit\" use case, where we use group_policy\u003dnone and split a request for N resource units into single units across N request groups so that we maximize our chances of landing *somewhere*.\n\nThe examples below don\u0027t gel with this though. To align with the below examples, you would want something more like:\n\n hw:numa_nodes\u003dN\n vcpus\u003dX\n\ntranslates to:\n\n ?resources1\u003dVCPU:$(X/N)\n \u0026resources2\u003dVCPU:$(X/N)\n \u0026...\n \u0026resources$N\u003dVCPU:$(X/N)\n \u0026group_policy\u003disolate","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":217,"context_line":""},{"line_number":218,"context_line":"    resources1:VCPU\u003d8"},{"line_number":219,"context_line":""},{"line_number":220,"context_line":"Due to an upgrade concern, the existing extra spec property ``hw:numa_nodes\u003dN``"},{"line_number":221,"context_line":"(where N is an integer) will be supported but deprecated in Stein. The"},{"line_number":222,"context_line":"scheduler service will actually translate this into the numbered request group"},{"line_number":223,"context_line":"query above for Stein, but we will remove the translation in the next cycle."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":"Asking for vCPUs split unevenly between NUMA nodes"},{"line_number":226,"context_line":"--------------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_b84fd990","line":223,"range":{"start_line":220,"start_character":0,"end_line":223,"end_character":76},"updated":"2019-04-15 13:27:18.000000000","message":"so i would actully propose that we keep the hw:numa_nodes\u003dN option and make the traslation layer permenent.\n\nthe fact that nova uses placment is really an implementation detail that im not sure we shoudl be leakign via our api.\n\nthat is just my personal preference but it think in the long run requiring operators to understand placement resouces and how to request them is leaking impleemnation detail that we shoudl be encapsulating.\n\nin the long rune not requireing operator to add plamence stuff to flavor will reduce upgrade impacts if and when new features are added to placment as we can simple updte the translation code without needing to teach the operator how to use placment feature x.\n\nwithout a live/inplace resize option i think assuming operator can just power off tenat vms to do a normal resize\nis a non starter therefore we shoudl avoid change to flavor if we can do this transparently.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":217,"context_line":""},{"line_number":218,"context_line":"    resources1:VCPU\u003d8"},{"line_number":219,"context_line":""},{"line_number":220,"context_line":"Due to an upgrade concern, the existing extra spec property ``hw:numa_nodes\u003dN``"},{"line_number":221,"context_line":"(where N is an integer) will be supported but deprecated in Stein. The"},{"line_number":222,"context_line":"scheduler service will actually translate this into the numbered request group"},{"line_number":223,"context_line":"query above for Stein, but we will remove the translation in the next cycle."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":"Asking for vCPUs split unevenly between NUMA nodes"},{"line_number":226,"context_line":"--------------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_ce93e3a2","line":223,"range":{"start_line":220,"start_character":0,"end_line":223,"end_character":76},"in_reply_to":"3fce034c_88d633f1","updated":"2019-04-16 16:45:55.000000000","message":"Agree we should maintain the translation layer \"forever\". We can support manual specification of placement-ese in flavors, but should strive to provide simpler nova-ese alternatives anywhere we can. In cases where we\u0027re trying to support existing use cases with existing syntax, as here, we should use that existing syntax.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"171f0bf04e07101f3c7ec1701e5e92cedcca4c1f","unresolved":false,"context_lines":[{"line_number":217,"context_line":""},{"line_number":218,"context_line":"    resources1:VCPU\u003d8"},{"line_number":219,"context_line":""},{"line_number":220,"context_line":"Due to an upgrade concern, the existing extra spec property ``hw:numa_nodes\u003dN``"},{"line_number":221,"context_line":"(where N is an integer) will be supported but deprecated in Stein. The"},{"line_number":222,"context_line":"scheduler service will actually translate this into the numbered request group"},{"line_number":223,"context_line":"query above for Stein, but we will remove the translation in the next cycle."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":"Asking for vCPUs split unevenly between NUMA nodes"},{"line_number":226,"context_line":"--------------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_88d633f1","line":223,"range":{"start_line":220,"start_character":0,"end_line":223,"end_character":76},"in_reply_to":"3fce034c_b84fd990","updated":"2019-04-16 15:11:07.000000000","message":"Again, I don\u0027t think you can just stop honoring something like hw:numa_nodes, especially not just one release after you introduce the alternative. IMHO, you can:\n\n 1. Continue to honor it, translating it as appropriate\n 2. Refuse to boot if it\u0027s set on the flavor/image (and do something for existing instances)\n 3. Survey the entire database for uses of it in existing flavors and instance-embedded flavors to remove all uses\n\nRemember that this could be set in thousands of flavors embedded in instances, and refusing to honor it will cause them all to break on migration.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":228,"context_line":"The existing feature of asking a specific number of vCPUs for a NUMA node"},{"line_number":229,"context_line":"will be asked by providing the below numbered request group query::"},{"line_number":230,"context_line":""},{"line_number":231,"context_line":"  group_policy\u003disolate\u0026resources1:VCPU\u003dN\u0026resources2:VCPU\u003dM"},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"where N is the number of vCPUs for one NUMA node and M the vCPUs number of a"},{"line_number":234,"context_line":"second NUMA node."}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_d31c066c","line":231,"range":{"start_line":231,"start_character":2,"end_line":231,"end_character":23},"updated":"2019-04-15 13:27:18.000000000","message":"again i think we need to use group_policy\u003dnone here\nand handel the anti afinty of the cpus in the numa toplogy filter.\n\nidealy by adding the RP uuid to the hostCell object and passing the allocation_summieras/allocation_candates to the nova filters.\n\nwe would still have two resource groups but we would not requrie placemetn to do the anit afinity untill we have a non global group policy\n\nthat could be a post Train optimisation assumeing an enhanced query syntax is added to placement in train.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":228,"context_line":"The existing feature of asking a specific number of vCPUs for a NUMA node"},{"line_number":229,"context_line":"will be asked by providing the below numbered request group query::"},{"line_number":230,"context_line":""},{"line_number":231,"context_line":"  group_policy\u003disolate\u0026resources1:VCPU\u003dN\u0026resources2:VCPU\u003dM"},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"where N is the number of vCPUs for one NUMA node and M the vCPUs number of a"},{"line_number":234,"context_line":"second NUMA node."}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_877d19e9","line":231,"range":{"start_line":231,"start_character":2,"end_line":231,"end_character":23},"in_reply_to":"3fce034c_d31c066c","updated":"2019-04-21 16:46:30.000000000","message":"The entire purpose of adding the group_policy\u003disolate stuff into placement was to support these whacky NUMA affinity use cases. Why *wouldn\u0027t* we use it?","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"8dfadd7421c1453aae589a21382ebc3a235f485e","unresolved":false,"context_lines":[{"line_number":228,"context_line":"The existing feature of asking a specific number of vCPUs for a NUMA node"},{"line_number":229,"context_line":"will be asked by providing the below numbered request group query::"},{"line_number":230,"context_line":""},{"line_number":231,"context_line":"  group_policy\u003disolate\u0026resources1:VCPU\u003dN\u0026resources2:VCPU\u003dM"},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"where N is the number of vCPUs for one NUMA node and M the vCPUs number of a"},{"line_number":234,"context_line":"second NUMA node."}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fa7e38b_9488be6b","line":231,"range":{"start_line":231,"start_character":2,"end_line":231,"end_character":23},"in_reply_to":"ffb9cba7_877d19e9","updated":"2020-01-15 12:48:57.000000000","message":"we would not use it as it is a global option that would get applied to all groups including those form neutron, cinder and cyborg\n\nmeanign that if cinder resouce were modeled as sharing resouce providers we could not have 2 volums form the same provider or 2 ports on the same network backend, or 2 acclerator from the same inventor in the cyborg case.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":235,"context_line":""},{"line_number":236,"context_line":"Note that, as the previous usecase, we will deprecate the flavor extra spec"},{"line_number":237,"context_line":"property ``hw:numa_cpus.N\u003dY`` in Stein but we will still transform it into"},{"line_number":238,"context_line":"a Placement query in the scheduler service (that will be eventually removed"},{"line_number":239,"context_line":"after Stein)."},{"line_number":240,"context_line":"For example, for a flavor of 8 VCPUs with extra specs set with"},{"line_number":241,"context_line":"``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_cpus.1\u003d2,3,4,5,6,7``, it will"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_4eedb323","line":238,"range":{"start_line":238,"start_character":57,"end_line":238,"end_character":75},"updated":"2019-04-16 16:45:55.000000000","message":"see above.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":240,"context_line":"For example, for a flavor of 8 VCPUs with extra specs set with"},{"line_number":241,"context_line":"``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_cpus.1\u003d2,3,4,5,6,7``, it will"},{"line_number":242,"context_line":"translate the Placement query straight into"},{"line_number":243,"context_line":"``group_policy\u003disolate\u0026resources1:VCPU\u003d2\u0026resources2:VCPU\u003d6``"},{"line_number":244,"context_line":""},{"line_number":245,"context_line":"Asking for memory sharded between NUMA nodes"},{"line_number":246,"context_line":"--------------------------------------------"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_0ef7abf7","line":243,"updated":"2019-04-16 16:45:55.000000000","message":"yup, and maintain this translation layer indefinitely.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":249,"context_line":"using a numbered request group query specifying which resource group for which"},{"line_number":250,"context_line":"``MEMORY_MB`` resource class::"},{"line_number":251,"context_line":""},{"line_number":252,"context_line":"  group_policy\u003disolate\u0026resources1:MEMORY_MB\u003dN\u0026resources2:MEMORY_MB\u003dM"},{"line_number":253,"context_line":""},{"line_number":254,"context_line":"where N would be the amount of memory for the first NUMA node and M the amount"},{"line_number":255,"context_line":"for a second NUMA node."}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_13afeee3","line":252,"range":{"start_line":252,"start_character":2,"end_line":252,"end_character":22},"updated":"2019-04-15 13:27:18.000000000","message":"same comment as above. i think this need to be none for train\n\nill skip this on the other example but it obviously applies to all uses of group_policy\u003disolate.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":249,"context_line":"using a numbered request group query specifying which resource group for which"},{"line_number":250,"context_line":"``MEMORY_MB`` resource class::"},{"line_number":251,"context_line":""},{"line_number":252,"context_line":"  group_policy\u003disolate\u0026resources1:MEMORY_MB\u003dN\u0026resources2:MEMORY_MB\u003dM"},{"line_number":253,"context_line":""},{"line_number":254,"context_line":"where N would be the amount of memory for the first NUMA node and M the amount"},{"line_number":255,"context_line":"for a second NUMA node."}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_3390ea4e","line":252,"range":{"start_line":252,"start_character":34,"end_line":252,"end_character":44},"updated":"2019-04-15 13:27:18.000000000","message":"we definetly could  just port MEMORY_MB but perhaps we would be better reporting mempages instead. i dont know if you want to declare hugepages out of scope but if we do i want to ensure we do not break them with this proposal.\n\n\nif we intent to support hugepages in this spec\ni would propose 2 new resouce classes\n\nMEM_PAGE_SMALL\nMEM_PAGE_LARGE\n\nwith a trait for the page size.\nHW_MEM_SIZE_1G HW_MEM_SIZE_2M HW_MEM_SIZE_CUSTOM\n\nNon numa guests woudl continue to use MEMORY_MB on host that are not configured to not report meory per numa node\n\nand numa guest woudl use either MEM_PAGE_SMALL or\nMEM_PAGE_LARGE to target host that are configured for numa guests.\n\nToday we tell operators not to mix numa guests with non numa as it leads to OOM events resulting in vms being killed this would provide a way of preventing that in the future.\n\nnote on x86 hugepages are 2MB or 1GB but they can be of different sizes on other architetures HW_MEM_SIZE_CUSTOM\n\nwoudl be a placeholder untill we add standard resouce classes for the other sizes. i we know what they are we could just add them instead and not need to do this.\n\nif we provide a translation for the flavor\n\nby default numa guest woudl request memeroy via the MEM_PAGE_SMALL resouce class.\n\nif hw_mem_page_size\u003dlarge then it woudl use MEM_PAGE_LARGE\n\nif it set to a value that maps to 1g or 2mb hugepage then the MEM_PAGE_LARGE with the traits request.\n\nif we decide to not provide a translation then the operator can simply specify the resource/tratis request in teh extra spec/image metadata.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":7,"name":"Jay Pipes","email":"jaypipes@gmail.com","username":"jaypipes"},"change_message_id":"5cc68ea759e04dc0bac8f02286fefdb488c57ec0","unresolved":false,"context_lines":[{"line_number":249,"context_line":"using a numbered request group query specifying which resource group for which"},{"line_number":250,"context_line":"``MEMORY_MB`` resource class::"},{"line_number":251,"context_line":""},{"line_number":252,"context_line":"  group_policy\u003disolate\u0026resources1:MEMORY_MB\u003dN\u0026resources2:MEMORY_MB\u003dM"},{"line_number":253,"context_line":""},{"line_number":254,"context_line":"where N would be the amount of memory for the first NUMA node and M the amount"},{"line_number":255,"context_line":"for a second NUMA node."}],"source_content_type":"text/x-rst","patch_set":14,"id":"ffb9cba7_6761053a","line":252,"range":{"start_line":252,"start_character":34,"end_line":252,"end_character":44},"in_reply_to":"3fce034c_3390ea4e","updated":"2019-04-21 16:46:30.000000000","message":"Please do not use traits for sizes :(\n\nPlease just use resource classes for quantitative things.\n\nI already proposed adding memory pages as resource classes more than 2 years ago:\n\nhttps://review.opendev.org/#/c/442718/","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"8dfadd7421c1453aae589a21382ebc3a235f485e","unresolved":false,"context_lines":[{"line_number":249,"context_line":"using a numbered request group query specifying which resource group for which"},{"line_number":250,"context_line":"``MEMORY_MB`` resource class::"},{"line_number":251,"context_line":""},{"line_number":252,"context_line":"  group_policy\u003disolate\u0026resources1:MEMORY_MB\u003dN\u0026resources2:MEMORY_MB\u003dM"},{"line_number":253,"context_line":""},{"line_number":254,"context_line":"where N would be the amount of memory for the first NUMA node and M the amount"},{"line_number":255,"context_line":"for a second NUMA node."}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fa7e38b_3431cabc","line":252,"range":{"start_line":252,"start_character":34,"end_line":252,"end_character":44},"in_reply_to":"ffb9cba7_6761053a","updated":"2020-01-15 12:48:57.000000000","message":"the issue with that approch is how do we make hw:mem_page_size\u003dsmall or hw:mem_page_size\u003dlarge work?\nthe most common useage is not to specify a specific page size and just specify the catagory e.g. large or small\n\nwhere small is defiend as the smallest page size on a host and large is anything else.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"171f0bf04e07101f3c7ec1701e5e92cedcca4c1f","unresolved":false,"context_lines":[{"line_number":265,"context_line":"The case of unbalanced query for both memory and CPU between NUMA nodes"},{"line_number":266,"context_line":"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"},{"line_number":267,"context_line":""},{"line_number":268,"context_line":"It was previously possible to ask for assymetric usage of vCPUs and RAM between"},{"line_number":269,"context_line":"NUMA nodes, such as::"},{"line_number":270,"context_line":""},{"line_number":271,"context_line":"  hw:numa_nodes\u003d2"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_a8fa0f72","line":268,"range":{"start_line":268,"start_character":38,"end_line":268,"end_character":48},"updated":"2019-04-16 15:11:07.000000000","message":"asymmetric","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":4393,"name":"Dan Smith","email":"dms@danplanet.com","username":"danms"},"change_message_id":"171f0bf04e07101f3c7ec1701e5e92cedcca4c1f","unresolved":false,"context_lines":[{"line_number":300,"context_line":""},{"line_number":301,"context_line":"By this spec, we acknowledge the fact that in order to describe a NUMA affinity"},{"line_number":302,"context_line":"in terms of numbered request groups, we somehow need to amend the Placement API"},{"line_number":303,"context_line":"so that we could query some resources where some providers could be optionnally"},{"line_number":304,"context_line":"in the same subtree."},{"line_number":305,"context_line":"Since Placement API doesn\u0027t provide such expression for the moment, we also"},{"line_number":306,"context_line":"acknowledge the fact that NUMA affinity will be, at least for Stein, done by"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_482febf5","line":303,"range":{"start_line":303,"start_character":68,"end_line":303,"end_character":79},"updated":"2019-04-16 15:11:07.000000000","message":"optionally","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":327,"context_line":".. code::"},{"line_number":328,"context_line":""},{"line_number":329,"context_line":"  [numa]"},{"line_number":330,"context_line":"  resource_classes \u003d [VCPU, MEMORY_MB, VGPU]"},{"line_number":331,"context_line":""},{"line_number":332,"context_line":"Each of the items in the ListOpt would be a resource class. If operator says"},{"line_number":333,"context_line":"for that specific compute node nova.conf which resources classes to use, then"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_ee9fbf00","line":330,"updated":"2019-04-16 16:45:55.000000000","message":"- If I set this on a host with only one NUMA cell, do we still create a (linear) nesting structure?\n- So the case you mention on L319-321 would still apply; the only thing you\u0027re doing by providing this option is allowing the admin to decide which hosts are going to be configured with NUMA providers and which are not.\n- If you require this option to be set appropriately to create a NUMA-split provider tree, then suddenly flavors requesting hw:numa_nodes\u003d{!1} will refuse to land anywhere until that\u0027s been done. Are we going to infer numa.resource_classes\u003d[VCPU,MEMORY_MB] for certain hosts based on... some existing configuration?\n\nIn general, I\u0027m not loving this as the way to address the issue described. Instead, this is where you could apply the \"single unit resource split\" translation you (accidentally) introduced at L203. I.e. a flavor with VCPU\u003dX and no hw:numa* would be translated into a GET /a_c request like\n\n ?resources1\u003dVCPU:1\n \u0026resources2\u003dVCPU:1\n \u0026...\n \u0026resources$X\u003dVCPU:1\n \u0026group_policy\u003dnone\n\nProblem is, how do you do that ^ for MEMORY_MB? :(","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":335,"context_line":"and provide that resource class for the children NUMA nodes."},{"line_number":336,"context_line":""},{"line_number":337,"context_line":".. note::"},{"line_number":338,"context_line":"   By default, the value for that configuration option will be None for upgrade"},{"line_number":339,"context_line":"   reasons, so an operator wanting to use Placement API for NUMA workloads will"},{"line_number":340,"context_line":"   need to set it accordingly."},{"line_number":341,"context_line":"   We reserve the choice to modify the default value to implicitly list all the"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_73eaf2ac","line":338,"range":{"start_line":338,"start_character":63,"end_line":338,"end_character":67},"updated":"2019-04-15 13:27:18.000000000","message":"empty\n\nits a list field so the default should be []","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1a07981c6cbed0687435939832b0281aff103743","unresolved":false,"context_lines":[{"line_number":342,"context_line":"   NUMA-related resource classes, but that would be done in a later cycle."},{"line_number":343,"context_line":""},{"line_number":344,"context_line":".. note::"},{"line_number":345,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":346,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":347,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":348,"context_line":"   than once after upgrading."},{"line_number":349,"context_line":""},{"line_number":350,"context_line":"E.g., a nova.conf having set ``[numa]/resource_classes \u003d VGPU`` would"},{"line_number":351,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_53001668","line":348,"range":{"start_line":345,"start_character":1,"end_line":348,"end_character":29},"updated":"2019-04-15 13:27:18.000000000","message":"i think we need to have reshapes in both directions if a resource class is added or removed.\n\nalternatively we need to make it very clear that one you add a resource class you can never remove it again which might be a bad user experience. espcially if we change the default in the future and operators want to override that.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":342,"context_line":"   NUMA-related resource classes, but that would be done in a later cycle."},{"line_number":343,"context_line":""},{"line_number":344,"context_line":".. note::"},{"line_number":345,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":346,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":347,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":348,"context_line":"   than once after upgrading."},{"line_number":349,"context_line":""},{"line_number":350,"context_line":"E.g., a nova.conf having set ``[numa]/resource_classes \u003d VGPU`` would"},{"line_number":351,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_ae28d7e1","line":348,"range":{"start_line":345,"start_character":1,"end_line":348,"end_character":29},"in_reply_to":"3fce034c_28005f01","updated":"2019-04-16 16:45:55.000000000","message":"Note that this also implies allowing reshapes other than just at upgrade boundaries. OR you have to clear out your host and then it\u0027s not a reshape, just a normal update_provider_tree. We need to take a stance on which one we\u0027re going to support.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"438a42ed2448433ad229fc3f8783663081e0cab4","unresolved":false,"context_lines":[{"line_number":342,"context_line":"   NUMA-related resource classes, but that would be done in a later cycle."},{"line_number":343,"context_line":""},{"line_number":344,"context_line":".. note::"},{"line_number":345,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":346,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":347,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":348,"context_line":"   than once after upgrading."},{"line_number":349,"context_line":""},{"line_number":350,"context_line":"E.g., a nova.conf having set ``[numa]/resource_classes \u003d VGPU`` would"},{"line_number":351,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_f1dd5e42","line":348,"range":{"start_line":345,"start_character":1,"end_line":348,"end_character":29},"in_reply_to":"3fce034c_3185265b","updated":"2019-04-16 17:07:46.000000000","message":"++","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7eac80960e415301c7eb4e2562de57ca8d4d883e","unresolved":false,"context_lines":[{"line_number":342,"context_line":"   NUMA-related resource classes, but that would be done in a later cycle."},{"line_number":343,"context_line":""},{"line_number":344,"context_line":".. note::"},{"line_number":345,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":346,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":347,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":348,"context_line":"   than once after upgrading."},{"line_number":349,"context_line":""},{"line_number":350,"context_line":"E.g., a nova.conf having set ``[numa]/resource_classes \u003d VGPU`` would"},{"line_number":351,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_28005f01","line":348,"range":{"start_line":345,"start_character":1,"end_line":348,"end_character":29},"in_reply_to":"3fce034c_53001668","updated":"2019-04-16 14:52:14.000000000","message":"Good point!","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":11564,"name":"Chris Dent","email":"cdent@anticdent.org","username":"chdent"},"change_message_id":"72cd42685477e18c83d0ebf8da4b1afba0de612b","unresolved":false,"context_lines":[{"line_number":342,"context_line":"   NUMA-related resource classes, but that would be done in a later cycle."},{"line_number":343,"context_line":""},{"line_number":344,"context_line":".. note::"},{"line_number":345,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":346,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":347,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":348,"context_line":"   than once after upgrading."},{"line_number":349,"context_line":""},{"line_number":350,"context_line":"E.g., a nova.conf having set ``[numa]/resource_classes \u003d VGPU`` would"},{"line_number":351,"context_line":"only create the below tree (related to the previous NUMA topology said above) :"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_3185265b","line":348,"range":{"start_line":345,"start_character":1,"end_line":348,"end_character":29},"in_reply_to":"3fce034c_ae28d7e1","updated":"2019-04-16 16:57:46.000000000","message":"I think it is both good and inevitable that reshapes must happen whenever the need for them is detected so it really becomes a question of how that need is signalled and interpreted accurately.\n\nBased on discussions in the past few weeks (see also shared disk), it\u0027s pretty clear that at least some of the time the need for a reshape will be the result of turning on a feature via configuration: `i_would_like_you_to_be_aware_{shared_disk,numa_topology} \u003d True` and that config being re-read.","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":407,"context_line":"Other end user impact"},{"line_number":408,"context_line":"---------------------"},{"line_number":409,"context_line":"Operators will need to modify their flavors by using direct numbered request"},{"line_number":410,"context_line":"groups for exploiting the new functionality. That said, we will provide for"},{"line_number":411,"context_line":"Stein a translation mechanism which will avoid them to modify their flavors"},{"line_number":412,"context_line":"before upgrading."},{"line_number":413,"context_line":"See `Documentation Impact`_."},{"line_number":414,"context_line":""}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_cead2332","line":411,"range":{"start_line":410,"start_character":72,"end_line":411,"end_character":7},"updated":"2019-04-16 16:45:55.000000000","message":"a permanent","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":433,"context_line":"--------------"},{"line_number":434,"context_line":""},{"line_number":435,"context_line":"As described above, in order to prevent a flavor update during upgrade, we will"},{"line_number":436,"context_line":"provide a translation mechanism in Stein only that will take the existing"},{"line_number":437,"context_line":"flavor extra spec properties and transform them into Placement numbered groups"},{"line_number":438,"context_line":"query. This translation mechanism is only planned to be temporary for the Stein"},{"line_number":439,"context_line":"release. An online data migration mechanism will also need to be written for"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_6eac2f34","line":436,"range":{"start_line":436,"start_character":32,"end_line":436,"end_character":45},"updated":"2019-04-16 16:45:55.000000000","message":"forever","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":435,"context_line":"As described above, in order to prevent a flavor update during upgrade, we will"},{"line_number":436,"context_line":"provide a translation mechanism in Stein only that will take the existing"},{"line_number":437,"context_line":"flavor extra spec properties and transform them into Placement numbered groups"},{"line_number":438,"context_line":"query. This translation mechanism is only planned to be temporary for the Stein"},{"line_number":439,"context_line":"release. An online data migration mechanism will also need to be written for"},{"line_number":440,"context_line":"migrating instance nested flavors before the next release."},{"line_number":441,"context_line":""},{"line_number":442,"context_line":"Since root provider inventories will have to change when upgrading from Rocky"},{"line_number":443,"context_line":"besides the existing allocations for the ``VCPU`` and ``MEMORY_MB`` resource"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_4e67f326","line":440,"range":{"start_line":438,"start_character":7,"end_line":440,"end_character":58},"updated":"2019-04-16 16:45:55.000000000","message":"X","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"7eac80960e415301c7eb4e2562de57ca8d4d883e","unresolved":false,"context_lines":[{"line_number":439,"context_line":"release. An online data migration mechanism will also need to be written for"},{"line_number":440,"context_line":"migrating instance nested flavors before the next release."},{"line_number":441,"context_line":""},{"line_number":442,"context_line":"Since root provider inventories will have to change when upgrading from Rocky"},{"line_number":443,"context_line":"besides the existing allocations for the ``VCPU`` and ``MEMORY_MB`` resource"},{"line_number":444,"context_line":"classes, the virt drivers will be responsible for providing a reshape mechanism"},{"line_number":445,"context_line":"that will eventually call the `Placement API /reshaper endpoint`_ when"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_08748398","line":442,"range":{"start_line":442,"start_character":67,"end_line":442,"end_character":77},"updated":"2019-04-16 14:52:14.000000000","message":"Stein","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":472,"context_line":""},{"line_number":473,"context_line":"None."},{"line_number":474,"context_line":""},{"line_number":475,"context_line":"While we\u0027re commenting the use of a ``PCPU`` resource class, that spec doesn\u0027t"},{"line_number":476,"context_line":"formally depend on `CPU resources`_ spec."},{"line_number":477,"context_line":""},{"line_number":478,"context_line":"Testing"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_ae7777d4","line":475,"range":{"start_line":475,"start_character":61,"end_line":475,"end_character":65},"updated":"2019-04-16 16:45:55.000000000","message":"this","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"bba6431532ddb1eac964ebd244a9a8e4184ec09a","unresolved":false,"context_lines":[{"line_number":492,"context_line":"* `CPU resources`_ spec"},{"line_number":493,"context_line":""},{"line_number":494,"context_line":".. _`Nested Resource Providers`: https://specs.openstack.org/openstack/nova-specs/specs/queens/approved/nested-resource-providers.html"},{"line_number":495,"context_line":".. _`choosing a specific CPU pin within a NUMA node for a vCPU`: https://docs.openstack.org/nova/pike/admin/cpu-topologies.html#customizing-instance-cpu-pinning-policies"},{"line_number":496,"context_line":".. _`CPU resources`: https://review.openstack.org/#/c/555081/"},{"line_number":497,"context_line":".. _`NUMA possible extra specs`: https://docs.openstack.org/nova/pike/admin/flavors.html#extra-specs-numa-topology"},{"line_number":498,"context_line":".. _`Huge pages`: https://docs.openstack.org/nova/pike/admin/huge-pages.html"}],"source_content_type":"text/x-rst","patch_set":14,"id":"3fce034c_8bf94d4e","line":495,"range":{"start_line":495,"start_character":97,"end_line":495,"end_character":101},"updated":"2019-04-16 16:45:55.000000000","message":"worth pointing to a more recent release? (here and below)","commit_id":"6fa84c5b056e1936056b7a448a07754ba124e5c8"}],"specs/ussuri/approved/numa-topology-with-rps.rst":[{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"602ac5baefc534e71f4b548dcd73657555ab04a9","unresolved":false,"context_lines":[{"line_number":19,"context_line":".. note::"},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"  This spec only targets to model resource capabilities for NUMA nodes in some"},{"line_number":22,"context_line":"  general and quite abstract manner. To the same extent, how this model can be"},{"line_number":23,"context_line":"  queried for specific grouped request tied to a certain NUMA node (where the"},{"line_number":24,"context_line":"  main usecase is NUMA affinity) will also be discussed in other spec (yet to"},{"line_number":25,"context_line":"  be proposed). To make it clear, we won\u0027t address in this spec how we should"},{"line_number":26,"context_line":"  model NUMA-affinized hardware like PCI devices or GPUs and will discuss on"},{"line_number":27,"context_line":"  the relationships in a later spec."}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_92e80b15","line":24,"range":{"start_line":22,"start_character":57,"end_line":24,"end_character":69},"updated":"2020-01-29 21:23:55.000000000","message":"FWIW I don\u0027t think it makes sense to try to defer that discussion. Whether/how we are able to do that query 100% informs how we do the modeling. We made lots of design decisions and changes to placement to support a particular way of doing the modeling, so we should proceed by doing the modeling that way.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":19,"context_line":".. note::"},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"  This spec only targets to model resource capabilities for NUMA nodes in some"},{"line_number":22,"context_line":"  general and quite abstract manner. To the same extent, how this model can be"},{"line_number":23,"context_line":"  queried for specific grouped request tied to a certain NUMA node (where the"},{"line_number":24,"context_line":"  main usecase is NUMA affinity) will also be discussed in other spec (yet to"},{"line_number":25,"context_line":"  be proposed). To make it clear, we won\u0027t address in this spec how we should"},{"line_number":26,"context_line":"  model NUMA-affinized hardware like PCI devices or GPUs and will discuss on"},{"line_number":27,"context_line":"  the relationships in a later spec."}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_ac16cb56","line":24,"range":{"start_line":22,"start_character":57,"end_line":24,"end_character":69},"in_reply_to":"3fa7e38b_92e80b15","updated":"2020-02-12 09:09:51.000000000","message":"We agreed at the PTG with Gibi, Stephen and a couple of folks around that given this spec is already huge, we should discuss about the PCI devices affinities by another spec.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"602ac5baefc534e71f4b548dcd73657555ab04a9","unresolved":false,"context_lines":[{"line_number":41,"context_line":""},{"line_number":42,"context_line":"While the latter verification *will* still be needed for this filter, the"},{"line_number":43,"context_line":"former verification (ie. the host fit) can be done by the Placement API and"},{"line_number":44,"context_line":"the Nova scheduler (by allocation candidate)."},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we can model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_72330fc5","line":44,"updated":"2020-01-29 21:23:55.000000000","message":"I don\u0027t think this is true anymore. With rg/rp mappings in the mix, we should be able to do both via the placement query.\n\nI doubt we\u0027ll be ready to get rid of the NTF completely in U, especially if we\u0027re deferring modeling PCI devices, but we should be able to make a large percentage of it obsolete (or rather, rewrite it to use the placement response instead of doing the figuring over again).","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":41,"context_line":""},{"line_number":42,"context_line":"While the latter verification *will* still be needed for this filter, the"},{"line_number":43,"context_line":"former verification (ie. the host fit) can be done by the Placement API and"},{"line_number":44,"context_line":"the Nova scheduler (by allocation candidate)."},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we can model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_ef57d5f4","line":44,"in_reply_to":"3fa7e38b_72330fc5","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"e458369e99dbd78815235b190140c47299509de1","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_84dbade8","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"updated":"2020-01-15 15:54:55.000000000","message":"i dont think we should be modeling MEMORY_MB at the numa node level and instead shoudl be modeling mempages at the numa node level.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"f767e72a953743bcd7f0f65634110aab03baf3a7","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_c6f57053","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_26756433","updated":"2020-01-29 11:32:20.000000000","message":"i had considerd stating that we should also keep VCPU\non the root provider but i was hoping you would not bring that up as i was undecided if i wanted to assert VCPU should also not be on the numa node.\n\nPCPU should definitly be on the numa node and\nhugepages shoudl defintly be on the numa node. both hugepage and pCPUs can only be used by numa instances.\n\nvCPUs and MEMORY_MB can both be used by both numa and  non numa instnace so that make the tricky.\n\ni kind of feel that we might want to consider haveing a SCPU\nresouce class to model shared cpus per numa node.\n\nso when you want a numa affiend cpu you would request SCPU or PCPU resouces and if you wanted a floating instance you would use VCPU.\n\non line 290 below the spec intoduces a resouce_class config option \n\n  [numa]\n  resource_classes \u003d [VCPU, MEMORY_MB, PCPU]\n\nwe could allow operators to choose if they want the host to report numa local cpus via that\n\ne.g.\n  [numa]\n  resource_classes \u003d [SCPU, MEMORY_MB, PCPU]\n\nfrom a nova side when we are traslating the flavor and image into resouce request we simple need to see if the instance has a numa toplogy to determin if it should be vcpu or scpu.\n\ni kind of wanted to think about this a little more before proposing that as a solution but yes the same issue exists for cpus.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"3bcb6dae65937ef97502fd617ec7a9338578faff","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_fe99086a","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_43040713","updated":"2020-01-29 21:30:32.000000000","message":"I forgot to mention that the NUMA providers should get a HW_NUMA_ROOT trait. This is used in the etherpad. For further explanation of the design motivation behind it, see the placement docs we added around same_subtree: https://docs.openstack.org/placement/latest/user/provider-tree.html#filtering-by-same-subtree","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"3e10c36f82f9b0883e9d86b8da58d7b75409102e","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_26756433","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_660a7cd0","updated":"2020-01-29 11:09:40.000000000","message":"\u003e if we move MEMORY_MB to the numa node we need to make all vms have\n \u003e a numa topology and affine them to a numa nodes that match there\n \u003e allocations. we can nolonger allow the vm memory to float. i would\n \u003e be ok with doing that but other would not.\n \u003e \n \u003e if we want to allow vm memory to continue to float across numa\n \u003e nodes then MEMORY_MB has to stay on the root provider.\n\ndon\u0027t we have this same issue with CPUs?","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f2f9750c222282fb4e8b1d31949f8c36928c9c46","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_a97ff913","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_8242240c","updated":"2020-01-29 10:27:17.000000000","message":"Yeah, that\u0027s the exact reason why I think we should begin to accept MEMORY_MB resources for NUMA.\n\nSee also L156 for this.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"41a959a66fcbdd0b030539e3ec6cd05efa3d3132","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_49cc4589","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_8242240c","updated":"2020-01-28 13:46:50.000000000","message":"am kind of.\n\nwhen using the non-granuarl request group\ne.g. resouces:MEMORY_MB\u003d512\n\nwhile each resources class can be allocated form different RP the resocues for a singel resouces class cannot be split.\n\nso if you previously had a vm withou a numa toplogy that was floating it could be useing memory form both numa nodes.\nif it was using more memory then could fit in one numa node then after moving the MEMORY_MB to the numa node we could nolonger fit it on the host since it cant be split.\n\ni think i would prefer to keep MEMORY_MB solly for non numa instnace and tracking only 4k memory.\n\nthen have numa local mempage tracking based on the mempage info form the Resource tracker per numa node.\ni dont think we really shoudl model memory per numa node without mempage support.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"34ddc8109dee5eb0e2e2ee42fc8b45db96206371","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_8242240c","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_84dbade8","updated":"2020-01-27 17:03:44.000000000","message":"I think this is part of the iterative approach. First just move existing memory resources to numa nodes then later split them into separate pages.\n\n@Sean: or do you have something specific against MEMORY_MB under NUMA?\n\n// later\n\nWith the above tree we commit to a solution where (temporarily) both placement and the NumaTopologyFilter is needed to make scheduling decision. Which also means that filter might need to work on allocation candidates instead of hosts. But I think we discussed this before.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"653276d0d9e9ede01dd0b2c4436fe47203f92800","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_660a7cd0","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_a97ff913","updated":"2020-01-29 11:01:02.000000000","message":"if we move MEMORY_MB to the numa node we need to make all vms have a numa topology and affine them to a numa nodes that match there allocations. we can nolonger allow the vm memory to float. i would be ok with doing that but other would not.\n\nif we want to allow vm memory to continue to float across numa nodes then MEMORY_MB has to stay on the root provider.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"602ac5baefc534e71f4b548dcd73657555ab04a9","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_43040713","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_c6f57053","updated":"2020-01-29 21:23:55.000000000","message":"Summarizing discussion [1] and etherpad [2], we\u0027re proposing a three-tiered model.\n- Memory is provided as MEMORY_MB resource across grandchild providers (children of the NUMA RPs), one per page size.\n- Each such provider has a standard trait representing the \"abstract\" page size: MEMORY_PAGE_SIZE_SMALL/_LARGE\n- Each such provider has a custom trait following a predictable pattern representing the discrete page size in KB, e.g. CUSTOM_MEMORY_PAGE_SIZE_1024 means 1MB pages.\n- The step_size of the inventory is set to the page size in MB, or 1 if pages are less than 1MB, since we only allow granularity of MB in the flavor anyway.\n\nSalient design points:\n- We had agreed previously that we would force deployments to be segregated such that VMs requesting a NUMA topology would all/only land on NUMA-modeled hosts, and vice versa. So we don\u0027t need to worry about \"fitting\" a \"simple\" VM on a NUMA-modeled host. This is also why we abandoned the idea of can_split [3], which would be required to make that work.\n- If you don\u0027t ask for large pages explicitly, you get small pages. (This is what we do today.)\n- If you ask for large pages (but not a specific size), you always get all of one page size, never mixed page sizes, even if those exist on the host. (This is what we support today.)\n\nSee the etherpad for a sample host model and examples of how different flavors need to be translated to placement-ese.\n\nThis model addresses (or makes moot) many of the issues in flight below, which I\u0027ll mark accordingly.\n\n[1] http://eavesdrop.openstack.org/irclogs/%23openstack-nova/%23openstack-nova.2020-01-29.log.html#t2020-01-29T16:42:32\n[2] https://etherpad.openstack.org/p/mem_page_size_and_placement\n[3] https://review.opendev.org/658510","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"72248e74fec053967426d0cc9067aa1d1e75c233","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_c087710a","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_cef7b4d1","updated":"2020-01-31 11:35:28.000000000","message":"i think the 3 layer approch also solves any issue related to vcpus too by the way. not because of the 3 layer approch but rather because we are declaring that you will have numa hosts for numa instnaces and non numa enabled host for floating instances.  so we dont strictly need to have an SCPU that is a numa affined version of a VCPU and only report one or the other. we still could do that but we don\u0027t need too.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"2d7319e4acdcf70950cbf3eef423ac6d7bec5ae8","unresolved":false,"context_lines":[{"line_number":123,"context_line":"   | \u003cNUMA_NODE_O\u003e    |                 | \u003cNUMA_NODE_1\u003e   |"},{"line_number":124,"context_line":"   | VCPU: 8          |                 | VCPU: 8         |"},{"line_number":125,"context_line":"   | PCPU: 8          |                 | PCPU: 8         |"},{"line_number":126,"context_line":"   | MEMORY_MB: 4096  |                 | MEMORY_MB: 4096 |"},{"line_number":127,"context_line":"   +------------------+                 +-----------------+"},{"line_number":128,"context_line":""},{"line_number":129,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_cef7b4d1","line":126,"range":{"start_line":126,"start_character":4,"end_line":126,"end_character":59},"in_reply_to":"3fa7e38b_fe99086a","updated":"2020-01-30 13:18:20.000000000","message":"Based on the etherpad and the summary the proposed model looks good to me!","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"34ddc8109dee5eb0e2e2ee42fc8b45db96206371","unresolved":false,"context_lines":[{"line_number":132,"context_line":"    at the moment."},{"line_number":133,"context_line":"    Other current children RPs for a root compute node, like ones for VGPU"},{"line_number":134,"context_line":"    resources or bandwidth resources would still have their parent be the"},{"line_number":135,"context_line":"    compute node."},{"line_number":136,"context_line":""},{"line_number":137,"context_line":"Resource Provider names for NUMA nodes shall follow a convention of"},{"line_number":138,"context_line":"``nodename_NUMA#`` where nodename would be the hypervisor hostname (given by"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_c2557cc2","line":135,"updated":"2020-01-27 17:03:44.000000000","message":"+1","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"653276d0d9e9ede01dd0b2c4436fe47203f92800","unresolved":false,"context_lines":[{"line_number":153,"context_line":"for classes that are not NUMA-related."},{"line_number":154,"context_line":""},{"line_number":155,"context_line":""},{"line_number":156,"context_line":".. note:: `huge pages`_ (or specific memory page size) are a separate feature"},{"line_number":157,"context_line":"          that needs a separate discussion on how to provide that feature using"},{"line_number":158,"context_line":"          Placement resource traits or classes hence being out of this spec."},{"line_number":159,"context_line":"          A potential solution would involve a reshape but given we don\u0027t have"},{"line_number":160,"context_line":"          yet a consensus, that\u0027s why we prefer to just provide the above."},{"line_number":161,"context_line":""},{"line_number":162,"context_line":""},{"line_number":163,"context_line":"Asking for vCPUs split evenly between NUMA nodes"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_06706872","line":160,"range":{"start_line":156,"start_character":0,"end_line":160,"end_character":74},"updated":"2020-01-29 11:01:02.000000000","message":"to be clear i was stating above that i dont think its accpetable to defer mempage/hugepage support to another sepc.\n\nif we model memory per numa node we shoudl model it properly per page size form the start so either we only do CPUs in this spec or we do cpu and memroy but if we do memory we have to do hugepage too.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"602ac5baefc534e71f4b548dcd73657555ab04a9","unresolved":false,"context_lines":[{"line_number":153,"context_line":"for classes that are not NUMA-related."},{"line_number":154,"context_line":""},{"line_number":155,"context_line":""},{"line_number":156,"context_line":".. note:: `huge pages`_ (or specific memory page size) are a separate feature"},{"line_number":157,"context_line":"          that needs a separate discussion on how to provide that feature using"},{"line_number":158,"context_line":"          Placement resource traits or classes hence being out of this spec."},{"line_number":159,"context_line":"          A potential solution would involve a reshape but given we don\u0027t have"},{"line_number":160,"context_line":"          yet a consensus, that\u0027s why we prefer to just provide the above."},{"line_number":161,"context_line":""},{"line_number":162,"context_line":""},{"line_number":163,"context_line":"Asking for vCPUs split evenly between NUMA nodes"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_834c3f75","line":160,"range":{"start_line":156,"start_character":0,"end_line":160,"end_character":74},"in_reply_to":"3fa7e38b_06706872","updated":"2020-01-29 21:23:55.000000000","message":"See L126","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"34ddc8109dee5eb0e2e2ee42fc8b45db96206371","unresolved":false,"context_lines":[{"line_number":193,"context_line":"    to make sure that NUMA Resource Providers will only support the above"},{"line_number":194,"context_line":"    resource classes (ie. PCPU, VCPU and MEMORY_MB) so that necessarly it would"},{"line_number":195,"context_line":"    isolate the NUMA resource providers from any other provider like the ones"},{"line_number":196,"context_line":"    supporting bandwidth resources or VGPU resources."},{"line_number":197,"context_line":""},{"line_number":198,"context_line":""},{"line_number":199,"context_line":"Asking for vCPUs split unevenly between NUMA nodes"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_828c6439","line":196,"updated":"2020-01-27 17:03:44.000000000","message":"Still group_policy\u003disolate will act on the resource groups coming from neutron. If a single server can have two qos ports with the same vnic_type and same physnet then the resulting placement query might fail due to isolating the two groups that might target the same provider.\nI think the final solution is \u0027can_split\u0027 but that has not been implemented yet in placement \n\nhttps://review.opendev.org/#/c/658510/","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"602ac5baefc534e71f4b548dcd73657555ab04a9","unresolved":false,"context_lines":[{"line_number":193,"context_line":"    to make sure that NUMA Resource Providers will only support the above"},{"line_number":194,"context_line":"    resource classes (ie. PCPU, VCPU and MEMORY_MB) so that necessarly it would"},{"line_number":195,"context_line":"    isolate the NUMA resource providers from any other provider like the ones"},{"line_number":196,"context_line":"    supporting bandwidth resources or VGPU resources."},{"line_number":197,"context_line":""},{"line_number":198,"context_line":""},{"line_number":199,"context_line":"Asking for vCPUs split unevenly between NUMA nodes"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_6397c39d","line":196,"in_reply_to":"3fa7e38b_5b3e39e3","updated":"2020-01-29 21:23:55.000000000","message":"See L126 and the examples in the etherpad. TL;DR: group_policy is meaningless/irrelevant in the proposed model, so we can just leave it alone and let it be whatever is specified by the flavor/port or defaulted or whatever.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":193,"context_line":"    to make sure that NUMA Resource Providers will only support the above"},{"line_number":194,"context_line":"    resource classes (ie. PCPU, VCPU and MEMORY_MB) so that necessarly it would"},{"line_number":195,"context_line":"    isolate the NUMA resource providers from any other provider like the ones"},{"line_number":196,"context_line":"    supporting bandwidth resources or VGPU resources."},{"line_number":197,"context_line":""},{"line_number":198,"context_line":""},{"line_number":199,"context_line":"Asking for vCPUs split unevenly between NUMA nodes"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_2f8c0d80","line":196,"in_reply_to":"3fa7e38b_6397c39d","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f2f9750c222282fb4e8b1d31949f8c36928c9c46","unresolved":false,"context_lines":[{"line_number":193,"context_line":"    to make sure that NUMA Resource Providers will only support the above"},{"line_number":194,"context_line":"    resource classes (ie. PCPU, VCPU and MEMORY_MB) so that necessarly it would"},{"line_number":195,"context_line":"    isolate the NUMA resource providers from any other provider like the ones"},{"line_number":196,"context_line":"    supporting bandwidth resources or VGPU resources."},{"line_number":197,"context_line":""},{"line_number":198,"context_line":""},{"line_number":199,"context_line":"Asking for vCPUs split unevenly between NUMA nodes"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_5b3e39e3","line":196,"in_reply_to":"3fa7e38b_828c6439","updated":"2020-01-29 10:27:17.000000000","message":"Argh, need to consider some alternative approach then.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"41a959a66fcbdd0b030539e3ec6cd05efa3d3132","unresolved":false,"context_lines":[{"line_number":193,"context_line":"    to make sure that NUMA Resource Providers will only support the above"},{"line_number":194,"context_line":"    resource classes (ie. PCPU, VCPU and MEMORY_MB) so that necessarly it would"},{"line_number":195,"context_line":"    isolate the NUMA resource providers from any other provider like the ones"},{"line_number":196,"context_line":"    supporting bandwidth resources or VGPU resources."},{"line_number":197,"context_line":""},{"line_number":198,"context_line":""},{"line_number":199,"context_line":"Asking for vCPUs split unevenly between NUMA nodes"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_89a31db0","line":196,"in_reply_to":"3fa7e38b_828c6439","updated":"2020-01-28 13:46:50.000000000","message":"no i dont think can split is the solution. i think we need to consider how we use groups very carfully.\n\ncan split was for a different usecase.\nand correct it is not supproted in placmenet yet.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"602ac5baefc534e71f4b548dcd73657555ab04a9","unresolved":false,"context_lines":[{"line_number":208,"context_line":"second NUMA node."},{"line_number":209,"context_line":""},{"line_number":210,"context_line":"For example, for a flavor of 8 VCPUs with extra specs set with"},{"line_number":211,"context_line":"``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_cpus.1\u003d2,3,4,5,6,7``, it will"},{"line_number":212,"context_line":"translate the Placement query straight into"},{"line_number":213,"context_line":"``group_policy\u003disolate\u0026resources1:VCPU\u003d2\u0026resources2:VCPU\u003d6``"},{"line_number":214,"context_line":""},{"line_number":215,"context_line":".. warning ::"},{"line_number":216,"context_line":""}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_dec1ac82","line":213,"range":{"start_line":211,"start_character":0,"end_line":213,"end_character":60},"updated":"2020-01-29 21:23:55.000000000","message":"I believe strongly that we should be primarily focused on supporting the existing hw:numa* flavor syntax and translating that to placement-ese under the covers, which seems to be what you\u0027re doing here, so ++. We should *not* try to mix and match flavor-ese and placement-ese, and we should not try to document how the user can write their flavor in placement-ese to achieve their desired topology.\n\n(I would even support *forbidding* granular request groups in the flavor. This would be a regression, as we technically support them today; but I can\u0027t imagine anyone is actually using them for any reasonable purpose.)","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":208,"context_line":"second NUMA node."},{"line_number":209,"context_line":""},{"line_number":210,"context_line":"For example, for a flavor of 8 VCPUs with extra specs set with"},{"line_number":211,"context_line":"``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_cpus.1\u003d2,3,4,5,6,7``, it will"},{"line_number":212,"context_line":"translate the Placement query straight into"},{"line_number":213,"context_line":"``group_policy\u003disolate\u0026resources1:VCPU\u003d2\u0026resources2:VCPU\u003d6``"},{"line_number":214,"context_line":""},{"line_number":215,"context_line":".. warning ::"},{"line_number":216,"context_line":""}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_72489688","line":213,"range":{"start_line":211,"start_character":0,"end_line":213,"end_character":60},"in_reply_to":"3fa7e38b_dec1ac82","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"602ac5baefc534e71f4b548dcd73657555ab04a9","unresolved":false,"context_lines":[{"line_number":268,"context_line":""},{"line_number":269,"context_line":"To be discussed in other spec as stated above."},{"line_number":270,"context_line":""},{"line_number":271,"context_line":"Optionally configured NUMA resources"},{"line_number":272,"context_line":"------------------------------------"},{"line_number":273,"context_line":""},{"line_number":274,"context_line":"Given there are NUMA workloads but also non-NUMA workloads, it\u0027s also important"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_1e0744ca","line":271,"range":{"start_line":271,"start_character":0,"end_line":271,"end_character":36},"updated":"2020-01-29 21:23:55.000000000","message":"Okay, so while I appreciate the flexibility this gives us, I\u0027m not convinced it\u0027s necessary or desirable if we go with the modeling suggested at L126. Since we decided on the segregation thing, we do need a toggle, but I think it may be better if it was all-or-nothing. It would definitely simplify the test/support surface, versus trying to make sure our flavor-to-placement translations work for all possible permutations.\n\n(The upgrade path to a world where devices become NUMA affined also works seamlessly in this case, I believe. In a subsequent release when computes start putting VGPUs in grandchild providers under NUMA nodes, a flavor can start demanding affinity, which will simply cause the VGPU\u0027s request group to be added to the same_subtree with the other groups for its NUMA node. That flavor will only be able to land on computes that have already upgraded, which is as it should be. Legacy flavors that don\u0027t demand affinity won\u0027t muck with same_subtree, and the VGPU can be provided from wherever, on both old and new computes.)","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"a6759df4545cd182688633f2f29a839013f74ca4","unresolved":false,"context_lines":[{"line_number":287,"context_line":".. code::"},{"line_number":288,"context_line":""},{"line_number":289,"context_line":"  [numa]"},{"line_number":290,"context_line":"  resource_classes \u003d [VCPU, MEMORY_MB, PCPU]"},{"line_number":291,"context_line":""},{"line_number":292,"context_line":"Each of the items in the ListOpt would be a resource class. If operator says"},{"line_number":293,"context_line":"for that specific compute node nova.conf which resources classes to use, then"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_a5284aed","line":290,"updated":"2020-01-31 03:33:29.000000000","message":"Does this need to be a list? Seems like it\u0027s an all-or-nothing thing... Like, there won\u0027t be a situation where PCPU inventories will be on the NUMA nodes, but hte MEMORY_MB inventory will be on the compute node itself.\n\n\u003clater\u003e, oh, for upgrade considerations...","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"34ddc8109dee5eb0e2e2ee42fc8b45db96206371","unresolved":false,"context_lines":[{"line_number":325,"context_line":"   +------------------+   +--------------+     +-----------------+"},{"line_number":326,"context_line":"   | NUMA1_rp         |   | \u003cGPU_type_1\u003e |     | NUMA2_rp        |"},{"line_number":327,"context_line":"   +------------------+   | VGPU: 8      |     +-----------------+"},{"line_number":328,"context_line":"                          +--------------+"},{"line_number":329,"context_line":""},{"line_number":330,"context_line":""},{"line_number":331,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_e2b438d6","line":328,"updated":"2020-01-27 17:03:44.000000000","message":"Doesn\u0027t the resource_classes \u003d VGPU means the VGPU resources needs to be under NUMA nodes? On this picture the VGPU resources are not under the NUMA node","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"f2f9750c222282fb4e8b1d31949f8c36928c9c46","unresolved":false,"context_lines":[{"line_number":325,"context_line":"   +------------------+   +--------------+     +-----------------+"},{"line_number":326,"context_line":"   | NUMA1_rp         |   | \u003cGPU_type_1\u003e |     | NUMA2_rp        |"},{"line_number":327,"context_line":"   +------------------+   | VGPU: 8      |     +-----------------+"},{"line_number":328,"context_line":"                          +--------------+"},{"line_number":329,"context_line":""},{"line_number":330,"context_line":""},{"line_number":331,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_4991a552","line":328,"in_reply_to":"3fa7e38b_e2b438d6","updated":"2020-01-29 10:27:17.000000000","message":"That\u0027s normal : we don\u0027t plan to have VGPU resources to be parenting by NUMA RPs for this spec. \nAs you can see for the option, \u0027VGPU\u0027 won\u0027t be a possible value by this spec (at least until we discuss by another spec how to work with PCI devices)","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"602ac5baefc534e71f4b548dcd73657555ab04a9","unresolved":false,"context_lines":[{"line_number":365,"context_line":""},{"line_number":366,"context_line":"Other end user impact"},{"line_number":367,"context_line":"---------------------"},{"line_number":368,"context_line":"Operators could want to modify their flavors by using direct numbered request"},{"line_number":369,"context_line":"groups for exploiting the new functionality. That said, we will provide a"},{"line_number":370,"context_line":"translation mechanism within the scheduler servcie which will avoid them to"},{"line_number":371,"context_line":"modify their flavors if they don\u0027t want to."},{"line_number":372,"context_line":"See `Documentation Impact`_."}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_9e96d449","line":369,"range":{"start_line":368,"start_character":10,"end_line":369,"end_character":6},"updated":"2020-01-29 21:23:55.000000000","message":"As noted above, I don\u0027t like this idea at all. If we don\u0027t actually forbid it in code, we should discourage it (or at least not *en*courage it) via documentation.","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"a6759df4545cd182688633f2f29a839013f74ca4","unresolved":false,"context_lines":[{"line_number":412,"context_line":"-----------"},{"line_number":413,"context_line":""},{"line_number":414,"context_line":"* bauzas"},{"line_number":415,"context_line":"* someone else for Hyper-V"},{"line_number":416,"context_line":""},{"line_number":417,"context_line":"Feature Liaison"},{"line_number":418,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":15,"id":"3fa7e38b_65325260","line":415,"updated":"2020-01-31 03:33:29.000000000","message":"lulz","commit_id":"e82d864d931155b67daf8b1495ba64a1548b0586"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":19,"context_line":".. note::"},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"  This spec only targets to model resource capabilities for NUMA nodes in some"},{"line_number":22,"context_line":"  general and quite abstract manner. To the same extent, how this model can be"},{"line_number":23,"context_line":"  queried for specific grouped request tied to a certain NUMA node (where the"},{"line_number":24,"context_line":"  main usecase is NUMA affinity) will also be discussed in other spec (yet to"},{"line_number":25,"context_line":"  be proposed). To make it clear, we won\u0027t address in this spec how we should"},{"line_number":26,"context_line":"  model NUMA-affinized hardware like PCI devices or GPUs and will discuss on"},{"line_number":27,"context_line":"  the relationships in a later spec."},{"line_number":28,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_7bb6e7fe","line":25,"range":{"start_line":22,"start_character":57,"end_line":25,"end_character":14},"updated":"2020-02-10 18:52:43.000000000","message":"This statement isn\u0027t true at this point. The main thrust of this design is proc/mem affinity. (The subsequent statement about deferring device affinity is still true for now.)","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":19,"context_line":".. note::"},{"line_number":20,"context_line":""},{"line_number":21,"context_line":"  This spec only targets to model resource capabilities for NUMA nodes in some"},{"line_number":22,"context_line":"  general and quite abstract manner. To the same extent, how this model can be"},{"line_number":23,"context_line":"  queried for specific grouped request tied to a certain NUMA node (where the"},{"line_number":24,"context_line":"  main usecase is NUMA affinity) will also be discussed in other spec (yet to"},{"line_number":25,"context_line":"  be proposed). To make it clear, we won\u0027t address in this spec how we should"},{"line_number":26,"context_line":"  model NUMA-affinized hardware like PCI devices or GPUs and will discuss on"},{"line_number":27,"context_line":"  the relationships in a later spec."},{"line_number":28,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_f3d433be","line":25,"range":{"start_line":22,"start_character":57,"end_line":25,"end_character":14},"in_reply_to":"3fa7e38b_7bb6e7fe","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":31,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":32,"context_line":""},{"line_number":33,"context_line":"The NUMATopologyFilter checks a number of resources, including emulator threads"},{"line_number":34,"context_line":"policies, CPU pinned instances and memory page sizes. Actually, it does two"},{"line_number":35,"context_line":"different verifications :"},{"line_number":36,"context_line":""},{"line_number":37,"context_line":"- *whether* some host can fit the query because it has enough capacity"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ece3ff01","line":34,"range":{"start_line":34,"start_character":54,"end_line":34,"end_character":62},"updated":"2020-02-10 20:50:41.000000000","message":"nit: \"Additionally\", not \"actually\"","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":31,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":32,"context_line":""},{"line_number":33,"context_line":"The NUMATopologyFilter checks a number of resources, including emulator threads"},{"line_number":34,"context_line":"policies, CPU pinned instances and memory page sizes. Actually, it does two"},{"line_number":35,"context_line":"different verifications :"},{"line_number":36,"context_line":""},{"line_number":37,"context_line":"- *whether* some host can fit the query because it has enough capacity"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_53cc2732","line":34,"range":{"start_line":34,"start_character":54,"end_line":34,"end_character":62},"in_reply_to":"3fa7e38b_ece3ff01","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":31,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":32,"context_line":""},{"line_number":33,"context_line":"The NUMATopologyFilter checks a number of resources, including emulator threads"},{"line_number":34,"context_line":"policies, CPU pinned instances and memory page sizes. Actually, it does two"},{"line_number":35,"context_line":"different verifications :"},{"line_number":36,"context_line":""},{"line_number":37,"context_line":"- *whether* some host can fit the query because it has enough capacity"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_8a7901fb","line":34,"range":{"start_line":34,"start_character":54,"end_line":34,"end_character":62},"in_reply_to":"3fa7e38b_ece3ff01","updated":"2020-02-11 14:14:22.000000000","message":"both would work.\nthe previous section does not say how it check the resources.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":39,"context_line":"- *which* resource(s) should be used for this query (eg. which pCPUs or NUMA"},{"line_number":40,"context_line":"  node)"},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"While the latter verification *will* still be done by this filter for the"},{"line_number":43,"context_line":"moment, the former verification (ie. the host fit) can be done by the Placement"},{"line_number":44,"context_line":"API and the Nova scheduler (by allocation candidate)."},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we can model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_1bf1f396","line":44,"range":{"start_line":42,"start_character":30,"end_line":44,"end_character":53},"updated":"2020-02-10 18:52:43.000000000","message":"Again, this is no longer accurate. The NTF will still have a role to play, but Placement *will* be addressing both of the above (the \u0027whether\u0027 and the \u0027which\u0027).\n\nTo expand a bit, the filter will:\n- still have exclusive control over *device* affinity;\n- sometimes (when anti-affinity is needed) be used to decide which of several allocation candidates the scheduler picks;\n- be used on the host to decide which specific CPUs and memory pages are assigned to the guest -- but now only within the bounds of the subset already allocated from Placement.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"cf99e147fc9799b4474e25963be47c760961b344","unresolved":false,"context_lines":[{"line_number":39,"context_line":"- *which* resource(s) should be used for this query (eg. which pCPUs or NUMA"},{"line_number":40,"context_line":"  node)"},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"While the latter verification *will* still be done by this filter for the"},{"line_number":43,"context_line":"moment, the former verification (ie. the host fit) can be done by the Placement"},{"line_number":44,"context_line":"API and the Nova scheduler (by allocation candidate)."},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we can model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_d347b051","line":44,"range":{"start_line":42,"start_character":30,"end_line":44,"end_character":53},"in_reply_to":"3fa7e38b_104bfe28","updated":"2020-02-11 15:24:29.000000000","message":"no the NTF calls shared code in the nova.virt.hardware module which is called form both the compute, scheduler and api.\n\nthe NTF works by invoking numa_fit_instance_to_host\nwhich which is the same code the that the compute agent uses\n\nhttps://github.com/openstack/nova/blob/4bdecee385ccf68b1b27ae9ead9a72861ea6cc8d/nova/virt/hardware.py#L1999\n\nif that function can fit the requested numa toplogy to the host it retuns a fully populated InstanceNUMATopology object\nif it can it and returns None if it cant. that is what the numa toplogy filter uses to determin if the host passes.\n\n\nhttps://github.com/openstack/nova/blob/4bdecee385ccf68b1b27ae9ead9a72861ea6cc8d/nova/scheduler/filters/numa_topology_filter.py#L100-L122\n\nthe compute manger calls the same function here\nhttps://github.com/openstack/nova/blob/4bdecee385ccf68b1b27ae9ead9a72861ea6cc8d/nova/scheduler/filters/numa_topology_filter.py#L100-L122\n\nas part of the construction of a resouce tracker claim.\n\ni would prefer to do this in the conductor before we call spawn eventually but for now we end up doing fit to host part twice.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":39,"context_line":"- *which* resource(s) should be used for this query (eg. which pCPUs or NUMA"},{"line_number":40,"context_line":"  node)"},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"While the latter verification *will* still be done by this filter for the"},{"line_number":43,"context_line":"moment, the former verification (ie. the host fit) can be done by the Placement"},{"line_number":44,"context_line":"API and the Nova scheduler (by allocation candidate)."},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we can model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_8e8ef823","line":44,"range":{"start_line":42,"start_character":30,"end_line":44,"end_character":53},"in_reply_to":"3fa7e38b_1bf1f396","updated":"2020-02-12 09:09:51.000000000","message":"\u003e Again, this is no longer accurate. The NTF will still have a role\n \u003e to play, but Placement *will* be addressing both of the above (the\n \u003e \u0027whether\u0027 and the \u0027which\u0027).\n \u003e \n \u003e To expand a bit, the filter will:\n \u003e - still have exclusive control over *device* affinity;\n \u003e - sometimes (when anti-affinity is needed) be used to decide which\n \u003e of several allocation candidates the scheduler picks;\n \u003e - be used on the host to decide which specific CPUs and memory\n \u003e pages are assigned to the guest -- but now only within the bounds\n \u003e of the subset already allocated from Placement.\n\nI rephrased it completely.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":39,"context_line":"- *which* resource(s) should be used for this query (eg. which pCPUs or NUMA"},{"line_number":40,"context_line":"  node)"},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"While the latter verification *will* still be done by this filter for the"},{"line_number":43,"context_line":"moment, the former verification (ie. the host fit) can be done by the Placement"},{"line_number":44,"context_line":"API and the Nova scheduler (by allocation candidate)."},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we can model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ca8ff9d0","line":44,"range":{"start_line":42,"start_character":30,"end_line":44,"end_character":53},"in_reply_to":"3fa7e38b_1bf1f396","updated":"2020-02-11 14:14:22.000000000","message":"yes although the actual assignment will still be done on the compute node not in the filter. since we do not caim the resource tracker until we do the downcall all the filter does is say we could claim them. the claiming gets done by the compute manager during spawn.\n\ni have expressed interest in doing this in the conductor on the past but people keep telling me to wait for placement to fix it even though it cant by design since assignment is out of scope.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"90e50b59574872e8464bb83dcf1ad0db3afae3ae","unresolved":false,"context_lines":[{"line_number":39,"context_line":"- *which* resource(s) should be used for this query (eg. which pCPUs or NUMA"},{"line_number":40,"context_line":"  node)"},{"line_number":41,"context_line":""},{"line_number":42,"context_line":"While the latter verification *will* still be done by this filter for the"},{"line_number":43,"context_line":"moment, the former verification (ie. the host fit) can be done by the Placement"},{"line_number":44,"context_line":"API and the Nova scheduler (by allocation candidate)."},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we can model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_104bfe28","line":44,"range":{"start_line":42,"start_character":30,"end_line":44,"end_character":53},"in_reply_to":"3fa7e38b_ca8ff9d0","updated":"2020-02-11 14:39:02.000000000","message":"I thought the NTF runs on both the scheduler and the compute","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we can model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"},{"line_number":48,"context_line":"a specific instance from a resource provider representing a NUMA node."},{"line_number":49,"context_line":""},{"line_number":50,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":51,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_9b2ac3e3","line":48,"range":{"start_line":48,"start_character":27,"end_line":48,"end_character":44},"updated":"2020-02-10 18:52:43.000000000","message":"technically \"resource provider subtree\" now that we\u0027re splitting proc and mem into separate providers.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we can model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"},{"line_number":48,"context_line":"a specific instance from a resource provider representing a NUMA node."},{"line_number":49,"context_line":""},{"line_number":50,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":51,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_8eb7d8d7","line":48,"range":{"start_line":48,"start_character":27,"end_line":48,"end_character":44},"in_reply_to":"3fa7e38b_9b2ac3e3","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"},{"line_number":48,"context_line":"a specific instance from a resource provider representing a NUMA node."},{"line_number":49,"context_line":""},{"line_number":50,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":51,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"},{"line_number":52,"context_line":"will be able to accurately query an amount information for dedicated CPUs and"},{"line_number":53,"context_line":"memory pages resources in the same fashion we do for other resource classes"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ecbc5fd6","line":50,"range":{"start_line":50,"start_character":18,"end_line":50,"end_character":27},"updated":"2020-02-10 20:50:41.000000000","message":"\"allocated\" has a very specific meaning in Placement-world, I think you mean \"requires\" or something similar?","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"},{"line_number":48,"context_line":"a specific instance from a resource provider representing a NUMA node."},{"line_number":49,"context_line":""},{"line_number":50,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":51,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"},{"line_number":52,"context_line":"will be able to accurately query an amount information for dedicated CPUs and"},{"line_number":53,"context_line":"memory pages resources in the same fashion we do for other resource classes"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0a8991c6","line":50,"range":{"start_line":50,"start_character":18,"end_line":50,"end_character":27},"in_reply_to":"3fa7e38b_ecbc5fd6","updated":"2020-02-11 14:14:22.000000000","message":"no allocated is correct in this case as he is refering to placement allcoations in this context.\n\nthis is not refering to cpu assinment on the host.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":49,"context_line":""},{"line_number":50,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":51,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"},{"line_number":52,"context_line":"will be able to accurately query an amount information for dedicated CPUs and"},{"line_number":53,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":54,"context_line":"like disk and RAM."},{"line_number":55,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_aca0a7a5","line":52,"range":{"start_line":52,"start_character":27,"end_line":52,"end_character":54},"updated":"2020-02-10 20:50:41.000000000","message":"aka \"request allocation candidates\"?","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":49,"context_line":""},{"line_number":50,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":51,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"},{"line_number":52,"context_line":"will be able to accurately query an amount information for dedicated CPUs and"},{"line_number":53,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":54,"context_line":"like disk and RAM."},{"line_number":55,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_2edd8411","line":52,"range":{"start_line":52,"start_character":27,"end_line":52,"end_character":54},"in_reply_to":"3fa7e38b_aca0a7a5","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":49,"context_line":""},{"line_number":50,"context_line":"If an instance is allocated dedicated CPU or memory page resources from a"},{"line_number":51,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"},{"line_number":52,"context_line":"will be able to accurately query an amount information for dedicated CPUs and"},{"line_number":53,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":54,"context_line":"like disk and RAM."},{"line_number":55,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ea62f514","line":52,"range":{"start_line":52,"start_character":27,"end_line":52,"end_character":54},"in_reply_to":"3fa7e38b_aca0a7a5","updated":"2020-02-11 14:14:22.000000000","message":"no here he is referign to usign the placement usage api endpoint to get capsity infomation related to the cpus and memory exctra per numa node/RP.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":51,"context_line":"resource provider representing a specific NUMA node on a compute host, then we"},{"line_number":52,"context_line":"will be able to accurately query an amount information for dedicated CPUs and"},{"line_number":53,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":54,"context_line":"like disk and RAM."},{"line_number":55,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":56,"context_line":"within a NUMA node for a vCPU`_) would still be only done by the virt driver,"},{"line_number":57,"context_line":"and are not covered by this spec."}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0ec8c84a","line":54,"updated":"2020-02-12 09:09:51.000000000","message":"I completely removed this paragraph which was redundant.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":52,"context_line":"will be able to accurately query an amount information for dedicated CPUs and"},{"line_number":53,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":54,"context_line":"like disk and RAM."},{"line_number":55,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":56,"context_line":"within a NUMA node for a vCPU`_) would still be only done by the virt driver,"},{"line_number":57,"context_line":"and are not covered by this spec."},{"line_number":58,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_6ca62fc2","line":55,"updated":"2020-02-10 20:50:41.000000000","message":"Weird newline.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":52,"context_line":"will be able to accurately query an amount information for dedicated CPUs and"},{"line_number":53,"context_line":"memory pages resources in the same fashion we do for other resource classes"},{"line_number":54,"context_line":"like disk and RAM."},{"line_number":55,"context_line":"That said, non resource-related features (like `choosing a specific CPU pin"},{"line_number":56,"context_line":"within a NUMA node for a vCPU`_) would still be only done by the virt driver,"},{"line_number":57,"context_line":"and are not covered by this spec."},{"line_number":58,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_4ee240d5","line":55,"in_reply_to":"3fa7e38b_6ca62fc2","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":79,"context_line":"vCPUs on the same instance (for parallel computing reasons) would like to"},{"line_number":80,"context_line":"ensure that those CPU resources are provided by the same NUMA node, or some"},{"line_number":81,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":82,"context_line":"I/O-bound of course)."},{"line_number":83,"context_line":"For the moment, if you\u0027re an operator, you can use flavor extra specs to"},{"line_number":84,"context_line":"indicate a desired NUMA topology for your instance like:"},{"line_number":85,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_8c84cb0f","line":82,"updated":"2020-02-10 20:50:41.000000000","message":"Weird newline.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":79,"context_line":"vCPUs on the same instance (for parallel computing reasons) would like to"},{"line_number":80,"context_line":"ensure that those CPU resources are provided by the same NUMA node, or some"},{"line_number":81,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":82,"context_line":"I/O-bound of course)."},{"line_number":83,"context_line":"For the moment, if you\u0027re an operator, you can use flavor extra specs to"},{"line_number":84,"context_line":"indicate a desired NUMA topology for your instance like:"},{"line_number":85,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_cecdd05b","line":82,"in_reply_to":"3fa7e38b_8c84cb0f","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":87,"context_line":""},{"line_number":88,"context_line":"  $ openstack flavor set FLAVOR-NAME \\"},{"line_number":89,"context_line":"      --property hw:numa_nodes\u003dFLAVOR-NODES \\"},{"line_number":90,"context_line":"      --property hw:numa_cpus.N\u003dFLAVOR-CORES \\"},{"line_number":91,"context_line":"      --property hw:numa_mem.N\u003dFLAVOR-MEMORY"},{"line_number":92,"context_line":""},{"line_number":93,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":94,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ec6a1f4e","line":91,"range":{"start_line":90,"start_character":0,"end_line":91,"end_character":44},"updated":"2020-02-10 20:50:41.000000000","message":"This doesn\u0027t quite fit - above you\u0027re talking about *host* NUMA topology (in terms of memory access latency), but the hw:numa_cpus and hw:numa_mem are about *guest* NUMA topology.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":87,"context_line":""},{"line_number":88,"context_line":"  $ openstack flavor set FLAVOR-NAME \\"},{"line_number":89,"context_line":"      --property hw:numa_nodes\u003dFLAVOR-NODES \\"},{"line_number":90,"context_line":"      --property hw:numa_cpus.N\u003dFLAVOR-CORES \\"},{"line_number":91,"context_line":"      --property hw:numa_mem.N\u003dFLAVOR-MEMORY"},{"line_number":92,"context_line":""},{"line_number":93,"context_line":"See all the `NUMA possible extra specs`_ for a flavor."},{"line_number":94,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_6e25bc15","line":91,"range":{"start_line":90,"start_character":0,"end_line":91,"end_character":44},"in_reply_to":"3fa7e38b_ec6a1f4e","updated":"2020-02-12 09:09:51.000000000","message":"I just added the word \"guest NUMA topology\" to clarify.\nHTH.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":109,"context_line":""},{"line_number":110,"context_line":"Given virt drivers can amend a provider tree given by the compute node"},{"line_number":111,"context_line":"ResourceTracker, then the libvirt driver could create child providers for each"},{"line_number":112,"context_line":"of the 2 sockets representing separate NUMA node but also for each of any"},{"line_number":113,"context_line":"device having possible NUMA affinity, like one GPU device and one SRIOV PF:"},{"line_number":114,"context_line":""},{"line_number":115,"context_line":".. code::"},{"line_number":116,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_fb4f37bc","line":113,"range":{"start_line":112,"start_character":49,"end_line":113,"end_character":36},"updated":"2020-02-10 18:52:43.000000000","message":"This is confusing since the diagram doesn\u0027t show any devices. The note on L147-151 already explains that we\u0027re not messing with those in U, so I think you should omit references to devices here.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":109,"context_line":""},{"line_number":110,"context_line":"Given virt drivers can amend a provider tree given by the compute node"},{"line_number":111,"context_line":"ResourceTracker, then the libvirt driver could create child providers for each"},{"line_number":112,"context_line":"of the 2 sockets representing separate NUMA node but also for each of any"},{"line_number":113,"context_line":"device having possible NUMA affinity, like one GPU device and one SRIOV PF:"},{"line_number":114,"context_line":""},{"line_number":115,"context_line":".. code::"},{"line_number":116,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0e368850","line":113,"range":{"start_line":112,"start_character":49,"end_line":113,"end_character":36},"in_reply_to":"3fa7e38b_0c9d3b57","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":109,"context_line":""},{"line_number":110,"context_line":"Given virt drivers can amend a provider tree given by the compute node"},{"line_number":111,"context_line":"ResourceTracker, then the libvirt driver could create child providers for each"},{"line_number":112,"context_line":"of the 2 sockets representing separate NUMA node but also for each of any"},{"line_number":113,"context_line":"device having possible NUMA affinity, like one GPU device and one SRIOV PF:"},{"line_number":114,"context_line":""},{"line_number":115,"context_line":".. code::"},{"line_number":116,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_aa363d00","line":113,"range":{"start_line":112,"start_character":49,"end_line":113,"end_character":36},"in_reply_to":"3fa7e38b_0c9d3b57","updated":"2020-02-11 14:14:22.000000000","message":"the\n  |\n /+\\\n\nunder the root rp is there to model sub devices.\nthat is why i added it in the etherpad.\nbut yes we could remove this. i think sylvain was just trying to say that there will be child RP for devices(vGPUs) and for numa nodes beneath the root RP.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":109,"context_line":""},{"line_number":110,"context_line":"Given virt drivers can amend a provider tree given by the compute node"},{"line_number":111,"context_line":"ResourceTracker, then the libvirt driver could create child providers for each"},{"line_number":112,"context_line":"of the 2 sockets representing separate NUMA node but also for each of any"},{"line_number":113,"context_line":"device having possible NUMA affinity, like one GPU device and one SRIOV PF:"},{"line_number":114,"context_line":""},{"line_number":115,"context_line":".. code::"},{"line_number":116,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0c9d3b57","line":113,"range":{"start_line":112,"start_character":49,"end_line":113,"end_character":36},"in_reply_to":"3fa7e38b_fb4f37bc","updated":"2020-02-10 20:50:41.000000000","message":"Yeah, drop this please :)","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":139,"context_line":"   +-------------------------+   +----------------------------+   +-------------------------------+"},{"line_number":140,"context_line":"   |MEMORY_PAGE_SIZE_SMALL   |   |MEMORY_PAGE_SIZE_LARGE      |   |MEMORY_PAGE_SIZE_LARGE         |"},{"line_number":141,"context_line":"   |CUSTOM_MEMORY_PAGE_SIZE_4|   |CUSTOM_MEMORY_PAGE_SIZE_2048|   |CUSTOM_MEMORY_PAGE_SIZE_1048576|"},{"line_number":142,"context_line":"   +-------------------------+   +----------------------------+   +-------------------------------+"},{"line_number":143,"context_line":""},{"line_number":144,"context_line":""},{"line_number":145,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_eccd3f5b","line":142,"updated":"2020-02-10 20:50:41.000000000","message":"So, for anyone who has context from reviewing this spec previously, they\u0027ll immediately understand what those MEMORY_ RPs are, but for a first time reader they appear out of the blue - would need the explanation of why we\u0027re doing it this way moved before this graphic.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":139,"context_line":"   +-------------------------+   +----------------------------+   +-------------------------------+"},{"line_number":140,"context_line":"   |MEMORY_PAGE_SIZE_SMALL   |   |MEMORY_PAGE_SIZE_LARGE      |   |MEMORY_PAGE_SIZE_LARGE         |"},{"line_number":141,"context_line":"   |CUSTOM_MEMORY_PAGE_SIZE_4|   |CUSTOM_MEMORY_PAGE_SIZE_2048|   |CUSTOM_MEMORY_PAGE_SIZE_1048576|"},{"line_number":142,"context_line":"   +-------------------------+   +----------------------------+   +-------------------------------+"},{"line_number":143,"context_line":""},{"line_number":144,"context_line":""},{"line_number":145,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ee59ac5b","line":142,"in_reply_to":"3fa7e38b_eccd3f5b","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":139,"context_line":"   +-------------------------+   +----------------------------+   +-------------------------------+"},{"line_number":140,"context_line":"   |MEMORY_PAGE_SIZE_SMALL   |   |MEMORY_PAGE_SIZE_LARGE      |   |MEMORY_PAGE_SIZE_LARGE         |"},{"line_number":141,"context_line":"   |CUSTOM_MEMORY_PAGE_SIZE_4|   |CUSTOM_MEMORY_PAGE_SIZE_2048|   |CUSTOM_MEMORY_PAGE_SIZE_1048576|"},{"line_number":142,"context_line":"   +-------------------------+   +----------------------------+   +-------------------------------+"},{"line_number":143,"context_line":""},{"line_number":144,"context_line":""},{"line_number":145,"context_line":".. note ::"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_8d059bc2","line":142,"in_reply_to":"3fa7e38b_eccd3f5b","updated":"2020-02-11 14:14:22.000000000","message":"for anyone following this topic for the last few years i presented this exact toplogy in at least twice before. The first time publically was as a motivating usecase for why we needed to add nested resouce providers to placment back at the qeens ptg when we had this lovely discussion \nhttps://etherpad.openstack.org/p/nova-ptg-queens-generic-device-management\n\ni have litrally drawn this on a whiteboad at at least 2 ptgs after that and many internal meetups over the years.\n\nthis is the frist time it has been proposed since we have had the ablity to actully query nested resouces, since i have not mentioned this design agin upstream since i left intel until now at least in the context of mempages.\n\ni did draw a similar diagram at the denver ptg where i noted that the cpu RPs should also be moved to child RP of the numa node to model l3 cache affintiy for CAT but we decided to not add supprot for cache allocation to nova.\n\nSo i hope this is familiar to most people as this is how i have wanted to model memory since before we agreed to add nesting to placement. That said if you were just looking at the spec and had not been following irc then it might not be obvious why we are going back to this design.\n\nthe expanded version of this also contains device rps for sriov and would move the cpus to cache region RP leaving the numa RP with no inventories but that is out of scope for this spec.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":146,"context_line":""},{"line_number":147,"context_line":"    As we said above, we don\u0027t want to support children PCI devices for Ussuri"},{"line_number":148,"context_line":"    at the moment."},{"line_number":149,"context_line":"    Other current children RPs for a root compute node, like ones for VGPU"},{"line_number":150,"context_line":"    resources or bandwidth resources would still have their parent be the"},{"line_number":151,"context_line":"    compute node."},{"line_number":152,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_6cfb8f79","line":149,"updated":"2020-02-10 20:50:41.000000000","message":"Weird newline.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":146,"context_line":""},{"line_number":147,"context_line":"    As we said above, we don\u0027t want to support children PCI devices for Ussuri"},{"line_number":148,"context_line":"    at the moment."},{"line_number":149,"context_line":"    Other current children RPs for a root compute node, like ones for VGPU"},{"line_number":150,"context_line":"    resources or bandwidth resources would still have their parent be the"},{"line_number":151,"context_line":"    compute node."},{"line_number":152,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_29181e70","line":149,"in_reply_to":"3fa7e38b_6cfb8f79","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":165,"context_line":"  node has."},{"line_number":166,"context_line":"* ``PCPU``: for telling how many possible pinned cores the NUMA node has."},{"line_number":167,"context_line":""},{"line_number":168,"context_line":"A specific trait should be decorating it : ``HW_NUMA_ROOT``."},{"line_number":169,"context_line":""},{"line_number":170,"context_line":"Memory pagesize RP"},{"line_number":171,"context_line":"------------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_bbda1fe4","line":168,"range":{"start_line":168,"start_character":45,"end_line":168,"end_character":57},"updated":"2020-02-10 18:52:43.000000000","message":"nts: be on the lookout for explanations of these traits and their purpose in the query\n\n[Later] Yeah, I didn\u0027t see that anywhere. We should explain why HW_NUMA_ROOT needs to exist and what it\u0027s used for. Similarly below we should justify the page size traits.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":165,"context_line":"  node has."},{"line_number":166,"context_line":"* ``PCPU``: for telling how many possible pinned cores the NUMA node has."},{"line_number":167,"context_line":""},{"line_number":168,"context_line":"A specific trait should be decorating it : ``HW_NUMA_ROOT``."},{"line_number":169,"context_line":""},{"line_number":170,"context_line":"Memory pagesize RP"},{"line_number":171,"context_line":"------------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_8927d2b5","line":168,"range":{"start_line":168,"start_character":45,"end_line":168,"end_character":57},"in_reply_to":"3fa7e38b_bbda1fe4","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":173,"context_line":"Each `NUMA RP`_ should be having children RPs for each possible memory page"},{"line_number":174,"context_line":"size per host, and having a single resource class :"},{"line_number":175,"context_line":""},{"line_number":176,"context_line":"* ``MEMORY_MB``: for telling how much memory the NUMA node has."},{"line_number":177,"context_line":""},{"line_number":178,"context_line":""},{"line_number":179,"context_line":"This RP would be decorated by two traits :"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_9bef2385","line":176,"range":{"start_line":176,"start_character":62,"end_line":176,"end_character":63},"updated":"2020-02-10 18:52:43.000000000","message":"...in that specific page size","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":179,"context_line":"This RP would be decorated by two traits :"},{"line_number":180,"context_line":" - either ``MEMORY_PAGE_SIZE_SMALL`` (default if not configured) or"},{"line_number":181,"context_line":"   ``MEMORY_PAGE_SIZE_LARGE`` (if large pages are configured)"},{"line_number":182,"context_line":" - the size of the page size : CUSTOM_MEMORY_PAGE_SIZE_# (where # is the size)"},{"line_number":183,"context_line":""},{"line_number":184,"context_line":""},{"line_number":185,"context_line":"Compute node RP"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_9b9403e9","line":182,"range":{"start_line":182,"start_character":73,"end_line":182,"end_character":77},"updated":"2020-02-10 18:52:43.000000000","message":"...in $unit (I think we decided KB).\n\nImportantly, the naming of this trait has to be deterministic, since both the virt driver and the scheduler need to be able to generate it independently and get the same string.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":179,"context_line":"This RP would be decorated by two traits :"},{"line_number":180,"context_line":" - either ``MEMORY_PAGE_SIZE_SMALL`` (default if not configured) or"},{"line_number":181,"context_line":"   ``MEMORY_PAGE_SIZE_LARGE`` (if large pages are configured)"},{"line_number":182,"context_line":" - the size of the page size : CUSTOM_MEMORY_PAGE_SIZE_# (where # is the size)"},{"line_number":183,"context_line":""},{"line_number":184,"context_line":""},{"line_number":185,"context_line":"Compute node RP"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0938c2c8","line":182,"range":{"start_line":182,"start_character":73,"end_line":182,"end_character":77},"in_reply_to":"3fa7e38b_2c9a775f","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"31faa37c3b6c0c441854cf92b02098c0f8852140","unresolved":false,"context_lines":[{"line_number":179,"context_line":"This RP would be decorated by two traits :"},{"line_number":180,"context_line":" - either ``MEMORY_PAGE_SIZE_SMALL`` (default if not configured) or"},{"line_number":181,"context_line":"   ``MEMORY_PAGE_SIZE_LARGE`` (if large pages are configured)"},{"line_number":182,"context_line":" - the size of the page size : CUSTOM_MEMORY_PAGE_SIZE_# (where # is the size)"},{"line_number":183,"context_line":""},{"line_number":184,"context_line":""},{"line_number":185,"context_line":"Compute node RP"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_2c9a775f","line":182,"range":{"start_line":182,"start_character":73,"end_line":182,"end_character":77},"in_reply_to":"3fa7e38b_9b9403e9","updated":"2020-02-10 20:19:04.000000000","message":"yes in kb because the schduler need to be virt diriver indepenent so we need something that will work for all hyperivros and since the kerenl use 4KB pages by default we cannot have a granularity larger the KB as our minium unit size.\n\nin the specific case of the libvirt driver this is also conviant since it nativly reports memory in KB and the api allows page size to be specified in KB too.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":187,"context_line":""},{"line_number":188,"context_line":"The root Resource Provider (ie. the compute node) would only provide resources"},{"line_number":189,"context_line":"for classes that are not NUMA-related."},{"line_number":190,"context_line":"Existing children RPs for vGPUs or bandwidth-aware resources should still be"},{"line_number":191,"context_line":"having this parent (until we discuss about NUMA affinity for PCI devices)."},{"line_number":192,"context_line":""},{"line_number":193,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_4c04936c","line":190,"updated":"2020-02-10 20:50:41.000000000","message":"Weird newline.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":187,"context_line":""},{"line_number":188,"context_line":"The root Resource Provider (ie. the compute node) would only provide resources"},{"line_number":189,"context_line":"for classes that are not NUMA-related."},{"line_number":190,"context_line":"Existing children RPs for vGPUs or bandwidth-aware resources should still be"},{"line_number":191,"context_line":"having this parent (until we discuss about NUMA affinity for PCI devices)."},{"line_number":192,"context_line":""},{"line_number":193,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_89759299","line":190,"in_reply_to":"3fa7e38b_4c04936c","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":199,"context_line":"That said, having the compute node resources to be split between multiple"},{"line_number":200,"context_line":"NUMA nodes could be a problem for those non-NUMA workloads if they want to keep"},{"line_number":201,"context_line":"the existing behaviour."},{"line_number":202,"context_line":"For example, say an instance with 2 vCPUs and one host having 2 NUMA nodes but"},{"line_number":203,"context_line":"each one only accepting one VCPU, then the Placement API wouldn\u0027t accept that"},{"line_number":204,"context_line":"host (given each nested RP only accepts one VCPU). For that reason, we need to"},{"line_number":205,"context_line":"have a configuration for saying which resources should be nested."}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_6c094f81","line":202,"updated":"2020-02-10 20:50:41.000000000","message":"Weird newline.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":199,"context_line":"That said, having the compute node resources to be split between multiple"},{"line_number":200,"context_line":"NUMA nodes could be a problem for those non-NUMA workloads if they want to keep"},{"line_number":201,"context_line":"the existing behaviour."},{"line_number":202,"context_line":"For example, say an instance with 2 vCPUs and one host having 2 NUMA nodes but"},{"line_number":203,"context_line":"each one only accepting one VCPU, then the Placement API wouldn\u0027t accept that"},{"line_number":204,"context_line":"host (given each nested RP only accepts one VCPU). For that reason, we need to"},{"line_number":205,"context_line":"have a configuration for saying which resources should be nested."}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_2966def1","line":202,"in_reply_to":"3fa7e38b_6c094f81","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":209,"context_line":""},{"line_number":210,"context_line":".. code::"},{"line_number":211,"context_line":""},{"line_number":212,"context_line":"  enable_numa_topology \u003d \u003cbool\u003e (default to False)"},{"line_number":213,"context_line":""},{"line_number":214,"context_line":""},{"line_number":215,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_de56e9f9","line":212,"range":{"start_line":212,"start_character":2,"end_line":212,"end_character":50},"updated":"2020-02-10 18:52:43.000000000","message":"We talked with Dan about the long-term goal being that all hosts present NUMA topologies, when we\u0027ve somehow solved the packing problem such that we don\u0027t bounce non-NUMA-aware workloads as described on L202-4. That being the case, perhaps we should present this a bit differently. Here\u0027s an idea for discussion:\n\n [workarounds]\n disable_numa_provider_topology \u003d \u003cbool\u003e (default True for U)\n\nThis has the exact same affect as described, but it sets the appropriate expectation for the future.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":209,"context_line":""},{"line_number":210,"context_line":".. code::"},{"line_number":211,"context_line":""},{"line_number":212,"context_line":"  enable_numa_topology \u003d \u003cbool\u003e (default to False)"},{"line_number":213,"context_line":""},{"line_number":214,"context_line":""},{"line_number":215,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_a9f88ec1","line":212,"range":{"start_line":212,"start_character":2,"end_line":212,"end_character":50},"in_reply_to":"3fa7e38b_6c414f62","updated":"2020-02-12 09:09:51.000000000","message":"\u003e Thinking out loud here - with this option named as proposed, how do\n \u003e we handle the upgrade path for a future when devices have NUMA\n \u003e affinity in placement? Do we even need anything besides reshaping\n \u003e the provider tree to put the devices under their NUMA nodes, and\n \u003e some placement magic for the optional affinity part? Maybe we won\u0027t\n \u003e need any further config options besides this one...\n\nYup, because it should be the virt driver responsibility to identify which NUMA node the device is colocated with and accordinglu reshape the RP (say a pGPU) to be a child RP of the NUMA node.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":209,"context_line":""},{"line_number":210,"context_line":".. code::"},{"line_number":211,"context_line":""},{"line_number":212,"context_line":"  enable_numa_topology \u003d \u003cbool\u003e (default to False)"},{"line_number":213,"context_line":""},{"line_number":214,"context_line":""},{"line_number":215,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_6d75bf32","line":212,"range":{"start_line":212,"start_character":2,"end_line":212,"end_character":50},"in_reply_to":"3fa7e38b_6c414f62","updated":"2020-02-11 14:14:22.000000000","message":"we should not need anything beside this option no.\n\nnext cycle we will need to do a reshape to move the gpus from there current location as child rps of the root to beneth the numa nodes.\n\ni have previously susgested just changeing the parrent RP uuid on the existing RPs as we have 1 RP per phyical GPU but that is not supported by placement so we will need to create new RP and reshape the allcoations.\n\nwe will start doing that if numa reporting is enabled which by V should be the default. if its disable they can continue to live as child RPs of the root RP.\n\nwe do not require numa afinity for gpus that is fine for now.\nthat said i know that people want to enforce that at some point.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":209,"context_line":""},{"line_number":210,"context_line":".. code::"},{"line_number":211,"context_line":""},{"line_number":212,"context_line":"  enable_numa_topology \u003d \u003cbool\u003e (default to False)"},{"line_number":213,"context_line":""},{"line_number":214,"context_line":""},{"line_number":215,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_6c414f62","line":212,"range":{"start_line":212,"start_character":2,"end_line":212,"end_character":50},"in_reply_to":"3fa7e38b_de56e9f9","updated":"2020-02-10 20:50:41.000000000","message":"Thinking out loud here - with this option named as proposed, how do we handle the upgrade path for a future when devices have NUMA affinity in placement? Do we even need anything besides reshaping the provider tree to put the devices under their NUMA nodes, and some placement magic for the optional affinity part? Maybe we won\u0027t need any further config options besides this one...","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"31faa37c3b6c0c441854cf92b02098c0f8852140","unresolved":false,"context_lines":[{"line_number":209,"context_line":""},{"line_number":210,"context_line":".. code::"},{"line_number":211,"context_line":""},{"line_number":212,"context_line":"  enable_numa_topology \u003d \u003cbool\u003e (default to False)"},{"line_number":213,"context_line":""},{"line_number":214,"context_line":""},{"line_number":215,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ec845f59","line":212,"range":{"start_line":212,"start_character":2,"end_line":212,"end_character":50},"in_reply_to":"3fa7e38b_de56e9f9","updated":"2020-02-10 20:19:04.000000000","message":"it feels a little odd to have a workaround config option enabled by default in a release.\n\ni would suggest\n\n[compute]\nplacement_numa_reporting\u003dTure|False \n\nDefault to False for U and in V we default to True  and deprecate the option to remove it in W+\n\ni do think we should intend to enable numa reporting by default in the medium term.\n\nwith that said while i fine it weird to enable a workaround  by default i would be ok with that approach too.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":209,"context_line":""},{"line_number":210,"context_line":".. code::"},{"line_number":211,"context_line":""},{"line_number":212,"context_line":"  enable_numa_topology \u003d \u003cbool\u003e (default to False)"},{"line_number":213,"context_line":""},{"line_number":214,"context_line":""},{"line_number":215,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_69285635","line":212,"range":{"start_line":212,"start_character":2,"end_line":212,"end_character":50},"in_reply_to":"3fa7e38b_ec845f59","updated":"2020-02-12 09:09:51.000000000","message":"\u003e it feels a little odd to have a workaround config option enabled by\n \u003e default in a release.\n \u003e \n \u003e i would suggest\n \u003e \n \u003e [compute]\n \u003e placement_numa_reporting\u003dTure|False\n \u003e \n \u003e Default to False for U and in V we default to True  and deprecate\n \u003e the option to remove it in W+\n \u003e \n \u003e i do think we should intend to enable numa reporting by default in\n \u003e the medium term.\n \u003e \n \u003e with that said while i fine it weird to enable a workaround  by\n \u003e default i would be ok with that approach too.\n\nOk, I\u0027ll propose the [workarounds] trick.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8bfc45cfb55cf0ea75d28a50250cadf2bb577495","unresolved":false,"context_lines":[{"line_number":220,"context_line":".. note::"},{"line_number":221,"context_line":"   By default, the value for that configuration option will be False for"},{"line_number":222,"context_line":"   upgrade reasons, so an operator wanting to use Placement API for NUMA"},{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_5228af83","line":223,"updated":"2020-02-10 17:01:42.000000000","message":"So If I have a deployment that has a bunch of NUMA aware hosts and upgrade to Ussuri with this feature implemented then the NUMA aware part of my cloud will act totally resource deprived until I reconfigure every affected compute hosts. Can we add an upgrade check that prints out those hosts that are NUMA aware based on that there are huge pages or pinned CPUs allocated on the host but are doesn\u0027t have the enable_numa_topology set to True in the config?\n\nThis way I can see which compute needs to be reconfigured.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"304b118005077e32db65100a55966fbeb90685c8","unresolved":false,"context_lines":[{"line_number":220,"context_line":".. note::"},{"line_number":221,"context_line":"   By default, the value for that configuration option will be False for"},{"line_number":222,"context_line":"   upgrade reasons, so an operator wanting to use Placement API for NUMA"},{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_f359935b","line":223,"in_reply_to":"3fa7e38b_0c8f7b2f","updated":"2020-02-11 09:52:01.000000000","message":"\u003e all systemd that are likely to be in production are numa hosts.\n \u003e that was true 5 years ago letalone today.\n \u003e \n\nDoes it mean that potentially all host needs to be re-shaped to continue being NUMA-aware? But then why we want to keep host not reshaped? \n\nI think there are host today where the machine is NUMA aware but _it is not used to host NUMA-aware workloads_ and those are the host we don\u0027t want to reshape. I would like to have a tool that tells me which hosts in my deployment needs to be re-shaped and which not.\n\n \u003e we could maybe infer that the host is a numa host if it had\n \u003e cpu_dedicated_set defiend since those cpus can only be consumed by\n \u003e numa affiend vms.\n \u003e we cannot use the absence of cpu_dedicated_set to infer its not a\n \u003e numa host. nor can we use the presence or absence of\n \u003e hugepages as they could be used for the hosts.\n \u003e \n \u003e so not in general we cant detect if a host is intended to be used\n \u003e for numa vms. we could print out a list of host that have vms with\n \u003e a numa toplogy but that could miss hosts if you are packign rather\n \u003e then spreading vms.\n\nI think if today I have a host where there are 1G huge pages available to nova VMs then that host needs to be re-shaped to keep the 1G huge pages available for VMs. Isn\u0027t it?","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"cf99e147fc9799b4474e25963be47c760961b344","unresolved":false,"context_lines":[{"line_number":220,"context_line":".. note::"},{"line_number":221,"context_line":"   By default, the value for that configuration option will be False for"},{"line_number":222,"context_line":"   upgrade reasons, so an operator wanting to use Placement API for NUMA"},{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_33e4040b","line":223,"in_reply_to":"3fa7e38b_0d252bc0","updated":"2020-02-11 15:24:29.000000000","message":"ya the tool would basicaly be a nova status check or similar that would advice that X host should be mark as reporting numa because we have detected that they may be running numa workloads or have configured resouces to be avaiable that are only consumable with numa instance in the case of PCPUs and pmem.\n\nif people have been using aggreate as advised then the tool is not needed. for those that have not been it might be of some use but i agree it should not do the reshap or configure it jsut list the host which appear to be configured for numa and leave the rest to the operator to do.\n\nim not sure its actually need but if it is limited in scope then fine.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"0d8d9b0a14fc48e85c28eb634664abb695bb0f40","unresolved":false,"context_lines":[{"line_number":220,"context_line":".. note::"},{"line_number":221,"context_line":"   By default, the value for that configuration option will be False for"},{"line_number":222,"context_line":"   upgrade reasons, so an operator wanting to use Placement API for NUMA"},{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_de0497c7","line":223,"in_reply_to":"3fa7e38b_33e4040b","updated":"2020-02-11 16:06:07.000000000","message":"I might chasing a non existing use case here. So I treat this as a low prio thing and won\u0027t block on it. BUT assume that today I have a deployment where I host both NUMA aware and non-NUMA workloads (on different hosts). I decide to upgrade to Ussuri. As I did not notice the reno about this change I will go with the default config (so disable_placement_numa_reporting \u003d False). This means I will lose all the NUMA aware capacity of my cloud. Which sounds bad to me. I would develop some kind of warning for the deployer if possible. \n\nHm, or will I loose all the NUMA aware capacity? Will the Ussuri scheduler starts generating the new placement requests _before_ the computes are all upgraded to Ussuri and had a chance to re-shape?\n\n// later \n\nI see the new Upgrade impact section with a fallback mechanism. That solves my use case. So I rest my case.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"31faa37c3b6c0c441854cf92b02098c0f8852140","unresolved":false,"context_lines":[{"line_number":220,"context_line":".. note::"},{"line_number":221,"context_line":"   By default, the value for that configuration option will be False for"},{"line_number":222,"context_line":"   upgrade reasons, so an operator wanting to use Placement API for NUMA"},{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0c8f7b2f","line":223,"in_reply_to":"3fa7e38b_5228af83","updated":"2020-02-10 20:19:04.000000000","message":"all systemd that are likely to be in production are numa hosts. that was true 5 years ago letalone today.\n\nwe could maybe infer that the host is a numa host if it had cpu_dedicated_set defiend since those cpus can only be consumed by numa affiend vms.\nwe cannot use the absence of cpu_dedicated_set to infer its not a numa host. nor can we use the presence or absence of\nhugepages as they could be used for the hosts.\n\nso not in general we cant detect if a host is intended to be used for numa vms. we could print out a list of host that have vms with a numa toplogy but that could miss hosts if you are packign rather then spreading vms.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":220,"context_line":".. note::"},{"line_number":221,"context_line":"   By default, the value for that configuration option will be False for"},{"line_number":222,"context_line":"   upgrade reasons, so an operator wanting to use Placement API for NUMA"},{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_09752233","line":223,"in_reply_to":"3fa7e38b_f359935b","updated":"2020-02-12 09:09:51.000000000","message":"\u003e I think if today I have a host where there are 1G huge pages\n \u003e available to nova VMs then that host needs to be re-shaped to keep\n \u003e the 1G huge pages available for VMs. Isn\u0027t it?\n\nYes, for large pages, you\u0027ll be asked to reshape your hosts.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"90e50b59574872e8464bb83dcf1ad0db3afae3ae","unresolved":false,"context_lines":[{"line_number":220,"context_line":".. note::"},{"line_number":221,"context_line":"   By default, the value for that configuration option will be False for"},{"line_number":222,"context_line":"   upgrade reasons, so an operator wanting to use Placement API for NUMA"},{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0d252bc0","line":223,"in_reply_to":"3fa7e38b_f359935b","updated":"2020-02-11 14:39:02.000000000","message":"Maybe I was misunderstanding the purpose of the tool.\n\nI thought we had seen a common practice of clouds already being segregated along AZ lines for this purpose. So the tool would just be for deployments that don\u0027t do that, correct?\n\nIf so, the tool is really just advisory, it doesn\u0027t need to be perfect. We can use the techniques discussed above, but document the other factors that should influence the operator\u0027s decision whether to reshape.\n\nTo be clear, the tool should *not* automatically reshape (or configure for reshaping) hosts it identifies. It just reports.\n\nNot sure if I\u0027ve addressed the concern here.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":220,"context_line":".. note::"},{"line_number":221,"context_line":"   By default, the value for that configuration option will be False for"},{"line_number":222,"context_line":"   upgrade reasons, so an operator wanting to use Placement API for NUMA"},{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_841337b7","line":223,"in_reply_to":"3fa7e38b_f359935b","updated":"2020-02-11 14:14:22.000000000","message":"there are host that have a numa toplogy that are not used for numa workloads. every intel server with more then 1 socket prdouced since nehelem in 2008 has a numa topology.\nthat also holds more or less for other architectures too so its been over a decade since it was common not to have a numa topology in the server world and its even common to have numa hardware in the client desktop space now.\n\nthe only reason to have config attribute is  so you can declare this numa capable host will not be used for numa workloads.\n\nthere are two case where if a resouce is present we can know that it is a numa hosts, PCPU(controlled by cpu_dedicated_set and PMEM(persisten memory). both PMEM and PCPU can only be used by vms with a numa toplogy.\n\nhugepages may or may not be used by vms even if they are availabel on the host. unless hw:mem_page_size\u003dlarge is defiend in the flavor or image then a vm landing on that host wont use the huge pages.\n\nyou can have a host with a numa workload without using any of those resocues e.g. a vm with explict 4k memory (hw:mem_page_size\u003dsmall) or explict numa (hw:numa_nodes\u003d\u003canything\u003e) this is often done for host with sriov devices.\n\nso from only looking at the individual host we cant ever know for sure that it wont have a numa workload even if it has only 1 numa node reported. since there is no standard way to declare the host aggregate that is used to partition hosts we cant rely on that to create a tool.\n\nwe might be able to develop a huristic that guess based on a number of factors, e.g. if PMEM or PCPUs are present its defiantly a numa hosts.\n\nif it has hugepages its likely a numa hosts but they may not be available used by vms.There is technically a reserved hugepage config option to say these hugepages are not availabel for vms. im not sure that people use it since the recomendation before it was added was to modify the to restrict which hugetlbfs mount point could be used by vms.\nso you would mount the hugepage to be used for vms in one mount point and the hugepages to be used for the host in another.\n\nif all vms in a host aggregate have a numa toplogy then we could maybe infer all hosts in that host aggreated are numa hosts. a slightly more conservative heuristic would assume that any host that has a vm with a numa toplogy is a numa host and report it as such.\n\nputting all of that together we would have to report a host as a numa host if the following conditions were met.\nthe host had PCPU,PMEM or hugepages, had a vm with a numa toplogy or was a member of a host aggreate where all vms had a numa toplogy.\n\noperators should know this already without the aid of a tool\nif they dont they have been negligent int the deployment of there cloud because they have been required to partion hosts based on numa since it was added in juno? maybe before that.\n\nif the above was acceptable we could maybe provide a tool to indicate its likely a numa host but i think its unreasonable to expect a tool that will tell you definitively.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_acc66714","line":226,"updated":"2020-02-10 20:50:41.000000000","message":"This might be an implementation detail, but how will we reshape PCPU inventories? Currently they\u0027re all on the compute node / root RP, so if you have instances using, respectively, 4, 8, and 16 PCPUs, the RP will have an total of 28 PCPUs allocated.\n\nWhen we reshape, we can\u0027t randomly split this allocation of 28 CPUs across child NUMA node RPs - we need to know which instance is affined to which host NUMA node, and put its PCPU allocations against the correct NUMA node RP.\n\nI guess this is doable, and maybe we don\u0027t need to design it here in this spec, but it feels like a pretty big deal.\n\nPS: it also applies to memory pages.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_09c76267","line":226,"in_reply_to":"3fa7e38b_53366703","updated":"2020-02-12 09:09:51.000000000","message":"Yup, indeed, it *has* to be some lookup of every instance and identify its NUMA colocality but we had kind of same concerns when we had to reshape GPU allocations (finding every GPU usage for each instance)","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_e44f8bbc","line":226,"in_reply_to":"3fa7e38b_53366703","updated":"2020-02-11 14:14:22.000000000","message":"you can request PCPU or hugepage without haveign a numa toplogy so we will never need to split the allocations randomly. we know exactly shiche cpus and hugepages are allcoated to the vm in the instance numa toplogy so we can may that directly to the resouce providres.\n\nthis should be fairly simple. the only ting we need to do is ensure when the virt driver names the RP it follows an naming convention that it decided to uniquly associtate each numa rp with the host host numa node.\n\ne.g. ${hypervisor-hostname}_NUMA_${numa node number}\n\nif we follow that convention specificlly then we can potentally depend on it form cyborg and neutron too so that they can report there inventories per numa node in the future.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"304b118005077e32db65100a55966fbeb90685c8","unresolved":false,"context_lines":[{"line_number":223,"context_line":"   workloads will need to set it accordingly."},{"line_number":224,"context_line":""},{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_53366703","line":226,"in_reply_to":"3fa7e38b_acc66714","updated":"2020-02-11 09:52:01.000000000","message":"yeah this will need some fat algos. We have to check each instance on the host to see where they are pinned and split the resources and then the allocations accordingly.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":225,"context_line":".. note::"},{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"},{"line_number":230,"context_line":"   backwards to ask to have a non-NUMA host, which would then trigger a reshape"},{"line_number":231,"context_line":"   asking the resources to be moved back to the root compute resource provider."},{"line_number":232,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_beae8de2","line":229,"range":{"start_line":228,"start_character":64,"end_line":229,"end_character":12},"updated":"2020-02-10 18:52:43.000000000","message":"eh? Do what more than once?","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"},{"line_number":230,"context_line":"   backwards to ask to have a non-NUMA host, which would then trigger a reshape"},{"line_number":231,"context_line":"   asking the resources to be moved back to the root compute resource provider."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":234,"context_line":"          makes the population of those nested Resource Providers to necessarly"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_7e5a55f5","line":231,"range":{"start_line":229,"start_character":65,"end_line":231,"end_character":79},"updated":"2020-02-10 18:52:43.000000000","message":"Do we really want to allow that? It would be a whole separate (and nontrivial) reshape method.\n\nI guess it would be kind of cruel for the operator to cut over and then find his non-NUMA flavors are basically useless but he can\u0027t go back.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":5754,"name":"Alex Xu","email":"hejie.xu@intel.com","username":"xuhj"},"change_message_id":"5ab16c254438cd2c4c6607b106f91ebd8900cba3","unresolved":false,"context_lines":[{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"},{"line_number":230,"context_line":"   backwards to ask to have a non-NUMA host, which would then trigger a reshape"},{"line_number":231,"context_line":"   asking the resources to be moved back to the root compute resource provider."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":234,"context_line":"          makes the population of those nested Resource Providers to necessarly"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ca5db954","line":231,"range":{"start_line":229,"start_character":65,"end_line":231,"end_character":79},"in_reply_to":"3fa7e38b_6ef93c1a","updated":"2020-02-11 13:20:30.000000000","message":"If we don\u0027t allow to backward to a non-NUMA host, that sounds like we announce the non-NUMA instance is deprecated in U release.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"},{"line_number":230,"context_line":"   backwards to ask to have a non-NUMA host, which would then trigger a reshape"},{"line_number":231,"context_line":"   asking the resources to be moved back to the root compute resource provider."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":234,"context_line":"          makes the population of those nested Resource Providers to necessarly"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_2495a3e2","line":231,"range":{"start_line":229,"start_character":65,"end_line":231,"end_character":79},"in_reply_to":"3fa7e38b_6ef93c1a","updated":"2020-02-11 14:14:22.000000000","message":"i dont think we want to allow that.\ni think the only way we should allow the reshape to be un done is to delete the RP and all allocations and recreate it.\nonce the config option is set then we shoudl not allow you to undo the reshap in code.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"304b118005077e32db65100a55966fbeb90685c8","unresolved":false,"context_lines":[{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"},{"line_number":230,"context_line":"   backwards to ask to have a non-NUMA host, which would then trigger a reshape"},{"line_number":231,"context_line":"   asking the resources to be moved back to the root compute resource provider."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":234,"context_line":"          makes the population of those nested Resource Providers to necessarly"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_6ef93c1a","line":231,"range":{"start_line":229,"start_character":65,"end_line":231,"end_character":79},"in_reply_to":"3fa7e38b_7e5a55f5","updated":"2020-02-11 09:52:01.000000000","message":"If we want to prevent that then we need special care for the config handling. Basically not to allow starting the compute service if the config would indicate a reshape-back transition. It is doable just want to make sure we don\u0027t forget it","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"90e50b59574872e8464bb83dcf1ad0db3afae3ae","unresolved":false,"context_lines":[{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"},{"line_number":230,"context_line":"   backwards to ask to have a non-NUMA host, which would then trigger a reshape"},{"line_number":231,"context_line":"   asking the resources to be moved back to the root compute resource provider."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":234,"context_line":"          makes the population of those nested Resource Providers to necessarly"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_10c0de4a","line":231,"range":{"start_line":229,"start_character":65,"end_line":231,"end_character":79},"in_reply_to":"3fa7e38b_8d0a7b26","updated":"2020-02-11 14:39:02.000000000","message":"Having slept on it, I think it\u0027s reasonable to allow bidirectional reshaping considering we don\u0027t yet provide all the capabilities you need on a NUMA-aware host. In the future, once we\u0027ve got that support, we can disable the backwards reshaper. (Realistically, we don\u0027t need to \u0027disable\u0027 it since we\u0027ll be deprecating/removing the conf option that would allow you to trigger it.)","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"0d8d9b0a14fc48e85c28eb634664abb695bb0f40","unresolved":false,"context_lines":[{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"},{"line_number":230,"context_line":"   backwards to ask to have a non-NUMA host, which would then trigger a reshape"},{"line_number":231,"context_line":"   asking the resources to be moved back to the root compute resource provider."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":234,"context_line":"          makes the population of those nested Resource Providers to necessarly"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_d3149052","line":231,"range":{"start_line":229,"start_character":65,"end_line":231,"end_character":79},"in_reply_to":"3fa7e38b_ca5db954","updated":"2020-02-11 16:06:07.000000000","message":"as it is not preventing adding new computes with non-NUMA aware config I don\u0027t think this would be really a deprecation.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":226,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":227,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":228,"context_line":"   explaining that it would be a performance issue if operators do that more"},{"line_number":229,"context_line":"   than once after upgrading. This also implies that an operator can go"},{"line_number":230,"context_line":"   backwards to ask to have a non-NUMA host, which would then trigger a reshape"},{"line_number":231,"context_line":"   asking the resources to be moved back to the root compute resource provider."},{"line_number":232,"context_line":""},{"line_number":233,"context_line":".. note:: Since the discovery of a NUMA topology is made by virt drivers, it"},{"line_number":234,"context_line":"          makes the population of those nested Resource Providers to necessarly"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_8d0a7b26","line":231,"range":{"start_line":229,"start_character":65,"end_line":231,"end_character":79},"in_reply_to":"3fa7e38b_ca5db954","updated":"2020-02-11 14:14:22.000000000","message":"not we will allow non numa instance but we will not allow you to reshap after you have move a host to report numa toplogies. for U we are proposing to default to not reporting numa and only make that change in V.\n\ni would like to deprecate non numa aware guests but we are not doing that in this this spec.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8bfc45cfb55cf0ea75d28a50250cadf2bb577495","unresolved":false,"context_lines":[{"line_number":246,"context_line":"For flavors just asking for, say, vCPUs and memory without asking them to be"},{"line_number":247,"context_line":"NUMA-aware, then the Placement allocations candidates call would be simple::"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"  resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":250,"context_line":""},{"line_number":251,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":252,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones."},{"line_number":253,"context_line":"We\u0027re basically sharding clouds between NUMA-aware hosts and non-NUMA-aware"},{"line_number":254,"context_line":"hosts but that\u0027s not really changing the current behaviour as of now where"},{"line_number":255,"context_line":"operators create aggregates to make sure non-NUMA-aware instances can\u0027t land"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_d2801f9f","line":252,"range":{"start_line":249,"start_character":0,"end_line":252,"end_character":66},"updated":"2020-02-10 17:01:42.000000000","message":"The above request allows allocation candidates from a NUMA-aware (re-shaped) host as well. The unnumbered request group will be satisfied by two RPs, one NUMA node RP and one memory RP under a NUMA node RP (to necessary the same NUMA node RP).\n\nSee http://paste.openstack.org/show/789380/ as an example\n\nTo prevent scheduling non NUMA aware request to the reshaped NUMA-aware nodes you have to add a required\u003d!HW_NUMA_ROOT to the request. As that will precent selecting the NUMA root RP for to satisfy VCPUs and no other RPs provider VCPUs in a reshaped tree.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":246,"context_line":"For flavors just asking for, say, vCPUs and memory without asking them to be"},{"line_number":247,"context_line":"NUMA-aware, then the Placement allocations candidates call would be simple::"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"  resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":250,"context_line":""},{"line_number":251,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":252,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones."},{"line_number":253,"context_line":"We\u0027re basically sharding clouds between NUMA-aware hosts and non-NUMA-aware"},{"line_number":254,"context_line":"hosts but that\u0027s not really changing the current behaviour as of now where"},{"line_number":255,"context_line":"operators create aggregates to make sure non-NUMA-aware instances can\u0027t land"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_70f032db","line":252,"range":{"start_line":249,"start_character":0,"end_line":252,"end_character":66},"in_reply_to":"3fa7e38b_2e5024d1","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":246,"context_line":"For flavors just asking for, say, vCPUs and memory without asking them to be"},{"line_number":247,"context_line":"NUMA-aware, then the Placement allocations candidates call would be simple::"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"  resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":250,"context_line":""},{"line_number":251,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":252,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones."},{"line_number":253,"context_line":"We\u0027re basically sharding clouds between NUMA-aware hosts and non-NUMA-aware"},{"line_number":254,"context_line":"hosts but that\u0027s not really changing the current behaviour as of now where"},{"line_number":255,"context_line":"operators create aggregates to make sure non-NUMA-aware instances can\u0027t land"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_645ddb52","line":252,"range":{"start_line":249,"start_character":0,"end_line":252,"end_character":66},"in_reply_to":"3fa7e38b_2e5024d1","updated":"2020-02-11 14:14:22.000000000","message":"we covered this explcitly in the etherpad and propsoed useing a granualr request grop to prevent this. a forbdien trait would work too.  if we go the trait route then evey request would be treated as if  trait:HW_NUMA_NODE\u003drequired|forbiden is always set based on if the requst_spec has it numa_toplogy field populated.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"304b118005077e32db65100a55966fbeb90685c8","unresolved":false,"context_lines":[{"line_number":246,"context_line":"For flavors just asking for, say, vCPUs and memory without asking them to be"},{"line_number":247,"context_line":"NUMA-aware, then the Placement allocations candidates call would be simple::"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"  resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":250,"context_line":""},{"line_number":251,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":252,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones."},{"line_number":253,"context_line":"We\u0027re basically sharding clouds between NUMA-aware hosts and non-NUMA-aware"},{"line_number":254,"context_line":"hosts but that\u0027s not really changing the current behaviour as of now where"},{"line_number":255,"context_line":"operators create aggregates to make sure non-NUMA-aware instances can\u0027t land"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_2e5024d1","line":252,"range":{"start_line":249,"start_character":0,"end_line":252,"end_character":66},"in_reply_to":"3fa7e38b_3eacdda6","updated":"2020-02-11 09:52:01.000000000","message":"Yeah one suffixed group would prevent the NUMA aware nodes to fulfill the non numa request. I feel the required\u003d!HW_NUMA_ROOT a bit more explicit though.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":246,"context_line":"For flavors just asking for, say, vCPUs and memory without asking them to be"},{"line_number":247,"context_line":"NUMA-aware, then the Placement allocations candidates call would be simple::"},{"line_number":248,"context_line":""},{"line_number":249,"context_line":"  resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":250,"context_line":""},{"line_number":251,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":252,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones."},{"line_number":253,"context_line":"We\u0027re basically sharding clouds between NUMA-aware hosts and non-NUMA-aware"},{"line_number":254,"context_line":"hosts but that\u0027s not really changing the current behaviour as of now where"},{"line_number":255,"context_line":"operators create aggregates to make sure non-NUMA-aware instances can\u0027t land"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_3eacdda6","line":252,"range":{"start_line":249,"start_character":0,"end_line":252,"end_character":66},"in_reply_to":"3fa7e38b_d2801f9f","updated":"2020-02-10 18:52:43.000000000","message":"\u003e To prevent scheduling non NUMA aware request to the reshaped\n \u003e NUMA-aware nodes you have to add a required\u003d!HW_NUMA_ROOT to the\n \u003e request. As that will precent selecting the NUMA root RP for to\n \u003e satisfy VCPUs and no other RPs provider VCPUs in a reshaped tree.\n\nOr you could force the VCPU and MEMORY_MB resources to come from the same provider by using a suffixed request group.\n\nI\u0027m having a hard time deciding which would be better.\n\nBut I agree, we need to do something to make sure this non-NUMA workload doesn\u0027t land on a NUMA-aware host.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":261,"context_line":""},{"line_number":262,"context_line":"As NUMA-aware hosts have a specific topology with memory being in a grand-child"},{"line_number":263,"context_line":"RP, we basically need to ensure we can translate the existing expressiveness in"},{"line_number":264,"context_line":"the flavor extraspecs into a Placement allocation candidates query that asks"},{"line_number":265,"context_line":"for parenting between the NUMA RP containing the ``VCPU`` resources and the"},{"line_number":266,"context_line":"memory pagesize RP containing the ``MEMORY_MB`` resources."},{"line_number":267,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_4ce01394","line":264,"range":{"start_line":264,"start_character":11,"end_line":264,"end_character":21},"updated":"2020-02-10 20:50:41.000000000","message":"extra specs","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":261,"context_line":""},{"line_number":262,"context_line":"As NUMA-aware hosts have a specific topology with memory being in a grand-child"},{"line_number":263,"context_line":"RP, we basically need to ensure we can translate the existing expressiveness in"},{"line_number":264,"context_line":"the flavor extraspecs into a Placement allocation candidates query that asks"},{"line_number":265,"context_line":"for parenting between the NUMA RP containing the ``VCPU`` resources and the"},{"line_number":266,"context_line":"memory pagesize RP containing the ``MEMORY_MB`` resources."},{"line_number":267,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_29dd7e11","line":264,"range":{"start_line":264,"start_character":11,"end_line":264,"end_character":21},"in_reply_to":"3fa7e38b_4ce01394","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8bfc45cfb55cf0ea75d28a50250cadf2bb577495","unresolved":false,"context_lines":[{"line_number":265,"context_line":"for parenting between the NUMA RP containing the ``VCPU`` resources and the"},{"line_number":266,"context_line":"memory pagesize RP containing the ``MEMORY_MB`` resources."},{"line_number":267,"context_line":""},{"line_number":268,"context_line":"Accrdingly, here are some examples:"},{"line_number":269,"context_line":""},{"line_number":270,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d2``::"},{"line_number":271,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_32b9737b","line":268,"range":{"start_line":268,"start_character":0,"end_line":268,"end_character":10},"updated":"2020-02-10 17:01:42.000000000","message":"nit: Accordingly","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":265,"context_line":"for parenting between the NUMA RP containing the ``VCPU`` resources and the"},{"line_number":266,"context_line":"memory pagesize RP containing the ``MEMORY_MB`` resources."},{"line_number":267,"context_line":""},{"line_number":268,"context_line":"Accrdingly, here are some examples:"},{"line_number":269,"context_line":""},{"line_number":270,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d2``::"},{"line_number":271,"context_line":""}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_49e23ad5","line":268,"range":{"start_line":268,"start_character":0,"end_line":268,"end_character":10},"in_reply_to":"3fa7e38b_32b9737b","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":267,"context_line":""},{"line_number":268,"context_line":"Accrdingly, here are some examples:"},{"line_number":269,"context_line":""},{"line_number":270,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d2``::"},{"line_number":271,"context_line":""},{"line_number":272,"context_line":"    resources_MEM1\u003dMEMORY_MB:4096"},{"line_number":273,"context_line":"    \u0026required_MEM1\u003dMEMORY_PAGE_SIZE_SMALL"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_be944d43","line":270,"range":{"start_line":270,"start_character":61,"end_line":270,"end_character":63},"updated":"2020-02-10 18:52:43.000000000","message":"This example could use some annotations explaining things like:\n- When not otherwise specified, we divide resources evenly among NUMA nodes. That\u0027s why the VCPUs are 4/4 and the memory is 4096/4096 below.\n- When not otherwise specified, small pages are the default. That\u0027s why MEMORY_PAGE_SIZE_SMALL is added below.\n\n[Later] I see these are explained at the bottom of the section.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8bfc45cfb55cf0ea75d28a50250cadf2bb577495","unresolved":false,"context_lines":[{"line_number":279,"context_line":"    \u0026resources_PROC2\u003dVCPU:4"},{"line_number":280,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":281,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":282,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":283,"context_line":""},{"line_number":284,"context_line":""},{"line_number":285,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d1``::"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_9274e7ce","line":282,"range":{"start_line":282,"start_character":3,"end_line":282,"end_character":22},"updated":"2020-02-10 17:01:42.000000000","message":"I think this means that PROC1 group can be satisfied from the same RP than PROC2 group but that is against the definition of hw:numa_nodes\u003d2","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"90e50b59574872e8464bb83dcf1ad0db3afae3ae","unresolved":false,"context_lines":[{"line_number":279,"context_line":"    \u0026resources_PROC2\u003dVCPU:4"},{"line_number":280,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":281,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":282,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":283,"context_line":""},{"line_number":284,"context_line":""},{"line_number":285,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d1``::"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_70759232","line":282,"range":{"start_line":282,"start_character":3,"end_line":282,"end_character":22},"in_reply_to":"3fa7e38b_240a8328","updated":"2020-02-11 14:39:02.000000000","message":"This is why we have rp/rg mappings in the allocation candidate response now. Those need to make it down to the virt driver, as they do for qos as of https://review.opendev.org/#/c/696992/","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"304b118005077e32db65100a55966fbeb90685c8","unresolved":false,"context_lines":[{"line_number":279,"context_line":"    \u0026resources_PROC2\u003dVCPU:4"},{"line_number":280,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":281,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":282,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":283,"context_line":""},{"line_number":284,"context_line":""},{"line_number":285,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d1``::"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ae63b4ab","line":282,"range":{"start_line":282,"start_character":3,"end_line":282,"end_character":22},"in_reply_to":"3fa7e38b_5e943935","updated":"2020-02-11 09:52:01.000000000","message":"Good point the NTF will help filtering down that to anti-affinity. Also this means NTF needs to work on allocation candidates instead of host (as today).","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":279,"context_line":"    \u0026resources_PROC2\u003dVCPU:4"},{"line_number":280,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":281,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":282,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":283,"context_line":""},{"line_number":284,"context_line":""},{"line_number":285,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d1``::"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_5e943935","line":282,"range":{"start_line":282,"start_character":3,"end_line":282,"end_character":22},"in_reply_to":"3fa7e38b_9274e7ce","updated":"2020-02-10 18:52:43.000000000","message":"Correct, but we decided we needed to keep this to accommodate requests with bandwidth resources that need to come from the same provider. So as you say, the Placement result will include candidates where all the resources come from the same NUMA node; the NUMATopologyFilter will be responsible for eliminating those candidates.\n\n(nts: make sure that ^ is explained *somewhere* in the spec. Could link to https://docs.openstack.org/placement/train/specs/train/implemented/2005575-nested-magic-1.html#anti-affinity)\n\n([Later] I didn\u0027t see that explanation anywhere; it should be added.)","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":279,"context_line":"    \u0026resources_PROC2\u003dVCPU:4"},{"line_number":280,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":281,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":282,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":283,"context_line":""},{"line_number":284,"context_line":""},{"line_number":285,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d1``::"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_d05c8681","line":282,"range":{"start_line":282,"start_character":3,"end_line":282,"end_character":22},"in_reply_to":"3fa7e38b_ae63b4ab","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":279,"context_line":"    \u0026resources_PROC2\u003dVCPU:4"},{"line_number":280,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":281,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":282,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":283,"context_line":""},{"line_number":284,"context_line":""},{"line_number":285,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d1``::"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_240a8328","line":282,"range":{"start_line":282,"start_character":3,"end_line":282,"end_character":22},"in_reply_to":"3fa7e38b_ae63b4ab","updated":"2020-02-11 14:14:22.000000000","message":"yes it will although the NTF just calls the hardware.py module which will also need to be enhanced to be allocation candiate aware anyway to do the assignment on the host when invoked form the virt dirver.\n\nwe will need to pass the allocation candiate to the filters or have a new sett of alocation filters which specificly opterate on allocations.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8bfc45cfb55cf0ea75d28a50250cadf2bb577495","unresolved":false,"context_lines":[{"line_number":303,"context_line":"    \u0026resources_PROC2\u003dVCPU:6"},{"line_number":304,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":305,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":306,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":307,"context_line":""},{"line_number":308,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and"},{"line_number":309,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_mem.0\u003d1024"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_322c13b1","line":306,"updated":"2020-02-10 17:01:42.000000000","message":"ditto, this request does not guarantee that we have two numa nodes.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"90e50b59574872e8464bb83dcf1ad0db3afae3ae","unresolved":false,"context_lines":[{"line_number":303,"context_line":"    \u0026resources_PROC2\u003dVCPU:6"},{"line_number":304,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":305,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":306,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":307,"context_line":""},{"line_number":308,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and"},{"line_number":309,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_mem.0\u003d1024"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_d000066c","line":306,"in_reply_to":"3fa7e38b_2a3ecdb5","updated":"2020-02-11 14:39:02.000000000","message":"This doesn\u0027t make sense to me. Your device is on a PCI bus that\u0027s affined to a specific NUMA node. If you don\u0027t care about the device being affined to a specific NUMA node, you simply omit the device\u0027s group suffix from same_subtree. If you do care, you specify it.\n\nOr are you talking about a case where there are e.g. four NUMA cells on the host, and you want a two-cell guest topology, and you want your device affined to either of the guest cells that you\u0027re actually using, but not the ones that you\u0027re not?\n\nThat seems like a really obscure corner case, considering that 99% of the time your guest NUMA cells will be symmetrical so you could just affine the device to either one and the result will be correct.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":303,"context_line":"    \u0026resources_PROC2\u003dVCPU:6"},{"line_number":304,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":305,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":306,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":307,"context_line":""},{"line_number":308,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and"},{"line_number":309,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_mem.0\u003d1024"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_90528e69","line":306,"in_reply_to":"3fa7e38b_322c13b1","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":303,"context_line":"    \u0026resources_PROC2\u003dVCPU:6"},{"line_number":304,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":305,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":306,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":307,"context_line":""},{"line_number":308,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and"},{"line_number":309,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_mem.0\u003d1024"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_2a3ecdb5","line":306,"in_reply_to":"3fa7e38b_322c13b1","updated":"2020-02-11 14:14:22.000000000","message":"correct as above the numa toplogy filter will do this.\n\ndo not have the ablity to say \u0026same_subtree\u003d!_NUMA1,_NUMA2\nto enforece anti affintity.\n\nwe will also need to enhacne same_subtree to support \"or\" at some point before we can fully remove the numa toplogy filter. or is need for sriov devices and gpus so that we can require the the sriov device is in the same subtree as _NUMA1 or _NUMA2. this matters if you have more numa nodes on the host then the guest requsts.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"cf99e147fc9799b4474e25963be47c760961b344","unresolved":false,"context_lines":[{"line_number":303,"context_line":"    \u0026resources_PROC2\u003dVCPU:6"},{"line_number":304,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":305,"context_line":"    \u0026same_subtree\u003d_MEM2,_PROC2,_NUMA2"},{"line_number":306,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":307,"context_line":""},{"line_number":308,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and"},{"line_number":309,"context_line":"  ``hw:numa_nodes\u003d2\u0026hw:numa_cpus.0\u003d0,1\u0026hw:numa_mem.0\u003d1024"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_b3e234d2","line":306,"in_reply_to":"3fa7e38b_d000066c","updated":"2020-02-11 15:24:29.000000000","message":"our default policy today for pci numa affinty required that the pci/sriov device must be assocated with one of the numa node the vm is attach too.\n\nso if you have 4 numa node on the host and only ask for 2 numa node on the guest then you need the \"or\" operator to say that the pci request group must be in the same subtree as one of the  numa nodes but it does not matter which one.\n\nwe cant just assume that we can afine the device request to the first numa node as that will cause  failure to scudel if i have only 1 PF per host numa node and ask for 2.\n\nso its not a corner case its our default policy default policy when we have a numa guest.\n\nthe ! is obvious i think\nwe want to say that both numa node rps shoudl not be the same \n\u0026same_subtree\u003d!_NUMA1,_NUMA2\nso it just the basic anti afinity case.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":5754,"name":"Alex Xu","email":"hejie.xu@intel.com","username":"xuhj"},"change_message_id":"5ab16c254438cd2c4c6607b106f91ebd8900cba3","unresolved":false,"context_lines":[{"line_number":344,"context_line":""},{"line_number":345,"context_line":"    resources_PROC1\u003dVCPU:1"},{"line_number":346,"context_line":"    \u0026resources_MEM1\u003dMEMORY_MB:2048"},{"line_number":347,"context_line":"    \u0026required_MEM1\u003dCUSTOM_MEMORY_PAGE_SIZE_2048"},{"line_number":348,"context_line":"    \u0026required_NUMA1\u003dHW_NUMA_ROOT"},{"line_number":349,"context_line":"    \u0026same_subtree\u003d_PROC1,_MEM1,_NUMA1"},{"line_number":350,"context_line":"    \u0026resources_PROC2\u003dVCPU:1"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_cd22b3a6","line":347,"range":{"start_line":347,"start_character":4,"end_line":347,"end_character":47},"updated":"2020-02-11 13:20:30.000000000","message":"With placement-ese extra spec, the user can request two kind page sizes for an instance, which we don\u0027t allow. Probably we should take care of it.\n\nOr we should disable the placement-ese extra specs...","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":344,"context_line":""},{"line_number":345,"context_line":"    resources_PROC1\u003dVCPU:1"},{"line_number":346,"context_line":"    \u0026resources_MEM1\u003dMEMORY_MB:2048"},{"line_number":347,"context_line":"    \u0026required_MEM1\u003dCUSTOM_MEMORY_PAGE_SIZE_2048"},{"line_number":348,"context_line":"    \u0026required_NUMA1\u003dHW_NUMA_ROOT"},{"line_number":349,"context_line":"    \u0026same_subtree\u003d_PROC1,_MEM1,_NUMA1"},{"line_number":350,"context_line":"    \u0026resources_PROC2\u003dVCPU:1"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_f09422e9","line":347,"range":{"start_line":347,"start_character":4,"end_line":347,"end_character":47},"in_reply_to":"3fa7e38b_9004ce6d","updated":"2020-02-12 09:09:51.000000000","message":"Yeah, let\u0027s doc it first and not try to code it yet.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"cf99e147fc9799b4474e25963be47c760961b344","unresolved":false,"context_lines":[{"line_number":344,"context_line":""},{"line_number":345,"context_line":"    resources_PROC1\u003dVCPU:1"},{"line_number":346,"context_line":"    \u0026resources_MEM1\u003dMEMORY_MB:2048"},{"line_number":347,"context_line":"    \u0026required_MEM1\u003dCUSTOM_MEMORY_PAGE_SIZE_2048"},{"line_number":348,"context_line":"    \u0026required_NUMA1\u003dHW_NUMA_ROOT"},{"line_number":349,"context_line":"    \u0026same_subtree\u003d_PROC1,_MEM1,_NUMA1"},{"line_number":350,"context_line":"    \u0026resources_PROC2\u003dVCPU:1"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_13ea28a0","line":347,"range":{"start_line":347,"start_character":4,"end_line":347,"end_character":47},"in_reply_to":"3fa7e38b_9004ce6d","updated":"2020-02-11 15:24:29.000000000","message":"you could set it up in the flavor but the hardware module would ignore it when constucting the instance numa toplogy object. so the virtdriver would not honor the request as it just renders the xml based on the content of the instnace numa toplogy object.\n\nwe should document that its not supported for sure.\ni dont know if we want to explictly block it but we could.\neither via a follow up to stephens extra spec validation or specific in the hardware mouled get_numa_constriats function that is used in the api to validate the hw:* extraspces make sense.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"90e50b59574872e8464bb83dcf1ad0db3afae3ae","unresolved":false,"context_lines":[{"line_number":344,"context_line":""},{"line_number":345,"context_line":"    resources_PROC1\u003dVCPU:1"},{"line_number":346,"context_line":"    \u0026resources_MEM1\u003dMEMORY_MB:2048"},{"line_number":347,"context_line":"    \u0026required_MEM1\u003dCUSTOM_MEMORY_PAGE_SIZE_2048"},{"line_number":348,"context_line":"    \u0026required_NUMA1\u003dHW_NUMA_ROOT"},{"line_number":349,"context_line":"    \u0026same_subtree\u003d_PROC1,_MEM1,_NUMA1"},{"line_number":350,"context_line":"    \u0026resources_PROC2\u003dVCPU:1"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_9004ce6d","line":347,"range":{"start_line":347,"start_character":4,"end_line":347,"end_character":47},"in_reply_to":"3fa7e38b_b0046a14","updated":"2020-02-11 14:39:02.000000000","message":"Unless we explicitly disable the placement-ese syntax here, nothing would prevent you from setting up a flavor to use it. But you can always create a flavor that will never land, or otherwise blow up, if you try hard enough. As noted in the mixed CPU spec discussions, we should document that you should use hw:-ish syntax and not placement-ish syntax; and if the operator tries the latter, they deserve what they get when things blow up.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":344,"context_line":""},{"line_number":345,"context_line":"    resources_PROC1\u003dVCPU:1"},{"line_number":346,"context_line":"    \u0026resources_MEM1\u003dMEMORY_MB:2048"},{"line_number":347,"context_line":"    \u0026required_MEM1\u003dCUSTOM_MEMORY_PAGE_SIZE_2048"},{"line_number":348,"context_line":"    \u0026required_NUMA1\u003dHW_NUMA_ROOT"},{"line_number":349,"context_line":"    \u0026same_subtree\u003d_PROC1,_MEM1,_NUMA1"},{"line_number":350,"context_line":"    \u0026resources_PROC2\u003dVCPU:1"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_b0046a14","line":347,"range":{"start_line":347,"start_character":4,"end_line":347,"end_character":47},"in_reply_to":"3fa7e38b_cd22b3a6","updated":"2020-02-11 14:14:22.000000000","message":"we are not going to support the pacemetn syntax for mempages.\nwe are only supporting hw:mem_page_size\nif you use the raw palcement syntax your vm will not have hugepages.\n\nwe have no support for the placment syntax today for hugepages and we should not add it. at least not until the non placment version is supported and we have a stong usecase that requires it.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":353,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":354,"context_line":"    \u0026same_subtree\u003d_PROC2,_MEM2,_NUMA2"},{"line_number":355,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":356,"context_line":""},{"line_number":357,"context_line":""},{"line_number":358,"context_line":"Alternatives"},{"line_number":359,"context_line":"------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_decd895b","line":356,"updated":"2020-02-10 18:52:43.000000000","message":"Should add an example where the flavor simply requests \u0027large\u0027 pages. This will result in requiring the MEMORY_PAGE_SIZE_LARGE trait, but not a specific CUSTOM_MEMORY_PAGE_SIZE_*. It demonstrates that the result can include candidates where the memory requirement is satisfied by *either* of the large-page-size grandchild nodes.\n\nThat would also be a good place to note that we would never split such memory across two providers with different large page sizes. That\u0027s by design, and matches what we do today when \u0027large\u0027 pages are requested.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":353,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":354,"context_line":"    \u0026same_subtree\u003d_PROC2,_MEM2,_NUMA2"},{"line_number":355,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":356,"context_line":""},{"line_number":357,"context_line":""},{"line_number":358,"context_line":"Alternatives"},{"line_number":359,"context_line":"------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_9337f8ae","line":356,"in_reply_to":"3fa7e38b_0aaf51e1","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":353,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":354,"context_line":"    \u0026same_subtree\u003d_PROC2,_MEM2,_NUMA2"},{"line_number":355,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":356,"context_line":""},{"line_number":357,"context_line":""},{"line_number":358,"context_line":"Alternatives"},{"line_number":359,"context_line":"------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0aaf51e1","line":356,"in_reply_to":"3fa7e38b_decd895b","updated":"2020-02-11 14:14:22.000000000","message":"+1\n\ni would also prefer if a flavor was added as a table before each of these requests to show what we are translating form \nfor the placemetn request above i think its \n\nflavor.vcpus\u003d2\nflavor.ram\u003d4096\nflavor.disk\u003d0 ? you are not asking for any in the example.\nhw:numa_nodes\u003d2\nhw:mem_page_size\u003d2MB\n\nits eaiser to read if its presented in  table we also should add the disk request to each to make it more reflective of a real query. although both of these are just nits.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":370,"context_line":"be reshaped to be NUMA-aware but then non-NUMA-aware instances could"},{"line_number":371,"context_line":"potentially land on those hosts. That wouldn\u0027t change the fact that for"},{"line_number":372,"context_line":"optimal capacity, operators need to shard their clouds between NUMA workloads"},{"line_number":373,"context_line":"and non-NUMA ones, but from a Placement perspective, all hosts would be equal."},{"line_number":374,"context_line":""},{"line_number":375,"context_line":"Data model impact"},{"line_number":376,"context_line":"-----------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_1e36c150","line":373,"updated":"2020-02-10 18:52:43.000000000","message":"could link to https://review.opendev.org/#/c/658510/ here","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":370,"context_line":"be reshaped to be NUMA-aware but then non-NUMA-aware instances could"},{"line_number":371,"context_line":"potentially land on those hosts. That wouldn\u0027t change the fact that for"},{"line_number":372,"context_line":"optimal capacity, operators need to shard their clouds between NUMA workloads"},{"line_number":373,"context_line":"and non-NUMA ones, but from a Placement perspective, all hosts would be equal."},{"line_number":374,"context_line":""},{"line_number":375,"context_line":"Data model impact"},{"line_number":376,"context_line":"-----------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_f3042cc7","line":373,"in_reply_to":"3fa7e38b_1e36c150","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9928e61b374c22b2375886371f75b5776de358d2","unresolved":false,"context_lines":[{"line_number":398,"context_line":"Performance Impact"},{"line_number":399,"context_line":"------------------"},{"line_number":400,"context_line":""},{"line_number":401,"context_line":"Only when changing the configuration option, a reshape is done."},{"line_number":402,"context_line":""},{"line_number":403,"context_line":"Other deployer impact"},{"line_number":404,"context_line":"---------------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_7ec1f5fc","line":401,"updated":"2020-02-10 18:52:43.000000000","message":"The expense of the reshape will be proportional to the number of instances whose allocations need to be moved; but it\u0027s all done in a single placement call, so unless you\u0027ve got hundreds/thousands of instances on a single host, I can\u0027t imagine this being a noticeable amount of extra time on compute service startup.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":398,"context_line":"Performance Impact"},{"line_number":399,"context_line":"------------------"},{"line_number":400,"context_line":""},{"line_number":401,"context_line":"Only when changing the configuration option, a reshape is done."},{"line_number":402,"context_line":""},{"line_number":403,"context_line":"Other deployer impact"},{"line_number":404,"context_line":"---------------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_503bb6c7","line":401,"in_reply_to":"3fa7e38b_7ec1f5fc","updated":"2020-02-11 14:14:22.000000000","message":"ya numa host tend to have no oversubstiption since cpu pinnign disable it for cpus and hugepages or explcit page size requerst disabled it for memory.\n\nso its unlikely to have more then 128 instnaces per hosts.\nim basing that on a 8G vm and the fact most hosts dont exceed 1Tb of ram and 256-512 is more common. i think 32 VMs or less per host would be more common.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":5754,"name":"Alex Xu","email":"hejie.xu@intel.com","username":"xuhj"},"change_message_id":"5ab16c254438cd2c4c6607b106f91ebd8900cba3","unresolved":false,"context_lines":[{"line_number":415,"context_line":"Upgrade impact"},{"line_number":416,"context_line":"--------------"},{"line_number":417,"context_line":""},{"line_number":418,"context_line":"As described above, in order to prevent a flavor update during upgrade, we will"},{"line_number":419,"context_line":"provide a translation mechanism that will take the existing"},{"line_number":420,"context_line":"flavor extra spec properties and transform them into Placement numbered groups"},{"line_number":421,"context_line":"query."},{"line_number":422,"context_line":""},{"line_number":423,"context_line":"Since there will be a configuration option for telling that a host would become"},{"line_number":424,"context_line":"NUMA-aware, the corresponding allocations accordingly have to change hence the"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ad6bb7b5","line":421,"range":{"start_line":418,"start_character":0,"end_line":421,"end_character":6},"updated":"2020-02-11 13:20:30.000000000","message":"The probably need something we have done for the \u0027standard cpu resource tracking\u0027 feature. Since in the early stage of upgrade, you will have very few compute nodes to the NUMA-aware RPs. But if the controller already begin to translate the extra spec into placement numbered groups, that means the request may be not successful, since too few hosts. So probably we can query the by numbered groups first, and then fallback the orignal way.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":415,"context_line":"Upgrade impact"},{"line_number":416,"context_line":"--------------"},{"line_number":417,"context_line":""},{"line_number":418,"context_line":"As described above, in order to prevent a flavor update during upgrade, we will"},{"line_number":419,"context_line":"provide a translation mechanism that will take the existing"},{"line_number":420,"context_line":"flavor extra spec properties and transform them into Placement numbered groups"},{"line_number":421,"context_line":"query."},{"line_number":422,"context_line":""},{"line_number":423,"context_line":"Since there will be a configuration option for telling that a host would become"},{"line_number":424,"context_line":"NUMA-aware, the corresponding allocations accordingly have to change hence the"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_d07006aa","line":421,"range":{"start_line":418,"start_character":0,"end_line":421,"end_character":6},"in_reply_to":"3fa7e38b_ad6bb7b5","updated":"2020-02-11 14:14:22.000000000","message":"we talked about doing a fallback query for numa instance to fall back to a non numa query and allow the numa toplogy filter and host aggreates to elimiate hosts that could not fit the vm.\n\nthis would be the same as the fallback query we do for PCPUs today.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":415,"context_line":"Upgrade impact"},{"line_number":416,"context_line":"--------------"},{"line_number":417,"context_line":""},{"line_number":418,"context_line":"As described above, in order to prevent a flavor update during upgrade, we will"},{"line_number":419,"context_line":"provide a translation mechanism that will take the existing"},{"line_number":420,"context_line":"flavor extra spec properties and transform them into Placement numbered groups"},{"line_number":421,"context_line":"query."},{"line_number":422,"context_line":""},{"line_number":423,"context_line":"Since there will be a configuration option for telling that a host would become"},{"line_number":424,"context_line":"NUMA-aware, the corresponding allocations accordingly have to change hence the"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_9305b8c2","line":421,"range":{"start_line":418,"start_character":0,"end_line":421,"end_character":6},"in_reply_to":"3fa7e38b_d07006aa","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":8864,"name":"Artom Lifshitz","email":"notartom@gmail.com","username":"artom"},"change_message_id":"45d682709166f989bf72d0685cdb0f8407bca444","unresolved":false,"context_lines":[{"line_number":424,"context_line":"NUMA-aware, the corresponding allocations accordingly have to change hence the"},{"line_number":425,"context_line":"virt drivers be responsible for providing a reshape mechanism that will"},{"line_number":426,"context_line":"eventually call the `Placement API /reshaper endpoint`_ when starting the"},{"line_number":427,"context_line":"compute service."},{"line_number":428,"context_line":"This reshape implementation will absolutely need to consider the Fast Forward"},{"line_number":429,"context_line":"Upgrade (FFU) strategy where all controlplane is down and should possibly"},{"line_number":430,"context_line":"document any extra step required for FFU."}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_ec73ffe3","line":427,"updated":"2020-02-10 20:50:41.000000000","message":"Weird newline.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8bfc45cfb55cf0ea75d28a50250cadf2bb577495","unresolved":false,"context_lines":[{"line_number":425,"context_line":"virt drivers be responsible for providing a reshape mechanism that will"},{"line_number":426,"context_line":"eventually call the `Placement API /reshaper endpoint`_ when starting the"},{"line_number":427,"context_line":"compute service."},{"line_number":428,"context_line":"This reshape implementation will absolutely need to consider the Fast Forward"},{"line_number":429,"context_line":"Upgrade (FFU) strategy where all controlplane is down and should possibly"},{"line_number":430,"context_line":"document any extra step required for FFU."},{"line_number":431,"context_line":""},{"line_number":432,"context_line":"Implementation"},{"line_number":433,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_183819c9","line":430,"range":{"start_line":428,"start_character":0,"end_line":430,"end_character":41},"updated":"2020-02-10 17:01:42.000000000","message":"Is this only mean that we keep the reshape code in nova-compute forever? So that if somebody do an FFU from Stein -\u003e W then the W compute can still do the reshape at startup.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"304b118005077e32db65100a55966fbeb90685c8","unresolved":false,"context_lines":[{"line_number":425,"context_line":"virt drivers be responsible for providing a reshape mechanism that will"},{"line_number":426,"context_line":"eventually call the `Placement API /reshaper endpoint`_ when starting the"},{"line_number":427,"context_line":"compute service."},{"line_number":428,"context_line":"This reshape implementation will absolutely need to consider the Fast Forward"},{"line_number":429,"context_line":"Upgrade (FFU) strategy where all controlplane is down and should possibly"},{"line_number":430,"context_line":"document any extra step required for FFU."},{"line_number":431,"context_line":""},{"line_number":432,"context_line":"Implementation"},{"line_number":433,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_eec9ac79","line":430,"range":{"start_line":428,"start_character":0,"end_line":430,"end_character":41},"in_reply_to":"3fa7e38b_0ce1bbfd","updated":"2020-02-11 09:52:01.000000000","message":"Yeah forever was a bit too much. A well documented plan is acceptable to me.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"31faa37c3b6c0c441854cf92b02098c0f8852140","unresolved":false,"context_lines":[{"line_number":425,"context_line":"virt drivers be responsible for providing a reshape mechanism that will"},{"line_number":426,"context_line":"eventually call the `Placement API /reshaper endpoint`_ when starting the"},{"line_number":427,"context_line":"compute service."},{"line_number":428,"context_line":"This reshape implementation will absolutely need to consider the Fast Forward"},{"line_number":429,"context_line":"Upgrade (FFU) strategy where all controlplane is down and should possibly"},{"line_number":430,"context_line":"document any extra step required for FFU."},{"line_number":431,"context_line":""},{"line_number":432,"context_line":"Implementation"},{"line_number":433,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0ce1bbfd","line":430,"range":{"start_line":428,"start_character":0,"end_line":430,"end_character":41},"in_reply_to":"3fa7e38b_183819c9","updated":"2020-02-10 20:19:04.000000000","message":"im not sure if we have to keep it forever but we shoudl keep it for a couple of release e.g. 2-3 at a minium\n\nif we go through the cycle of intoducign a config optio, updateing its defaul, deprecatinging it and removing it i think that is more then enough time to do a FFU. that said if its small it might not hurt to keep it.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":425,"context_line":"virt drivers be responsible for providing a reshape mechanism that will"},{"line_number":426,"context_line":"eventually call the `Placement API /reshaper endpoint`_ when starting the"},{"line_number":427,"context_line":"compute service."},{"line_number":428,"context_line":"This reshape implementation will absolutely need to consider the Fast Forward"},{"line_number":429,"context_line":"Upgrade (FFU) strategy where all controlplane is down and should possibly"},{"line_number":430,"context_line":"document any extra step required for FFU."},{"line_number":431,"context_line":""},{"line_number":432,"context_line":"Implementation"},{"line_number":433,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_33f644d7","line":430,"range":{"start_line":428,"start_character":0,"end_line":430,"end_character":41},"in_reply_to":"3fa7e38b_b079ca81","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":425,"context_line":"virt drivers be responsible for providing a reshape mechanism that will"},{"line_number":426,"context_line":"eventually call the `Placement API /reshaper endpoint`_ when starting the"},{"line_number":427,"context_line":"compute service."},{"line_number":428,"context_line":"This reshape implementation will absolutely need to consider the Fast Forward"},{"line_number":429,"context_line":"Upgrade (FFU) strategy where all controlplane is down and should possibly"},{"line_number":430,"context_line":"document any extra step required for FFU."},{"line_number":431,"context_line":""},{"line_number":432,"context_line":"Implementation"},{"line_number":433,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_b079ca81","line":430,"range":{"start_line":428,"start_character":0,"end_line":430,"end_character":41},"in_reply_to":"3fa7e38b_eec9ac79","updated":"2020-02-11 14:14:22.000000000","message":"as long as we check with operators, vendors and the installation tools before we drop support i think we can manage this. \n\nredhat osp 16 has just been released based on train. \n\ni know that i will be advocating for us to move to numa aware reporting by default downstream in osp 17 which will be based on Victora and by osp 18 i hope that downstream that is the only configratuion we will support in the product but we will have to discuss that with customer and product mangaers.\n\nso i hop that in 3-4 release from now we will not need to support this reshape.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"61faf6af7f6b10a2cc1c76633b97b7532fa2ea17","unresolved":false,"context_lines":[{"line_number":436,"context_line":"-----------"},{"line_number":437,"context_line":""},{"line_number":438,"context_line":"* bauzas"},{"line_number":439,"context_line":"* someone else for Hyper-V"},{"line_number":440,"context_line":""},{"line_number":441,"context_line":"Feature Liaison"},{"line_number":442,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_8ac26187","line":439,"range":{"start_line":439,"start_character":1,"end_line":439,"end_character":26},"updated":"2020-02-11 14:14:22.000000000","message":"we could just drop this.\nhyperv should be able to reuse this design but since your going to work on this for libvirt primarliy i think thats all that matters.\n\nfor what its worth ill also work on this with you if needed although i dont think i need to be list specificlly.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":436,"context_line":"-----------"},{"line_number":437,"context_line":""},{"line_number":438,"context_line":"* bauzas"},{"line_number":439,"context_line":"* someone else for Hyper-V"},{"line_number":440,"context_line":""},{"line_number":441,"context_line":"Feature Liaison"},{"line_number":442,"context_line":"---------------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_b3231457","line":439,"range":{"start_line":439,"start_character":1,"end_line":439,"end_character":26},"in_reply_to":"3fa7e38b_8ac26187","updated":"2020-02-12 09:09:51.000000000","message":"Done","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8bfc45cfb55cf0ea75d28a50250cadf2bb577495","unresolved":false,"context_lines":[{"line_number":440,"context_line":""},{"line_number":441,"context_line":"Feature Liaison"},{"line_number":442,"context_line":"---------------"},{"line_number":443,"context_line":"None"},{"line_number":444,"context_line":""},{"line_number":445,"context_line":"Work Items"},{"line_number":446,"context_line":"----------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_78702da8","line":443,"updated":"2020-02-10 17:01:42.000000000","message":"you can put yourselve here","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"13db8e9f9be922a9f12f9da20b711a6671baccc4","unresolved":false,"context_lines":[{"line_number":440,"context_line":""},{"line_number":441,"context_line":"Feature Liaison"},{"line_number":442,"context_line":"---------------"},{"line_number":443,"context_line":"None"},{"line_number":444,"context_line":""},{"line_number":445,"context_line":"Work Items"},{"line_number":446,"context_line":"----------"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_73191c21","line":443,"in_reply_to":"3fa7e38b_78702da8","updated":"2020-02-12 09:09:51.000000000","message":"\u003e you can put yourselve here\n\nWell, it\u0027s acceptable to say None as we agreed when discussing the Feature Liaison thingy :-)","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"8bfc45cfb55cf0ea75d28a50250cadf2bb577495","unresolved":false,"context_lines":[{"line_number":459,"context_line":"Testing"},{"line_number":460,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":461,"context_line":""},{"line_number":462,"context_line":"Functional tests and unittests."},{"line_number":463,"context_line":""},{"line_number":464,"context_line":"Documentation Impact"},{"line_number":465,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_787eed6d","line":462,"range":{"start_line":462,"start_character":0,"end_line":462,"end_character":10},"updated":"2020-02-10 17:01:42.000000000","message":"as topology is passed from the virt driver these functional tests needs either libvirt-like fake virt driver with numa typology or even the libvirt driver itself.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"304b118005077e32db65100a55966fbeb90685c8","unresolved":false,"context_lines":[{"line_number":459,"context_line":"Testing"},{"line_number":460,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":461,"context_line":""},{"line_number":462,"context_line":"Functional tests and unittests."},{"line_number":463,"context_line":""},{"line_number":464,"context_line":"Documentation Impact"},{"line_number":465,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_cebcf0d6","line":462,"range":{"start_line":462,"start_character":0,"end_line":462,"end_character":10},"in_reply_to":"3fa7e38b_0c6e9b96","updated":"2020-02-11 09:52:01.000000000","message":"I\u0027m glad that we have such and func env.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"31faa37c3b6c0c441854cf92b02098c0f8852140","unresolved":false,"context_lines":[{"line_number":459,"context_line":"Testing"},{"line_number":460,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":461,"context_line":""},{"line_number":462,"context_line":"Functional tests and unittests."},{"line_number":463,"context_line":""},{"line_number":464,"context_line":"Documentation Impact"},{"line_number":465,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":16,"id":"3fa7e38b_0c6e9b96","line":462,"range":{"start_line":462,"start_character":0,"end_line":462,"end_character":10},"in_reply_to":"3fa7e38b_787eed6d","updated":"2020-02-10 20:19:04.000000000","message":"yes well we have numa tests already that use the fakelibvirt fixture with the real libvirt dirver so i read this as we will continue to extend those.","commit_id":"388723de0407042e5b5cfade001131d0c1df1c76"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":41,"context_line":"be answered by the Placement service as potential allocation candidates that"},{"line_number":42,"context_line":"the filter would *only* be responsible for choosing between them in some"},{"line_number":43,"context_line":"very specific cases (eg. PCI device NUMA affinity, CPU pinning and NUMA"},{"line_number":44,"context_line":"anti-affinity)."},{"line_number":45,"context_line":""},{"line_number":46,"context_line":"Accordingly, we could model the host memory and the CPU topologies as a set of"},{"line_number":47,"context_line":"resource providers arranged in a tree, and just directly allocate resources for"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_fe6dd367","line":44,"updated":"2020-02-11 16:35:12.000000000","message":"++","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":110,"context_line":"the corresponding resource classes as part of the child NUMA Resource"},{"line_number":111,"context_line":"Providers. In order to facilitate querying NUMA resources, we propose to"},{"line_number":112,"context_line":"decorate the NUMA child resource providers with a specific trait named"},{"line_number":113,"context_line":"``HW_NUMA_ROOT`` that would be on each NUMA *node*."},{"line_number":114,"context_line":""},{"line_number":115,"context_line":"Memory is a bit thougher to represent. The granularity of a NUMA node having"},{"line_number":116,"context_line":"an amount of attached memory is somehow a first approach but we\u0027re missing the"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_be251b18","line":113,"updated":"2020-02-11 16:35:12.000000000","message":"It would still be nice to explain the motivation behind adding this trait:\n- It allows us to differentiate NUMA-modeled hosts from non-NUMA-modeled hosts; and\n- It gives us an anchor to express affinity among resources in the same subtree without having to rely on knowing specifically which resources are where. (Example: as currently modeled we could just express affinity by putting the proc and mem groups into same_subtree -- but some day we might put procs into a grandchild RP as well, and then that would stop working.)","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"02c4341d8be515cc15790070e4824cadc6592878","unresolved":false,"context_lines":[{"line_number":110,"context_line":"the corresponding resource classes as part of the child NUMA Resource"},{"line_number":111,"context_line":"Providers. In order to facilitate querying NUMA resources, we propose to"},{"line_number":112,"context_line":"decorate the NUMA child resource providers with a specific trait named"},{"line_number":113,"context_line":"``HW_NUMA_ROOT`` that would be on each NUMA *node*."},{"line_number":114,"context_line":""},{"line_number":115,"context_line":"Memory is a bit thougher to represent. The granularity of a NUMA node having"},{"line_number":116,"context_line":"an amount of attached memory is somehow a first approach but we\u0027re missing the"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_e5c3f48a","line":113,"in_reply_to":"3fa7e38b_be251b18","updated":"2020-02-12 11:43:57.000000000","message":"Done","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"7348b2faae818ee5433fa59d6230cdfd59f1bcce","unresolved":false,"context_lines":[{"line_number":114,"context_line":""},{"line_number":115,"context_line":"Memory is a bit thougher to represent. The granularity of a NUMA node having"},{"line_number":116,"context_line":"an amount of attached memory is somehow a first approach but we\u0027re missing the"},{"line_number":117,"context_line":"point that the smallest unit of splittable unit you can query with Nova is"},{"line_number":118,"context_line":"really a page size. Accordingly, we should rather model our NUMA subtree"},{"line_number":119,"context_line":"with children Resource Providers that represent the smallest unit of memory"},{"line_number":120,"context_line":"you can allocate, ie. a page size. Since a pagesize is not a *consumable*"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_3efe0b39","line":117,"range":{"start_line":117,"start_character":56,"end_line":117,"end_character":61},"updated":"2020-02-11 16:55:32.000000000","message":"nit: assign","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"7348b2faae818ee5433fa59d6230cdfd59f1bcce","unresolved":false,"context_lines":[{"line_number":114,"context_line":""},{"line_number":115,"context_line":"Memory is a bit thougher to represent. The granularity of a NUMA node having"},{"line_number":116,"context_line":"an amount of attached memory is somehow a first approach but we\u0027re missing the"},{"line_number":117,"context_line":"point that the smallest unit of splittable unit you can query with Nova is"},{"line_number":118,"context_line":"really a page size. Accordingly, we should rather model our NUMA subtree"},{"line_number":119,"context_line":"with children Resource Providers that represent the smallest unit of memory"},{"line_number":120,"context_line":"you can allocate, ie. a page size. Since a pagesize is not a *consumable*"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_7ee00394","line":117,"range":{"start_line":117,"start_character":10,"end_line":117,"end_character":47},"updated":"2020-02-11 16:55:32.000000000","message":"nit: the smallest allocatable unit","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":114,"context_line":""},{"line_number":115,"context_line":"Memory is a bit thougher to represent. The granularity of a NUMA node having"},{"line_number":116,"context_line":"an amount of attached memory is somehow a first approach but we\u0027re missing the"},{"line_number":117,"context_line":"point that the smallest unit of splittable unit you can query with Nova is"},{"line_number":118,"context_line":"really a page size. Accordingly, we should rather model our NUMA subtree"},{"line_number":119,"context_line":"with children Resource Providers that represent the smallest unit of memory"},{"line_number":120,"context_line":"you can allocate, ie. a page size. Since a pagesize is not a *consumable*"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_3e9dab57","line":117,"range":{"start_line":117,"start_character":23,"end_line":117,"end_character":32},"updated":"2020-02-11 16:35:12.000000000","message":"x","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"02c4341d8be515cc15790070e4824cadc6592878","unresolved":false,"context_lines":[{"line_number":114,"context_line":""},{"line_number":115,"context_line":"Memory is a bit thougher to represent. The granularity of a NUMA node having"},{"line_number":116,"context_line":"an amount of attached memory is somehow a first approach but we\u0027re missing the"},{"line_number":117,"context_line":"point that the smallest unit of splittable unit you can query with Nova is"},{"line_number":118,"context_line":"really a page size. Accordingly, we should rather model our NUMA subtree"},{"line_number":119,"context_line":"with children Resource Providers that represent the smallest unit of memory"},{"line_number":120,"context_line":"you can allocate, ie. a page size. Since a pagesize is not a *consumable*"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_a5c17c83","line":117,"range":{"start_line":117,"start_character":10,"end_line":117,"end_character":47},"in_reply_to":"3fa7e38b_7ee00394","updated":"2020-02-12 11:43:57.000000000","message":"Done","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":125,"context_line":"  know whether the memory page size is default or optionally configured."},{"line_number":126,"context_line":""},{"line_number":127,"context_line":"- ``CUSTOM_MEMORY_PAGE_SIZE_\u003cX\u003e`` where \u003cX\u003e is an integer would allow us to"},{"line_number":128,"context_line":"  know the size of the page."},{"line_number":129,"context_line":""},{"line_number":130,"context_line":""},{"line_number":131,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_fea2b397","line":128,"range":{"start_line":128,"start_character":27,"end_line":128,"end_character":28},"updated":"2020-02-11 16:35:12.000000000","message":"in KB.\n\nAgain, it\u0027s important that the trait name be consistent so we can 1) generate it from update_provider_tree, and 2) query placement on it from the scheduler.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"02c4341d8be515cc15790070e4824cadc6592878","unresolved":false,"context_lines":[{"line_number":125,"context_line":"  know whether the memory page size is default or optionally configured."},{"line_number":126,"context_line":""},{"line_number":127,"context_line":"- ``CUSTOM_MEMORY_PAGE_SIZE_\u003cX\u003e`` where \u003cX\u003e is an integer would allow us to"},{"line_number":128,"context_line":"  know the size of the page."},{"line_number":129,"context_line":""},{"line_number":130,"context_line":""},{"line_number":131,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_c5bcb808","line":128,"range":{"start_line":128,"start_character":27,"end_line":128,"end_character":28},"in_reply_to":"3fa7e38b_fea2b397","updated":"2020-02-12 11:43:57.000000000","message":"Done","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":227,"context_line":".. code::"},{"line_number":228,"context_line":""},{"line_number":229,"context_line":"  [workarounds]"},{"line_number":230,"context_line":"  disable_placement_numa_reporting \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":231,"context_line":""},{"line_number":232,"context_line":""},{"line_number":233,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_be0e7b8c","line":230,"updated":"2020-02-11 16:35:12.000000000","message":"++","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"0d8d9b0a14fc48e85c28eb634664abb695bb0f40","unresolved":false,"context_lines":[{"line_number":313,"context_line":"   as we will also modify the ``NUMATopologyFilter`` to only accept"},{"line_number":314,"context_line":"   allocation candidates for a host that are in different NUMA nodes."},{"line_number":315,"context_line":"   It will probably be implemented in the ``nova.virt.hardware`` module but"},{"line_number":316,"context_line":"   that\u0027s an implementation detail."},{"line_number":317,"context_line":""},{"line_number":318,"context_line":"* for a flavor of 8 VCPUs, 8GB of RAM and ``hw:numa_nodes\u003d1``::"},{"line_number":319,"context_line":""}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_fef873c2","line":316,"updated":"2020-02-11 16:06:07.000000000","message":"thanks!","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":389,"context_line":""},{"line_number":390,"context_line":"If you only want large page size support without really specifying which size"},{"line_number":391,"context_line":"(eg. by specifying ``hw:mem_page_size\u003dlarge`` instead of, say, ``2MB``), then"},{"line_number":392,"context_line":"the above same request for large pages would translate into:::"},{"line_number":393,"context_line":""},{"line_number":394,"context_line":"    resources_PROC1\u003dVCPU:1"},{"line_number":395,"context_line":"    \u0026resources_MEM1\u003dMEMORY_MB:2048"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_fe6cf334","line":392,"range":{"start_line":392,"start_character":59,"end_line":392,"end_character":60},"updated":"2020-02-11 16:35:12.000000000","message":"extra :","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"02c4341d8be515cc15790070e4824cadc6592878","unresolved":false,"context_lines":[{"line_number":389,"context_line":""},{"line_number":390,"context_line":"If you only want large page size support without really specifying which size"},{"line_number":391,"context_line":"(eg. by specifying ``hw:mem_page_size\u003dlarge`` instead of, say, ``2MB``), then"},{"line_number":392,"context_line":"the above same request for large pages would translate into:::"},{"line_number":393,"context_line":""},{"line_number":394,"context_line":"    resources_PROC1\u003dVCPU:1"},{"line_number":395,"context_line":"    \u0026resources_MEM1\u003dMEMORY_MB:2048"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_65b70424","line":392,"range":{"start_line":392,"start_character":59,"end_line":392,"end_character":60},"in_reply_to":"3fa7e38b_fe6cf334","updated":"2020-02-12 11:43:57.000000000","message":"Done","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":401,"context_line":"    \u0026required_MEM2\u003dMEMORY_PAGE_SIZE_LARGE"},{"line_number":402,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":403,"context_line":"    \u0026same_subtree\u003d_PROC2,_MEM2,_NUMA2"},{"line_number":404,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":405,"context_line":""},{"line_number":406,"context_line":"Asking the same with ``hw:mem_page_size\u003dsmall`` would translate into::"},{"line_number":407,"context_line":""}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_9e57ff59","line":404,"updated":"2020-02-11 16:35:12.000000000","message":"Again, it would be nice to point out that, given the example model above, this could result in candidates from *either* of the large-page memory RPs.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"02c4341d8be515cc15790070e4824cadc6592878","unresolved":false,"context_lines":[{"line_number":401,"context_line":"    \u0026required_MEM2\u003dMEMORY_PAGE_SIZE_LARGE"},{"line_number":402,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":403,"context_line":"    \u0026same_subtree\u003d_PROC2,_MEM2,_NUMA2"},{"line_number":404,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":405,"context_line":""},{"line_number":406,"context_line":"Asking the same with ``hw:mem_page_size\u003dsmall`` would translate into::"},{"line_number":407,"context_line":""}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_85b24030","line":404,"in_reply_to":"3fa7e38b_9e57ff59","updated":"2020-02-12 11:43:57.000000000","message":"Done","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"7348b2faae818ee5433fa59d6230cdfd59f1bcce","unresolved":false,"context_lines":[{"line_number":417,"context_line":"    \u0026same_subtree\u003d_PROC2,_MEM2,_NUMA2"},{"line_number":418,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":419,"context_line":""},{"line_number":420,"context_line":"And eventually, asking with ``hw:mem_page_size\u003dany`` would mean::"},{"line_number":421,"context_line":""},{"line_number":422,"context_line":"    resources_PROC1\u003dVCPU:1"},{"line_number":423,"context_line":"    \u0026resources_MEM1\u003dMEMORY_MB:2048"},{"line_number":424,"context_line":"    \u0026required_NUMA1\u003dHW_NUMA_ROOT"},{"line_number":425,"context_line":"    \u0026same_subtree\u003d_PROC1,_MEM1,_NUMA1"},{"line_number":426,"context_line":"    \u0026resources_PROC2\u003dVCPU:1"},{"line_number":427,"context_line":"    \u0026resources_MEM2\u003dMEMORY_MB:2048"},{"line_number":428,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":429,"context_line":"    \u0026same_subtree\u003d_PROC2,_MEM2,_NUMA2"},{"line_number":430,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":431,"context_line":""},{"line_number":432,"context_line":""},{"line_number":433,"context_line":"Alternatives"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_79749d8a","line":430,"range":{"start_line":420,"start_character":1,"end_line":430,"end_character":22},"updated":"2020-02-11 16:55:32.000000000","message":"this might change the meaning of any. it was ment to mean use any page size that fits but actully ended up being let the image decided and if not set in the image i think we use small pages.\n\nits the lest well tested/used option so i dont actuly know of the top of my head if we ever impleented the logic to use any pagesize includeing hugepages like we had planned.\n\nif we have then yes. if not then we could treat it as a error or treat it like small. this should be resolved before getting to the placemnt code when constructing the instnace numa toplogy object in the request spec so this is just an implemenation detail.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"02c4341d8be515cc15790070e4824cadc6592878","unresolved":false,"context_lines":[{"line_number":417,"context_line":"    \u0026same_subtree\u003d_PROC2,_MEM2,_NUMA2"},{"line_number":418,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":419,"context_line":""},{"line_number":420,"context_line":"And eventually, asking with ``hw:mem_page_size\u003dany`` would mean::"},{"line_number":421,"context_line":""},{"line_number":422,"context_line":"    resources_PROC1\u003dVCPU:1"},{"line_number":423,"context_line":"    \u0026resources_MEM1\u003dMEMORY_MB:2048"},{"line_number":424,"context_line":"    \u0026required_NUMA1\u003dHW_NUMA_ROOT"},{"line_number":425,"context_line":"    \u0026same_subtree\u003d_PROC1,_MEM1,_NUMA1"},{"line_number":426,"context_line":"    \u0026resources_PROC2\u003dVCPU:1"},{"line_number":427,"context_line":"    \u0026resources_MEM2\u003dMEMORY_MB:2048"},{"line_number":428,"context_line":"    \u0026required_NUMA2\u003dHW_NUMA_ROOT"},{"line_number":429,"context_line":"    \u0026same_subtree\u003d_PROC2,_MEM2,_NUMA2"},{"line_number":430,"context_line":"    \u0026group_policy\u003dnone"},{"line_number":431,"context_line":""},{"line_number":432,"context_line":""},{"line_number":433,"context_line":"Alternatives"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_e56774eb","line":430,"range":{"start_line":420,"start_character":1,"end_line":430,"end_character":22},"in_reply_to":"3fa7e38b_79749d8a","updated":"2020-02-12 11:43:57.000000000","message":"\u003e this might change the meaning of any. it was ment to mean use any\n \u003e page size that fits but actully ended up being let the image\n \u003e decided and if not set in the image i think we use small pages.\n \u003e \n \u003e its the lest well tested/used option so i dont actuly know of the\n \u003e top of my head if we ever impleented the logic to use any pagesize\n \u003e includeing hugepages like we had planned.\n \u003e \n \u003e if we have then yes. if not then we could treat it as a error or\n \u003e treat it like small. this should be resolved before getting to the\n \u003e placemnt code when constructing the instnace numa toplogy object in\n \u003e the request spec so this is just an implemenation detail.\n\nLet\u0027s make sure we can discuss this during the implementation.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"0d8d9b0a14fc48e85c28eb634664abb695bb0f40","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_becd1b93","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"updated":"2020-02-11 16:06:07.000000000","message":"Yeah this was missing for me. This fallback mechanism solves my issue about suddenly loosing all my NUMA-aware capacity after the upgrade.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"02c4341d8be515cc15790070e4824cadc6592878","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_45d1a855","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_006ac516","updated":"2020-02-12 11:43:57.000000000","message":"I still need to fully understand the alternative proposal. Will provide a new revision now without changing this, and please provide me a better summary about what you\u0027d like in the next revision.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":5754,"name":"Alex Xu","email":"hejie.xu@intel.com","username":"xuhj"},"change_message_id":"2ed5bdf1ffc9a91d1c96594d91f07678ae18a5a6","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_b6cc5062","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_006ba553","updated":"2020-02-12 14:54:19.000000000","message":"\u003e if we dont early out and actully do multiple numa requests and\n \u003e merge them we coudl spread but im not sure that is what we want to\n \u003e do from a performance point of view.\n\nYes, the performance should be a concern, if we do that.\n\n \u003e \n \u003e if we did not early out and did up to [scheduler]/max_implcit_numa_nodes\n \u003e config always we could leave it to the werigher to do there normal\n \u003e spreading vs packing behavior.\n \u003e \n \u003e without the early out however i would then do set\n \u003e [scheduler]/max_implcit_numa_nodes\u003d2  rather than\n \u003e [scheduler]/max_implcit_numa_nodes\u003d4 which i think would an ok\n \u003e default if we had the early out.\n \u003e \n \u003e that said i dont really agree that prefing smaller numa node counts\n \u003e violated packing vs spreading, at least in the small vm\n \u003e approximation. for large vms where only 1 or can fit per host numa\n \u003e node it will tend to spread yes but that also depends on the size\n \u003e of a numa noded.\n \u003e \n \u003e on moderen zen 2 amd eypc chips with 1 numa node per cache region\n \u003e enabled in the bios tech numa node has 3-4 phyical core or 6-8\n \u003e thread and about 64G of ram. so on those hosts if you ask for 16\n \u003e cores its going to always  be spread over 2 numa nodes.if your on\n \u003e an intel system where the numa nodes are larger then you could fit\n \u003e it on 1 numa node.\n \u003e \n\ncan we assume most of non-NUMA instance is small? If we can, then we needn\u0027t worry about this case.\n\n \u003e if we were really concerend we could make it configurable. e.g.\n \u003e start with most numa node and get smaller, start with smllest and\n \u003e get larger or generate all in the range and merge. those are\n \u003e basicaly the 3 options.\n \u003e \n \u003e option 3 i think is what you want as it lets the weigher decided at\n \u003e the cost of scheduler performance.\n \u003e \n \u003e live migration betwen a numa host and a train compute node should\n \u003e be possible provide the train compute node can support the numa\n \u003e topology of the guest. similarly if you have disable numa reporting\n \u003e on a Ussuri host the fall back query would allow that host to be\n \u003e selected and if the numa toplogy filter said it was fine it would\n \u003e be a candiate.\n\nOh, right. The fallback + NUMA filter can help that. So we are going to check the instance has numa topology or not even if the flavor is NUMA-agnostic. If the instance has numa, then query with NUMA-aware placement request first, and then fallback if need.\n\n \u003e \n \u003e the only added constrait would be with an implcit numa toplogy for\n \u003e all instances we would only be able to live migrate if that implcit\n \u003e topology was supported. that come directly for the idea that we\n \u003e cannot alter the hardware topology in the guest will it is running.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"26547ca3377761ed9e852e1e74ae1edcba2ad389","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_006ac516","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_006ba553","updated":"2020-02-12 03:14:22.000000000","message":"oh by the way [scheduler]/max_implcit_numa_nodes will allow you to disabel implicit numa if you dont want it by setting\n[scheduler]/max_implcit_numa_nodes config\u003d0","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"067f87f0d7c599d3d776084f5eed0bedb397ba02","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_e49a24c2","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_3964e5e5","updated":"2020-02-11 17:34:55.000000000","message":"Thinking further (after consulting upgrade guy in the downstream project). Assume we have a Train deployment with NUMA-aware computes (we use cpu pinning and huge pages for almost every workload). During upgrade to Ussuri there will be a situation where we have the Ussuri control plane handling Train compute nodes. In this situation, without the fallback mechanism in the scheduler, we cannot create new servers for cannot migrate / evacuate server. That is bad I don\u0027t really want to be in this situation. However I think there is a possible compromise. 1) implement the fallback as proposed. 2) Make the Ussuri compute do the re-shape to NUMA-aware placement tree if so configured. \n\nThen for V make the compute startup to re-shape to NUMA aware tree _by default_. This way we can \"force\" the deployers _gradually_ to go to the new model.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":5754,"name":"Alex Xu","email":"hejie.xu@intel.com","username":"xuhj"},"change_message_id":"06e6ce261b22aee391d085a91c5406fb5ccd2d67","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_803c7591","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_3fc6054c","updated":"2020-02-12 02:27:36.000000000","message":"\u003e - To allow enabling the new modeling by default, we want to\n \u003e *translate NUMA-agnostic flavors*, allowing them to land on\n \u003e NUMA-modeled hosts. We do this by performing multiple separate\n \u003e queries in the \"aimed at reshaped computes\" category. Essentially,\n \u003e for $n \u003d 1 to $max, we pretend the flavor actually said\n \u003e hw:numa_nodes\u003d$n (but allow asymmetric splitting). $max is a\n \u003e configurable maximum number of implicit NUMA nodes to try.\n\nOne concern is that if the instance has implicit NUMA nodes, then it can\u0027t be live-migration back to any host without NUMA modeling. If the operator separates some of hosts as NUMA modeling, some of hosts as not. Then I guess the operator doesn\u0027t want the implict NUMA instance, maybe we can provide a config option to disable the implict NUMA instance.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"f827dbf3303a961dd0d95f014259031e4628e8db","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_471c577a","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_3fc6054c","updated":"2020-02-11 23:13:41.000000000","message":"ok to help people thinks about this i have done a quick poc of how we would do the progress numa toplogy implemenation.\nignoring the query generation which should be done from the numa toplogy object this basically implements the progressive generation of the numa_toplogy object\nill clean it up tomorow to make it a generator but right now it will print the diffent possibel options for an instance wtih 4G of ram and 10 cpus with a max numa count of 16.\n\n------------------------NOTES----------------------------\nmax_numa_nodes would be the value form the \n[scheduler]/max_implcit_numa_nodes config option\nthis code generates shoudl be correct but will not take into account some minor optimisations that could be added later\n\nit it takes advantage of the ability to specify asymmetric numa toplogies to minimise the imbalance of both cpus and ram and should result in at most 1 cpu and 1 mb of ram delta between any two numa nodes.\n\ni have added one optimisation that bails out early when the number of numa nodes reach the number of cpus as we need 1 cpu per numa node.\n\none of the optimisations that i have not added is that where the cpu and ram is evenly divisable by the number of numa nodes i can skip the generation of the hw:numa_cpus and hw:numa_mem paramaters.\n\ni am copying the flavor becasue we would temporaily modify it to create the numa toplogy object but the only thing we would persisti in the instance/request spec would be the toplogy objects.\n\n------------------CODE----------------------\n\nfrom nova.objects import instance_numa,  numa, flavor, image_meta, instance\nfrom nova.virt import hardware\n\nimport copy\n\nbase_instance \u003d instance.Instance()\n\nbase_image \u003d image_meta.ImageMeta()\nbase_image.properties \u003d image_meta.ImageMetaProps.from_dict({})\n\nbase_flavor \u003d flavor.Flavor()\nbase_flavor.id \u003d 42\nbase_flavor.vcpus \u003d 10\nbase_flavor.memory_mb \u003d 4096\nbase_flavor.root_gb\u003d100\n\nmax_numa_nodes \u003d 16\nnuma_limit \u003d min(base_flavor.vcpus, max_numa_nodes)\n\nfor nodes in range(1, numa_limit+1):\n    server \u003d copy.deepcopy(base_instance)\n    server.flavor \u003d copy.deepcopy(base_flavor)\n    image \u003d copy.deepcopy(base_image)\n    extra_specs \u003d {\n        \"hw:numa_nodes\": nodes,\n        \"hw:mem_page_size\": \"small\" # non numa instance should use 4k small pages.\n    }\n    cpus_total \u003d server.flavor.vcpus\n    cpus_per_node \u003d cpus_total // nodes\n    cpu_imblance \u003d cpus_total % nodes\n    current_cpu \u003d 0\n    ram_total \u003d server.flavor.memory_mb\n    ram_per_node \u003d ram_total // nodes\n    ram_imblance \u003d ram_total % nodes\n    current_ram \u003d 0\n    if nodes \u003e 1:\n        for node in range(nodes):\n            cpus_to_add \u003d min(cpus_total - current_cpu, cpus_per_node)\n            ram_to_add \u003d min(ram_total - current_ram, ram_per_node)\n            if  cpu_imblance!\u003d0:\n                cpus_to_add+\u003d1\n                cpu_imblance-\u003d1\n            if ram_imblance!\u003d0:\n                ram_to_add+\u003d1\n                ram_imblance-\u003d1\n            cpu_list \u003d \u0027,\u0027.join(str(x) for x in range(current_cpu, current_cpu+cpus_to_add))\n            extra_specs[\u0027hw:numa_cpus.%s\u0027 % node] \u003d cpu_list\n            extra_specs[\u0027hw:numa_mem.%s\u0027 % node] \u003d ram_to_add\n            current_cpu +\u003d cpus_to_add\n    print(\"------------------------\")\n    print(\"------ numa node count %s ------ \" % nodes)\n    print(\"------------------------\")\n    print(\"flavor:\", server.flavor)\n    print(\"------------------------\")\n    server.flavor.extra_specs \u003d extra_specs\n    print(\"flavor extra specs:\", extra_specs)\n    # this is what we need to generate progressively.\n    # if we caulate the placment query from the numa toplogy object.\n    numa_topology \u003d hardware.numa_get_constraints(server.flavor, image)\n    # note that we would just make this a generator function and yeild\n    # numa_topology here.\n    print(\"------------------------\")\n    print(\"numa topology: \",numa_topology)\n    print(\"------------------------\")\n    for cell in numa_topology.cells:\n        print(\"\\t numa cell %s\" % cell.id, cell)\n    print()\n\n\n----------------output-------------\n\n(venv) sean@pop-os:~/repos/openstack/nova-2$ python3 nova/numa.py \n------------------------\n------ numa node count 1 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 1, \u0027hw:mem_page_size\u0027: \u0027small\u0027}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0,1,2,3,4,5,6,7,8,9]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d4096,pagesize\u003d-1)\n\n------------------------\n------ numa node count 2 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 2, \u0027hw:mem_page_size\u0027: \u0027small\u0027, \u0027hw:numa_cpus.0\u0027: \u00270,1,2,3,4\u0027, \u0027hw:numa_mem.0\u0027: 2048, \u0027hw:numa_cpus.1\u0027: \u00275,6,7,8,9\u0027, \u0027hw:numa_mem.1\u0027: 2048}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN),InstanceNUMACell(1)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0,1,2,3,4]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d2048,pagesize\u003d-1)\n\t numa cell 1 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([5,6,7,8,9]),cpuset_reserved\u003dNone,id\u003d1,memory\u003d2048,pagesize\u003d-1)\n\n------------------------\n------ numa node count 3 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 3, \u0027hw:mem_page_size\u0027: \u0027small\u0027, \u0027hw:numa_cpus.0\u0027: \u00270,1,2,3\u0027, \u0027hw:numa_mem.0\u0027: 1366, \u0027hw:numa_cpus.1\u0027: \u00274,5,6\u0027, \u0027hw:numa_mem.1\u0027: 1365, \u0027hw:numa_cpus.2\u0027: \u00277,8,9\u0027, \u0027hw:numa_mem.2\u0027: 1365}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN),InstanceNUMACell(1),InstanceNUMACell(2)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0,1,2,3]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d1366,pagesize\u003d-1)\n\t numa cell 1 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([4,5,6]),cpuset_reserved\u003dNone,id\u003d1,memory\u003d1365,pagesize\u003d-1)\n\t numa cell 2 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([8,9,7]),cpuset_reserved\u003dNone,id\u003d2,memory\u003d1365,pagesize\u003d-1)\n\n------------------------\n------ numa node count 4 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 4, \u0027hw:mem_page_size\u0027: \u0027small\u0027, \u0027hw:numa_cpus.0\u0027: \u00270,1,2\u0027, \u0027hw:numa_mem.0\u0027: 1024, \u0027hw:numa_cpus.1\u0027: \u00273,4,5\u0027, \u0027hw:numa_mem.1\u0027: 1024, \u0027hw:numa_cpus.2\u0027: \u00276,7\u0027, \u0027hw:numa_mem.2\u0027: 1024, \u0027hw:numa_cpus.3\u0027: \u00278,9\u0027, \u0027hw:numa_mem.3\u0027: 1024}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN),InstanceNUMACell(1),InstanceNUMACell(2),InstanceNUMACell(3)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0,1,2]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d1024,pagesize\u003d-1)\n\t numa cell 1 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([3,4,5]),cpuset_reserved\u003dNone,id\u003d1,memory\u003d1024,pagesize\u003d-1)\n\t numa cell 2 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([6,7]),cpuset_reserved\u003dNone,id\u003d2,memory\u003d1024,pagesize\u003d-1)\n\t numa cell 3 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([8,9]),cpuset_reserved\u003dNone,id\u003d3,memory\u003d1024,pagesize\u003d-1)\n\n------------------------\n------ numa node count 5 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 5, \u0027hw:mem_page_size\u0027: \u0027small\u0027, \u0027hw:numa_cpus.0\u0027: \u00270,1\u0027, \u0027hw:numa_mem.0\u0027: 820, \u0027hw:numa_cpus.1\u0027: \u00272,3\u0027, \u0027hw:numa_mem.1\u0027: 819, \u0027hw:numa_cpus.2\u0027: \u00274,5\u0027, \u0027hw:numa_mem.2\u0027: 819, \u0027hw:numa_cpus.3\u0027: \u00276,7\u0027, \u0027hw:numa_mem.3\u0027: 819, \u0027hw:numa_cpus.4\u0027: \u00278,9\u0027, \u0027hw:numa_mem.4\u0027: 819}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN),InstanceNUMACell(1),InstanceNUMACell(2),InstanceNUMACell(3),InstanceNUMACell(4)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0,1]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d820,pagesize\u003d-1)\n\t numa cell 1 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([2,3]),cpuset_reserved\u003dNone,id\u003d1,memory\u003d819,pagesize\u003d-1)\n\t numa cell 2 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([4,5]),cpuset_reserved\u003dNone,id\u003d2,memory\u003d819,pagesize\u003d-1)\n\t numa cell 3 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([6,7]),cpuset_reserved\u003dNone,id\u003d3,memory\u003d819,pagesize\u003d-1)\n\t numa cell 4 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([8,9]),cpuset_reserved\u003dNone,id\u003d4,memory\u003d819,pagesize\u003d-1)\n\n------------------------\n------ numa node count 6 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 6, \u0027hw:mem_page_size\u0027: \u0027small\u0027, \u0027hw:numa_cpus.0\u0027: \u00270,1\u0027, \u0027hw:numa_mem.0\u0027: 683, \u0027hw:numa_cpus.1\u0027: \u00272,3\u0027, \u0027hw:numa_mem.1\u0027: 683, \u0027hw:numa_cpus.2\u0027: \u00274,5\u0027, \u0027hw:numa_mem.2\u0027: 683, \u0027hw:numa_cpus.3\u0027: \u00276,7\u0027, \u0027hw:numa_mem.3\u0027: 683, \u0027hw:numa_cpus.4\u0027: \u00278\u0027, \u0027hw:numa_mem.4\u0027: 682, \u0027hw:numa_cpus.5\u0027: \u00279\u0027, \u0027hw:numa_mem.5\u0027: 682}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN),InstanceNUMACell(1),InstanceNUMACell(2),InstanceNUMACell(3),InstanceNUMACell(4),InstanceNUMACell(5)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0,1]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d683,pagesize\u003d-1)\n\t numa cell 1 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([2,3]),cpuset_reserved\u003dNone,id\u003d1,memory\u003d683,pagesize\u003d-1)\n\t numa cell 2 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([4,5]),cpuset_reserved\u003dNone,id\u003d2,memory\u003d683,pagesize\u003d-1)\n\t numa cell 3 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([6,7]),cpuset_reserved\u003dNone,id\u003d3,memory\u003d683,pagesize\u003d-1)\n\t numa cell 4 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([8]),cpuset_reserved\u003dNone,id\u003d4,memory\u003d682,pagesize\u003d-1)\n\t numa cell 5 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([9]),cpuset_reserved\u003dNone,id\u003d5,memory\u003d682,pagesize\u003d-1)\n\n------------------------\n------ numa node count 7 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 7, \u0027hw:mem_page_size\u0027: \u0027small\u0027, \u0027hw:numa_cpus.0\u0027: \u00270,1\u0027, \u0027hw:numa_mem.0\u0027: 586, \u0027hw:numa_cpus.1\u0027: \u00272,3\u0027, \u0027hw:numa_mem.1\u0027: 585, \u0027hw:numa_cpus.2\u0027: \u00274,5\u0027, \u0027hw:numa_mem.2\u0027: 585, \u0027hw:numa_cpus.3\u0027: \u00276\u0027, \u0027hw:numa_mem.3\u0027: 585, \u0027hw:numa_cpus.4\u0027: \u00277\u0027, \u0027hw:numa_mem.4\u0027: 585, \u0027hw:numa_cpus.5\u0027: \u00278\u0027, \u0027hw:numa_mem.5\u0027: 585, \u0027hw:numa_cpus.6\u0027: \u00279\u0027, \u0027hw:numa_mem.6\u0027: 585}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN),InstanceNUMACell(1),InstanceNUMACell(2),InstanceNUMACell(3),InstanceNUMACell(4),InstanceNUMACell(5),InstanceNUMACell(6)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0,1]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d586,pagesize\u003d-1)\n\t numa cell 1 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([2,3]),cpuset_reserved\u003dNone,id\u003d1,memory\u003d585,pagesize\u003d-1)\n\t numa cell 2 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([4,5]),cpuset_reserved\u003dNone,id\u003d2,memory\u003d585,pagesize\u003d-1)\n\t numa cell 3 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([6]),cpuset_reserved\u003dNone,id\u003d3,memory\u003d585,pagesize\u003d-1)\n\t numa cell 4 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([7]),cpuset_reserved\u003dNone,id\u003d4,memory\u003d585,pagesize\u003d-1)\n\t numa cell 5 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([8]),cpuset_reserved\u003dNone,id\u003d5,memory\u003d585,pagesize\u003d-1)\n\t numa cell 6 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([9]),cpuset_reserved\u003dNone,id\u003d6,memory\u003d585,pagesize\u003d-1)\n\n------------------------\n------ numa node count 8 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 8, \u0027hw:mem_page_size\u0027: \u0027small\u0027, \u0027hw:numa_cpus.0\u0027: \u00270,1\u0027, \u0027hw:numa_mem.0\u0027: 512, \u0027hw:numa_cpus.1\u0027: \u00272,3\u0027, \u0027hw:numa_mem.1\u0027: 512, \u0027hw:numa_cpus.2\u0027: \u00274\u0027, \u0027hw:numa_mem.2\u0027: 512, \u0027hw:numa_cpus.3\u0027: \u00275\u0027, \u0027hw:numa_mem.3\u0027: 512, \u0027hw:numa_cpus.4\u0027: \u00276\u0027, \u0027hw:numa_mem.4\u0027: 512, \u0027hw:numa_cpus.5\u0027: \u00277\u0027, \u0027hw:numa_mem.5\u0027: 512, \u0027hw:numa_cpus.6\u0027: \u00278\u0027, \u0027hw:numa_mem.6\u0027: 512, \u0027hw:numa_cpus.7\u0027: \u00279\u0027, \u0027hw:numa_mem.7\u0027: 512}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN),InstanceNUMACell(1),InstanceNUMACell(2),InstanceNUMACell(3),InstanceNUMACell(4),InstanceNUMACell(5),InstanceNUMACell(6),InstanceNUMACell(7)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0,1]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d512,pagesize\u003d-1)\n\t numa cell 1 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([2,3]),cpuset_reserved\u003dNone,id\u003d1,memory\u003d512,pagesize\u003d-1)\n\t numa cell 2 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([4]),cpuset_reserved\u003dNone,id\u003d2,memory\u003d512,pagesize\u003d-1)\n\t numa cell 3 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([5]),cpuset_reserved\u003dNone,id\u003d3,memory\u003d512,pagesize\u003d-1)\n\t numa cell 4 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([6]),cpuset_reserved\u003dNone,id\u003d4,memory\u003d512,pagesize\u003d-1)\n\t numa cell 5 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([7]),cpuset_reserved\u003dNone,id\u003d5,memory\u003d512,pagesize\u003d-1)\n\t numa cell 6 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([8]),cpuset_reserved\u003dNone,id\u003d6,memory\u003d512,pagesize\u003d-1)\n\t numa cell 7 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([9]),cpuset_reserved\u003dNone,id\u003d7,memory\u003d512,pagesize\u003d-1)\n\n------------------------\n------ numa node count 9 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 9, \u0027hw:mem_page_size\u0027: \u0027small\u0027, \u0027hw:numa_cpus.0\u0027: \u00270,1\u0027, \u0027hw:numa_mem.0\u0027: 456, \u0027hw:numa_cpus.1\u0027: \u00272\u0027, \u0027hw:numa_mem.1\u0027: 455, \u0027hw:numa_cpus.2\u0027: \u00273\u0027, \u0027hw:numa_mem.2\u0027: 455, \u0027hw:numa_cpus.3\u0027: \u00274\u0027, \u0027hw:numa_mem.3\u0027: 455, \u0027hw:numa_cpus.4\u0027: \u00275\u0027, \u0027hw:numa_mem.4\u0027: 455, \u0027hw:numa_cpus.5\u0027: \u00276\u0027, \u0027hw:numa_mem.5\u0027: 455, \u0027hw:numa_cpus.6\u0027: \u00277\u0027, \u0027hw:numa_mem.6\u0027: 455, \u0027hw:numa_cpus.7\u0027: \u00278\u0027, \u0027hw:numa_mem.7\u0027: 455, \u0027hw:numa_cpus.8\u0027: \u00279\u0027, \u0027hw:numa_mem.8\u0027: 455}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN),InstanceNUMACell(1),InstanceNUMACell(2),InstanceNUMACell(3),InstanceNUMACell(4),InstanceNUMACell(5),InstanceNUMACell(6),InstanceNUMACell(7),InstanceNUMACell(8)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0,1]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d456,pagesize\u003d-1)\n\t numa cell 1 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([2]),cpuset_reserved\u003dNone,id\u003d1,memory\u003d455,pagesize\u003d-1)\n\t numa cell 2 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([3]),cpuset_reserved\u003dNone,id\u003d2,memory\u003d455,pagesize\u003d-1)\n\t numa cell 3 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([4]),cpuset_reserved\u003dNone,id\u003d3,memory\u003d455,pagesize\u003d-1)\n\t numa cell 4 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([5]),cpuset_reserved\u003dNone,id\u003d4,memory\u003d455,pagesize\u003d-1)\n\t numa cell 5 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([6]),cpuset_reserved\u003dNone,id\u003d5,memory\u003d455,pagesize\u003d-1)\n\t numa cell 6 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([7]),cpuset_reserved\u003dNone,id\u003d6,memory\u003d455,pagesize\u003d-1)\n\t numa cell 7 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([8]),cpuset_reserved\u003dNone,id\u003d7,memory\u003d455,pagesize\u003d-1)\n\t numa cell 8 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([9]),cpuset_reserved\u003dNone,id\u003d8,memory\u003d455,pagesize\u003d-1)\n\n------------------------\n------ numa node count 10 ------ \n------------------------\nflavor: Flavor(created_at\u003d\u003c?\u003e,deleted\u003d\u003c?\u003e,deleted_at\u003d\u003c?\u003e,description\u003d\u003c?\u003e,disabled\u003d\u003c?\u003e,ephemeral_gb\u003d\u003c?\u003e,extra_specs\u003d\u003c?\u003e,flavorid\u003d\u003c?\u003e,id\u003d42,is_public\u003d\u003c?\u003e,memory_mb\u003d4096,name\u003d\u003c?\u003e,projects\u003d\u003c?\u003e,root_gb\u003d100,rxtx_factor\u003d\u003c?\u003e,swap\u003d\u003c?\u003e,updated_at\u003d\u003c?\u003e,vcpu_weight\u003d\u003c?\u003e,vcpus\u003d10)\n------------------------\nflavor extra specs: {\u0027hw:numa_nodes\u0027: 10, \u0027hw:mem_page_size\u0027: \u0027small\u0027, \u0027hw:numa_cpus.0\u0027: \u00270\u0027, \u0027hw:numa_mem.0\u0027: 410, \u0027hw:numa_cpus.1\u0027: \u00271\u0027, \u0027hw:numa_mem.1\u0027: 410, \u0027hw:numa_cpus.2\u0027: \u00272\u0027, \u0027hw:numa_mem.2\u0027: 410, \u0027hw:numa_cpus.3\u0027: \u00273\u0027, \u0027hw:numa_mem.3\u0027: 410, \u0027hw:numa_cpus.4\u0027: \u00274\u0027, \u0027hw:numa_mem.4\u0027: 410, \u0027hw:numa_cpus.5\u0027: \u00275\u0027, \u0027hw:numa_mem.5\u0027: 410, \u0027hw:numa_cpus.6\u0027: \u00276\u0027, \u0027hw:numa_mem.6\u0027: 409, \u0027hw:numa_cpus.7\u0027: \u00277\u0027, \u0027hw:numa_mem.7\u0027: 409, \u0027hw:numa_cpus.8\u0027: \u00278\u0027, \u0027hw:numa_mem.8\u0027: 409, \u0027hw:numa_cpus.9\u0027: \u00279\u0027, \u0027hw:numa_mem.9\u0027: 409}\n------------------------\nnuma topology:  InstanceNUMATopology(cells\u003d[InstanceNUMACell(UNKNOWN),InstanceNUMACell(1),InstanceNUMACell(2),InstanceNUMACell(3),InstanceNUMACell(4),InstanceNUMACell(5),InstanceNUMACell(6),InstanceNUMACell(7),InstanceNUMACell(8),InstanceNUMACell(9)],emulator_threads_policy\u003d\u003c?\u003e,id\u003d\u003c?\u003e,instance_uuid\u003d\u003c?\u003e)\n------------------------\n\t numa cell 0 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([0]),cpuset_reserved\u003dNone,id\u003d0,memory\u003d410,pagesize\u003d-1)\n\t numa cell 1 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([1]),cpuset_reserved\u003dNone,id\u003d1,memory\u003d410,pagesize\u003d-1)\n\t numa cell 2 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([2]),cpuset_reserved\u003dNone,id\u003d2,memory\u003d410,pagesize\u003d-1)\n\t numa cell 3 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([3]),cpuset_reserved\u003dNone,id\u003d3,memory\u003d410,pagesize\u003d-1)\n\t numa cell 4 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([4]),cpuset_reserved\u003dNone,id\u003d4,memory\u003d410,pagesize\u003d-1)\n\t numa cell 5 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([5]),cpuset_reserved\u003dNone,id\u003d5,memory\u003d410,pagesize\u003d-1)\n\t numa cell 6 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([6]),cpuset_reserved\u003dNone,id\u003d6,memory\u003d409,pagesize\u003d-1)\n\t numa cell 7 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([7]),cpuset_reserved\u003dNone,id\u003d7,memory\u003d409,pagesize\u003d-1)\n\t numa cell 8 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([8]),cpuset_reserved\u003dNone,id\u003d8,memory\u003d409,pagesize\u003d-1)\n\t numa cell 9 InstanceNUMACell(cpu_pinning_raw\u003dNone,cpu_policy\u003dNone,cpu_thread_policy\u003dNone,cpu_topology\u003d\u003c?\u003e,cpuset\u003dset([9]),cpuset_reserved\u003dNone,id\u003d9,memory\u003d409,pagesize\u003d-1)","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"b77e57b67896aa8f05ac8e23c8f90db761491bf1","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_6b89156e","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_45d1a855","updated":"2020-02-12 13:59:25.000000000","message":"How I understand Eric\u0027s summary above:\n\n* always do two placement queries one with the new query syntax and one with the old query syntax + required\u003d!HW_NUMA_ROOT. Merge the results of the two queries and let the NTF do the rest of the filtering.\n\n* at the same time make reshaping the default behavior for the computes in U.\n\n* To support  the non-NUMA aware requests after very compute is reshaped to U  such request will be translated to the new query syntax by trying out different splittings of the non-NUMA aware requests to NUMA nodes. (e.g. does the request fit into one node? does the request fit into 2 nodes with an equal splitting? without an equal splitting? ...)\n\n* As we not fully trust the splitting algo in the previous point. We add a way to disable re-shaping in the computes. Or even do an un-re-shaping (a backward reshape to T version of the placement tree).","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"19529ea9a1b3f5638ce0eecfdc0383e906a7707b","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_ea40aa35","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_471c577a","updated":"2020-02-11 23:21:13.000000000","message":"oh one other thing.\nwe have the ablity to auto split resouces if the cpus and ram are evenly devisable by the numa of numa nodes today.\n\nwith a slight tweek it could be able made able to support the asemetic cases as i have done above.\n\nif we are ok with allowing sligly more cases to work by default for numa instnace too we can just share the code and all that will need to be set in the temporay flavor when invoking hardware.numa_get_constraints would be hw:numa_nodes\n\ngenerating the extraspecs and bypasing that was just cleaner to show how it would work for a quick poc.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"060e310947c1249716b48c9b254ea2ecf4ff88a7","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_36486088","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_6b89156e","updated":"2020-02-12 15:13:11.000000000","message":"\u003e One concern is that if the instance has implicit NUMA nodes, then\n \u003e it can\u0027t be live-migration back to any host without NUMA modeling.\n\nYou are correct that the live migration *must* honor whatever topology was selected initially. We discussed this and decided it was an acceptable restriction. But yes, this needs to be spelled out in the spec, as it will increase the potential for NUMA-agnostic workloads to fail live migration in U+.\n\nHowever, you *can* still live-migrate to an unreshaped host (if it has room). That\u0027s what the fallback query is for. In this case the NumaTopologyFilter is responsible for ensuring the matching topology.\n\n \u003e Another concern is this violates pack/spread also since you always\n \u003e want to try 1 implicit numa node first, that means spread strategy.\n \u003e It always want to choose a host whose NUMA nodes have more free\n \u003e resources.\n\nThat\u0027s why we proposed to do *all* the queries, merge the results, and let the existing filters/weighers pick.\n\n \u003e * To support  the non-NUMA aware requests after very compute is\n \u003e reshaped to U  such request will be translated to the new query\n \u003e syntax by trying out different splittings of the non-NUMA aware\n \u003e requests to NUMA nodes.\n\nYes, but to be clear, we don\u0027t wait until all computes are reshaped before we start doing this.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"a9121cbae5fde47facf243822c5e3a89aed8743f","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_d609cc28","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_6b89156e","updated":"2020-02-12 14:28:13.000000000","message":"As far as I understand this solves my issue about Ussuri control plane + Train computes + NUMA aware flavors as the second query will use T syntaxt and therefore can place workload to T computes.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":5754,"name":"Alex Xu","email":"hejie.xu@intel.com","username":"xuhj"},"change_message_id":"04b0895fb7458cda3baa31a6be52c15c52f92ae7","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_c0d26d33","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_803c7591","updated":"2020-02-12 02:35:20.000000000","message":"\u003e \u003e - To allow enabling the new modeling by default, we want to\n \u003e \u003e *translate NUMA-agnostic flavors*, allowing them to land on\n \u003e \u003e NUMA-modeled hosts. We do this by performing multiple separate\n \u003e \u003e queries in the \"aimed at reshaped computes\" category.\n \u003e Essentially,\n \u003e \u003e for $n \u003d 1 to $max, we pretend the flavor actually said\n \u003e \u003e hw:numa_nodes\u003d$n (but allow asymmetric splitting). $max is a\n \u003e \u003e configurable maximum number of implicit NUMA nodes to try.\n \u003e \n \u003e One concern is that if the instance has implicit NUMA nodes, then\n \u003e it can\u0027t be live-migration back to any host without NUMA modeling.\n \u003e If the operator separates some of hosts as NUMA modeling, some of\n \u003e hosts as not. Then I guess the operator doesn\u0027t want the implict\n \u003e NUMA instance, maybe we can provide a config option to disable the\n \u003e implict NUMA instance.\n\nAnother concern is this violates pack/spread also since you always want to try 1 implicit numa node first, that means spread strategy. It always want to choose a host whose NUMA nodes have more free resources.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"9177bfd45ddd72cfde50badeff2266fa08bd73aa","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_3fc6054c","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_a43c2c91","updated":"2020-02-11 20:22:03.000000000","message":"Okay, I see now that we have multiple concerns:\n\n1) A U control plane with (some number of) T computes.\n\n2) A fully-U deployment.\n\nAnd we need to deal with:\n\nA) NUMA-aware flavors (with hw:numa* in them)\n\nB) NUMA-agnostic flavors\n\nAnd the concern is:\n\ni) Use placement syntax that\u0027s U-aware\n\nii) Use legacy placement syntax\n\nAfter discussion [1], we came up with this proposal:\n\n- To avoid favoring reshaped U hosts and thereby violating pack/spread, server affinity, etc., we need to do two query types -- one aimed at downlevel computes and one at reshaped computes -- merge the results, and then let filters/weighers take the final swing.\n- Don\u0027t enforce segregation.\n- To allow enabling the new modeling by default, we want to *translate NUMA-agnostic flavors*, allowing them to land on NUMA-modeled hosts. We do this by performing multiple separate queries in the \"aimed at reshaped computes\" category. Essentially, for $n \u003d 1 to $max, we pretend the flavor actually said hw:numa_nodes\u003d$n (but allow asymmetric splitting). $max is a configurable maximum number of implicit NUMA nodes to try.\n- Because ^ is naïf, the workaround is available to force a U host to behave like a T host, and thus continue to be subject to...\n- The second (\"fallback\") query in both (NUMA-aware and NUMA-agnostic flavor) cases specifies !HW_NUMA_NODE to make sure it only targets unreshaped hosts. This is so the fallback query doesn\u0027t accidentally land on a reshaped host, but straddling NUMA nodes.\n\n[1] http://eavesdrop.openstack.org/irclogs/%23openstack-nova/%23openstack-nova.2020-02-11.log.html#t2020-02-11T18:44:45","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_fe8873c4","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_becd1b93","updated":"2020-02-11 16:35:12.000000000","message":"Wait, let me make sure I understand this paragraph correctly.\n\nI had originally thought the ussuri code was going to look like this:\n\n if extra_specs has hw:numa* stuff:\n     do the translated granular placement request\n     let the new-and-improved NTF filter candidates for anti-affinity and PCI\n else:\n     do the untranslated request (but with !HW_NUMA_ROOT added)\n\nBut this paragraph is proposing that it will instead do this:\n\n if extra_specs has hw:numa* stuff:\n     try:\n         do the translated granular placement request\n         let the new-and-improved NTF filter candidates for anti-affinity and PCI\n     except NoValidHosts:\n         do an old-style untranslated request (maybe even without !HW_NUMA_ROOT?)\n         use the old NTF to do all the filtering and fitting\n else:\n     do the untranslated request (but with !HW_NUMA_ROOT added)\n\n??\n\nIf that\u0027s the case, I\u0027m not sure what would ever motivate operators to switch over.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"1b214eb79d725d9fe9af12ab274cf4ea0860b94a","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_006ba553","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_c0d26d33","updated":"2020-02-12 03:09:02.000000000","message":"if we dont early out and actully do multiple numa requests and merge them we coudl spread but im not sure that is what we want to do from a performance point of view.\n\nif we did not early out and did up to [scheduler]/max_implcit_numa_nodes config always we could leave it to the werigher to do there normal spreading vs packing behavior.\n\nwithout the early out however i would then do set\n[scheduler]/max_implcit_numa_nodes\u003d2  rather than\n[scheduler]/max_implcit_numa_nodes\u003d4 which i think would an ok default if we had the early out.\n\nthat said i dont really agree that prefing smaller numa node counts violated packing vs spreading, at least in the small vm approximation. for large vms where only 1 or can fit per host numa node it will tend to spread yes but that also depends on the size of a numa noded.\n\non moderen zen 2 amd eypc chips with 1 numa node per cache region enabled in the bios tech numa node has 3-4 phyical core or 6-8 thread and about 64G of ram. so on those hosts if you ask for 16 cores its going to always  be spread over 2 numa nodes.if your on an intel system where the numa nodes are larger then you could fit it on 1 numa node.\n\nif we were really concerend we could make it configurable. e.g. start with most numa node and get smaller, start with smllest and get larger or generate all in the range and merge. those are basicaly the 3 options.\n\noption 3 i think is what you want as it lets the weigher decided at the cost of scheduler performance.\n\nlive migration betwen a numa host and a train compute node should be possible provide the train compute node can support the numa topology of the guest. similarly if you have disable numa reporting on a Ussuri host the fall back query would allow that host to be selected and if the numa toplogy filter said it was fine it would be a candiate.\n\nthe only added constrait would be with an implcit numa toplogy for all instances we would only be able to live migrate if that implcit topology was supported. that come directly for the idea that we cannot alter the hardware topology in the guest will it is running.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"afeb44094a1ec1847c174fd2769737d462578bfe","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_a43c2c91","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_e49a24c2","updated":"2020-02-11 17:42:34.000000000","message":"yep this is exactly what we are proposing and why we did the fall back for PCPUs.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"c31e744e3efd900bfdd7eca7d5c186c86aca1ac4","unresolved":false,"context_lines":[{"line_number":507,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":508,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":509,"context_line":""},{"line_number":510,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":511,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":512,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":513,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":514,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":515,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":516,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":517,"context_line":""},{"line_number":518,"context_line":"Implementation"},{"line_number":519,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_3964e5e5","line":516,"range":{"start_line":510,"start_character":0,"end_line":516,"end_character":53},"in_reply_to":"3fa7e38b_fe8873c4","updated":"2020-02-11 16:41:29.000000000","message":"I understand the need to push the operators to switch. But pushing them to do the switch right at the upgrade feels too much to me. It would like an ultimate. \"When you upgrade to Ussuri you will loose all the NUMA aware capacity of your cloud, but you can get them back iff you do the reshape one every NUMA aware compute, but you should not do that all at once as that will overload placement.\"","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":554,"context_line":""},{"line_number":555,"context_line":"None."},{"line_number":556,"context_line":""},{"line_number":557,"context_line":"References"},{"line_number":558,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":559,"context_line":""},{"line_number":560,"context_line":".. _`Nested Resource Providers`: https://specs.openstack.org/openstack/nova-specs/specs/queens/approved/nested-resource-providers.html"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_9e685ff3","line":557,"updated":"2020-02-11 16:35:12.000000000","message":"Pretty sure this section will render empty as written.","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"02c4341d8be515cc15790070e4824cadc6592878","unresolved":false,"context_lines":[{"line_number":554,"context_line":""},{"line_number":555,"context_line":"None."},{"line_number":556,"context_line":""},{"line_number":557,"context_line":"References"},{"line_number":558,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"},{"line_number":559,"context_line":""},{"line_number":560,"context_line":".. _`Nested Resource Providers`: https://specs.openstack.org/openstack/nova-specs/specs/queens/approved/nested-resource-providers.html"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_25334cc6","line":557,"in_reply_to":"3fa7e38b_9e685ff3","updated":"2020-02-12 11:43:57.000000000","message":"Done","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"01779e262cc2ea625c7a9e074c793613836ced58","unresolved":false,"context_lines":[{"line_number":562,"context_line":".. _`NUMA possible extra specs`: https://docs.openstack.org/nova/latest/admin/flavors.html#extra-specs-numa-topology"},{"line_number":563,"context_line":".. _`Huge pages`: https://docs.openstack.org/nova/latest/admin/huge-pages.html"},{"line_number":564,"context_line":".. _`Placement API /reshaper endpoint`: https://developer.openstack.org/api-ref/placement/?expanded\u003did84-detail#reshaper"},{"line_number":565,"context_line":".. _`Placement can_split` : https://review.opendev.org/#/c/658510/"},{"line_number":566,"context_line":".. _`physical CPU resources`: https://specs.openstack.org/openstack/nova-specs/specs/train/approved/cpu-resources.html"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_5ea98722","line":565,"range":{"start_line":565,"start_character":25,"end_line":565,"end_character":26},"updated":"2020-02-11 16:35:12.000000000","message":"extra space","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"02c4341d8be515cc15790070e4824cadc6592878","unresolved":false,"context_lines":[{"line_number":562,"context_line":".. _`NUMA possible extra specs`: https://docs.openstack.org/nova/latest/admin/flavors.html#extra-specs-numa-topology"},{"line_number":563,"context_line":".. _`Huge pages`: https://docs.openstack.org/nova/latest/admin/huge-pages.html"},{"line_number":564,"context_line":".. _`Placement API /reshaper endpoint`: https://developer.openstack.org/api-ref/placement/?expanded\u003did84-detail#reshaper"},{"line_number":565,"context_line":".. _`Placement can_split` : https://review.opendev.org/#/c/658510/"},{"line_number":566,"context_line":".. _`physical CPU resources`: https://specs.openstack.org/openstack/nova-specs/specs/train/approved/cpu-resources.html"}],"source_content_type":"text/x-rst","patch_set":17,"id":"3fa7e38b_453688b5","line":565,"range":{"start_line":565,"start_character":25,"end_line":565,"end_character":26},"in_reply_to":"3fa7e38b_5ea98722","updated":"2020-02-12 11:43:57.000000000","message":"Done","commit_id":"07e6b8feda440a989dab8d2480100a92725c0a47"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7dd85899d0fa4e1c72e0dfed18c7204462c267ff","unresolved":false,"context_lines":[{"line_number":516,"context_line":"should possibly document any extra step required for FFU with an eventual"},{"line_number":517,"context_line":"removal in a couple of releases once all deployers no longer need this support."},{"line_number":518,"context_line":""},{"line_number":519,"context_line":"Last but not the least, given rolling upgrades can see a situation where only"},{"line_number":520,"context_line":"a very few nodes are reshaped yet as NUMA-aware but the translation mechanism"},{"line_number":521,"context_line":"is in place, we absolutely need to provide a temporary fallback mechanism in"},{"line_number":522,"context_line":"Ussuri that will ensure that if a NUMA-aware request can\u0027t be satisfied, we"},{"line_number":523,"context_line":"will just ask Placement with the original non-NUMA-specific queries so that"},{"line_number":524,"context_line":"we rollback to find hosts thanks to the ``NUMATopologyFilter`` only, like we"},{"line_number":525,"context_line":"implemented for the `physical CPU resources`_ series."},{"line_number":526,"context_line":""},{"line_number":527,"context_line":"Implementation"},{"line_number":528,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":18,"id":"3fa7e38b_05b17000","line":525,"range":{"start_line":519,"start_character":0,"end_line":525,"end_character":53},"updated":"2020-02-12 11:52:58.000000000","message":"This is controversial as I understand and Eric you alreasy tried to provide me a outcome consensus in https://review.opendev.org/#/c/552924/17/specs/ussuri/approved/numa-topology-with-rps.rst@516\n\nCould you please summarize it another way ? I didn\u0027t understand. \n\nThanks.","commit_id":"adbd5203826b9c7c596d192f3f9e4381188670fe"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":21,"context_line":"  This spec only targets to model resource capabilities for NUMA nodes in some"},{"line_number":22,"context_line":"  general and quite abstract manner. We won\u0027t address in this spec how we"},{"line_number":23,"context_line":"  should model NUMA-affinized hardware like PCI devices or GPUs and will"},{"line_number":24,"context_line":"  discuss on the relationships in a later spec."},{"line_number":25,"context_line":""},{"line_number":26,"context_line":""},{"line_number":27,"context_line":"Problem description"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_9dc3658c","line":24,"range":{"start_line":24,"start_character":10,"end_line":24,"end_character":16},"updated":"2020-02-13 10:05:07.000000000","message":"these","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":21,"context_line":"  This spec only targets to model resource capabilities for NUMA nodes in some"},{"line_number":22,"context_line":"  general and quite abstract manner. We won\u0027t address in this spec how we"},{"line_number":23,"context_line":"  should model NUMA-affinized hardware like PCI devices or GPUs and will"},{"line_number":24,"context_line":"  discuss on the relationships in a later spec."},{"line_number":25,"context_line":""},{"line_number":26,"context_line":""},{"line_number":27,"context_line":"Problem description"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_0b972c9e","line":24,"range":{"start_line":24,"start_character":10,"end_line":24,"end_character":16},"in_reply_to":"3fa7e38b_9dc3658c","updated":"2020-02-13 13:56:05.000000000","message":"Done","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":75,"context_line":"vCPUs on the same instance (for parallel computing reasons) would like to"},{"line_number":76,"context_line":"ensure that those CPU resources are provided by the same NUMA node, or some"},{"line_number":77,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":78,"context_line":"I/O-bound of course).For the moment, if you\u0027re an operator, you can use flavor"},{"line_number":79,"context_line":"extra specs to indicate a desired guest NUMA topology for your instance like:"},{"line_number":80,"context_line":""},{"line_number":81,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_1db77524","line":78,"range":{"start_line":78,"start_character":19,"end_line":78,"end_character":21},"updated":"2020-02-13 10:05:07.000000000","message":"nit: space","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":75,"context_line":"vCPUs on the same instance (for parallel computing reasons) would like to"},{"line_number":76,"context_line":"ensure that those CPU resources are provided by the same NUMA node, or some"},{"line_number":77,"context_line":"performance penalties would occur (if your application is CPU-bound or"},{"line_number":78,"context_line":"I/O-bound of course).For the moment, if you\u0027re an operator, you can use flavor"},{"line_number":79,"context_line":"extra specs to indicate a desired guest NUMA topology for your instance like:"},{"line_number":80,"context_line":""},{"line_number":81,"context_line":".. code::"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_6b56e0fb","line":78,"range":{"start_line":78,"start_character":19,"end_line":78,"end_character":21},"in_reply_to":"3fa7e38b_1db77524","updated":"2020-02-13 13:56:05.000000000","message":"Done","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":89,"context_line":""},{"line_number":90,"context_line":".. note ::"},{"line_number":91,"context_line":""},{"line_number":92,"context_line":"  The example above is only needed when you want to not evently divide your"},{"line_number":93,"context_line":"  virtual CPUs and memory between NUMA nodes, of course."},{"line_number":94,"context_line":""},{"line_number":95,"context_line":""}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_3db27132","line":92,"range":{"start_line":92,"start_character":56,"end_line":92,"end_character":63},"updated":"2020-02-13 10:05:07.000000000","message":"evenly","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":89,"context_line":""},{"line_number":90,"context_line":".. note ::"},{"line_number":91,"context_line":""},{"line_number":92,"context_line":"  The example above is only needed when you want to not evently divide your"},{"line_number":93,"context_line":"  virtual CPUs and memory between NUMA nodes, of course."},{"line_number":94,"context_line":""},{"line_number":95,"context_line":""}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_4b5be421","line":92,"range":{"start_line":92,"start_character":56,"end_line":92,"end_character":63},"in_reply_to":"3fa7e38b_3db27132","updated":"2020-02-13 13:56:05.000000000","message":"Done","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":113,"context_line":"``HW_NUMA_ROOT`` that would be on each NUMA *node*. That would help to know"},{"line_number":114,"context_line":"which hosts would be *NUMA-aware* and which others are not."},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"Memory is a bit thougher to represent. The granularity of a NUMA node having"},{"line_number":117,"context_line":"an amount of attached memory is somehow a first approach but we\u0027re missing the"},{"line_number":118,"context_line":"point that the smallest allocatable unit you can assign with Nova is"},{"line_number":119,"context_line":"really a page size. Accordingly, we should rather model our NUMA subtree"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_bd3bc197","line":116,"range":{"start_line":116,"start_character":16,"end_line":116,"end_character":24},"updated":"2020-02-13 10:05:07.000000000","message":"tougher","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":113,"context_line":"``HW_NUMA_ROOT`` that would be on each NUMA *node*. That would help to know"},{"line_number":114,"context_line":"which hosts would be *NUMA-aware* and which others are not."},{"line_number":115,"context_line":""},{"line_number":116,"context_line":"Memory is a bit thougher to represent. The granularity of a NUMA node having"},{"line_number":117,"context_line":"an amount of attached memory is somehow a first approach but we\u0027re missing the"},{"line_number":118,"context_line":"point that the smallest allocatable unit you can assign with Nova is"},{"line_number":119,"context_line":"really a page size. Accordingly, we should rather model our NUMA subtree"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_2b9a28c3","line":116,"range":{"start_line":116,"start_character":16,"end_line":116,"end_character":24},"in_reply_to":"3fa7e38b_bd3bc197","updated":"2020-02-13 13:56:05.000000000","message":"Done","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":125,"context_line":"- ``MEMORY_PAGE_SIZE_SMALL`` and ``MEMORY_PAGE_SIZE_LARGE`` would allow us to"},{"line_number":126,"context_line":"  know whether the memory page size is default or optionally configured."},{"line_number":127,"context_line":""},{"line_number":128,"context_line":"- ``CUSTOM_MEMORY_PAGE_SIZE_\u003cX\u003e`` where \u003cX\u003e is an integer would allow us to"},{"line_number":129,"context_line":"  know the size of the page in KB. To make it clear, even if the trait is a"},{"line_number":130,"context_line":"  custom one, it\u0027s important to have a naming convention for it so the"},{"line_number":131,"context_line":"  scheduler could ask about page sizes without knowing all the traits."}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_3d47d11d","line":128,"range":{"start_line":128,"start_character":1,"end_line":128,"end_character":33},"updated":"2020-02-13 10:05:07.000000000","message":"I don\u0027t know why we can\u0027t standardize on these. There are only a few options available on the platforms we care about (x86, POWER, ARM, ARM64). Surely we can enumerate these, perhaps with a fallback to custom sizes for really weird page sizes?","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":125,"context_line":"- ``MEMORY_PAGE_SIZE_SMALL`` and ``MEMORY_PAGE_SIZE_LARGE`` would allow us to"},{"line_number":126,"context_line":"  know whether the memory page size is default or optionally configured."},{"line_number":127,"context_line":""},{"line_number":128,"context_line":"- ``CUSTOM_MEMORY_PAGE_SIZE_\u003cX\u003e`` where \u003cX\u003e is an integer would allow us to"},{"line_number":129,"context_line":"  know the size of the page in KB. To make it clear, even if the trait is a"},{"line_number":130,"context_line":"  custom one, it\u0027s important to have a naming convention for it so the"},{"line_number":131,"context_line":"  scheduler could ask about page sizes without knowing all the traits."}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_8baa3cd0","line":128,"range":{"start_line":128,"start_character":1,"end_line":128,"end_character":33},"in_reply_to":"3fa7e38b_3d47d11d","updated":"2020-02-13 13:56:05.000000000","message":"\u003e I don\u0027t know why we can\u0027t standardize on these. There are only a\n \u003e few options available on the platforms we care about (x86, POWER,\n \u003e ARM, ARM64). Surely we can enumerate these, perhaps with a fallback\n \u003e to custom sizes for really weird page sizes?\n\nLet\u0027s just leave this to be an implementation question. FWIW, using custom traits is good with me but if the consensus moves to ask to have standard traits, then OK.\n\n(FWIW, we could have a looooooot of standard traits then)","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"7ff7435c9c7a5b772e589467b97c275377e90754","unresolved":false,"context_lines":[{"line_number":125,"context_line":"- ``MEMORY_PAGE_SIZE_SMALL`` and ``MEMORY_PAGE_SIZE_LARGE`` would allow us to"},{"line_number":126,"context_line":"  know whether the memory page size is default or optionally configured."},{"line_number":127,"context_line":""},{"line_number":128,"context_line":"- ``CUSTOM_MEMORY_PAGE_SIZE_\u003cX\u003e`` where \u003cX\u003e is an integer would allow us to"},{"line_number":129,"context_line":"  know the size of the page in KB. To make it clear, even if the trait is a"},{"line_number":130,"context_line":"  custom one, it\u0027s important to have a naming convention for it so the"},{"line_number":131,"context_line":"  scheduler could ask about page sizes without knowing all the traits."}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_152d09ad","line":128,"range":{"start_line":128,"start_character":1,"end_line":128,"end_character":33},"in_reply_to":"3fa7e38b_3d47d11d","updated":"2020-02-13 12:34:16.000000000","message":"that was covered in the etherpad.\nbasically if we did we would need to do an uncachable addtional call to placement to get the set of traits  as that could change over time. so using a custom trait allows us to avoid that.\n\n-----------------------------------------------------------\nerric:As long as we can know beforehand all the possible page sizes, that\u0027s fine. But it would be a pain to have to make a code change to support a new page size that pops up in the world.\n\nme: ya so right not its free form but there are only 3 i know if in use in opentack envs and technically only 6-8 that the main 3 architcure suport\n\nerric:we can probably create standard traits for the ones we know about, and the nova code can just go figure out if a standard trait exists for $size (this requires an extra placement call, but a cheap one) and if not generate a CUSTOM one according to rules. That would allow us to grow without forcing code changes on boundaries.\n\nme: +1 (delete form etherpad but i suggested skiping placmeent call and just checking os-traits)\n\nerric: nope, can\u0027t use os_traits. What we have there locally may not be the same as what the placement db has. Been down this road before.\n\nme: i was hoping we could assume the api server and placmente had the same version but ok ill take your word for it\nwe shoudl be able to look it up once and cache it\n\nerric: sorry, also no :( as if you upgrade os_traits on the placement server that can change. (upgrade os_traits, and then db-sync placement)\n\nme:so maybe thats a argument for just using custom_ to avoid the check.\n\nerric:yeah, I wouldn\u0027t be opposed to that.\nme: ok lets put that in the spec and people can weight in \nerric: ack\n\nsee lines 132-143 https://etherpad.openstack.org/p/mem_page_size_and_placement","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":188,"context_line":"Memory pagesize RP"},{"line_number":189,"context_line":"------------------"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"Each `NUMA RP`_ should be having children RPs for each possible memory page"},{"line_number":192,"context_line":"size per host, and having a single resource class :"},{"line_number":193,"context_line":""},{"line_number":194,"context_line":"* ``MEMORY_MB``: for telling how much memory the NUMA node has in that specific"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_1d6535b0","line":191,"range":{"start_line":191,"start_character":23,"end_line":191,"end_character":41},"updated":"2020-02-13 10:05:07.000000000","message":"have child","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":188,"context_line":"Memory pagesize RP"},{"line_number":189,"context_line":"------------------"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"Each `NUMA RP`_ should be having children RPs for each possible memory page"},{"line_number":192,"context_line":"size per host, and having a single resource class :"},{"line_number":193,"context_line":""},{"line_number":194,"context_line":"* ``MEMORY_MB``: for telling how much memory the NUMA node has in that specific"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_ab8e1877","line":191,"range":{"start_line":191,"start_character":23,"end_line":191,"end_character":41},"in_reply_to":"3fa7e38b_1d6535b0","updated":"2020-02-13 13:56:05.000000000","message":"\u003e have child\n\ndone","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"7ff7435c9c7a5b772e589467b97c275377e90754","unresolved":false,"context_lines":[{"line_number":188,"context_line":"Memory pagesize RP"},{"line_number":189,"context_line":"------------------"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"Each `NUMA RP`_ should be having children RPs for each possible memory page"},{"line_number":192,"context_line":"size per host, and having a single resource class :"},{"line_number":193,"context_line":""},{"line_number":194,"context_line":"* ``MEMORY_MB``: for telling how much memory the NUMA node has in that specific"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_7521ddb1","line":191,"range":{"start_line":191,"start_character":23,"end_line":191,"end_character":41},"in_reply_to":"3fa7e38b_1d6535b0","updated":"2020-02-13 12:34:16.000000000","message":"be having is such an irish thing to do i am proud of you sylvain. that would be a literal translation of how we express that in Irish \"páistí a bheith agat\"\n i guess its also how its congregated in French.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":188,"context_line":"Memory pagesize RP"},{"line_number":189,"context_line":"------------------"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"Each `NUMA RP`_ should be having children RPs for each possible memory page"},{"line_number":192,"context_line":"size per host, and having a single resource class :"},{"line_number":193,"context_line":""},{"line_number":194,"context_line":"* ``MEMORY_MB``: for telling how much memory the NUMA node has in that specific"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_8b931ca4","line":191,"range":{"start_line":191,"start_character":23,"end_line":191,"end_character":41},"in_reply_to":"3fa7e38b_7521ddb1","updated":"2020-02-13 13:56:05.000000000","message":"\u003e be having is such an irish thing to do i am proud of you sylvain.\n \u003e that would be a literal translation of how we express that in Irish\n \u003e \"páistí a bheith agat\"\n \u003e i guess its also how its congregated in French.\n\nLOL\nFWIW, knowing to say either \u0027have\u0027 or \u0027be having\u0027 for a French folk is a bit difficult given we don\u0027t have this in French :)","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8124556605db991ec500d17c59ff2f9d2a749044","unresolved":false,"context_lines":[{"line_number":188,"context_line":"Memory pagesize RP"},{"line_number":189,"context_line":"------------------"},{"line_number":190,"context_line":""},{"line_number":191,"context_line":"Each `NUMA RP`_ should be having children RPs for each possible memory page"},{"line_number":192,"context_line":"size per host, and having a single resource class :"},{"line_number":193,"context_line":""},{"line_number":194,"context_line":"* ``MEMORY_MB``: for telling how much memory the NUMA node has in that specific"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_292d5044","line":191,"range":{"start_line":191,"start_character":23,"end_line":191,"end_character":41},"in_reply_to":"3fa7e38b_ab8e1877","updated":"2020-02-13 16:19:54.000000000","message":"Chaque `NUMA_RP`_ devrait être en ayant enfant RPs...\n\nis totally not a construction that would *ever* happen in French.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":194,"context_line":"* ``MEMORY_MB``: for telling how much memory the NUMA node has in that specific"},{"line_number":195,"context_line":"  page size."},{"line_number":196,"context_line":""},{"line_number":197,"context_line":"This RP would be decorated by two traits :"},{"line_number":198,"context_line":" - either ``MEMORY_PAGE_SIZE_SMALL`` (default if not configured) or"},{"line_number":199,"context_line":"   ``MEMORY_PAGE_SIZE_LARGE`` (if large pages are configured)"},{"line_number":200,"context_line":" - the size of the page size : CUSTOM_MEMORY_PAGE_SIZE_# (where # is the size"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_7d6a2982","line":197,"updated":"2020-02-13 10:05:07.000000000","message":"think you need a newline after this for it to render correctly. Also, dedent the next lines","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":206,"context_line":""},{"line_number":207,"context_line":"The root Resource Provider (ie. the compute node) would only provide resources"},{"line_number":208,"context_line":"for classes that are not NUMA-related. Existing children RPs for vGPUs or"},{"line_number":209,"context_line":"bandwidth-aware resources should still be having this parent (until we discuss"},{"line_number":210,"context_line":"about NUMA affinity for PCI devices)."},{"line_number":211,"context_line":""},{"line_number":212,"context_line":""}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_5df40dd0","line":209,"range":{"start_line":209,"start_character":37,"end_line":209,"end_character":49},"updated":"2020-02-13 10:05:07.000000000","message":"have","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":230,"context_line":".. code::"},{"line_number":231,"context_line":""},{"line_number":232,"context_line":"  [workarounds]"},{"line_number":233,"context_line":"  disable_placement_numa_reporting \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":234,"context_line":""},{"line_number":235,"context_line":""},{"line_number":236,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_9d1fe509","line":233,"updated":"2020-02-13 10:05:07.000000000","message":"I agree that this should default to disabled. That\u0027s pretty much what we did for the cpu-resources (PCPU) work: you need to set the new config options \u0027[compute] cpu_shared_set\u0027 and \u0027[compute] cpu_dedicated_set\u0027 in order to trigger the reshape.\n\nWith that said, is this really a workaround option? I understand that it\u0027s temporary, but don\u0027t workaround options normally default to off? Perhaps something like \u0027[compute] enable_numa_reporting\u0027 would be better? We\u0027re going to deprecate whichever one we choose at some point anyway.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8124556605db991ec500d17c59ff2f9d2a749044","unresolved":false,"context_lines":[{"line_number":230,"context_line":".. code::"},{"line_number":231,"context_line":""},{"line_number":232,"context_line":"  [workarounds]"},{"line_number":233,"context_line":"  disable_placement_numa_reporting \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":234,"context_line":""},{"line_number":235,"context_line":""},{"line_number":236,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_49968c4c","line":233,"in_reply_to":"3fa7e38b_4bde846b","updated":"2020-02-13 16:19:54.000000000","message":"This being a [workaround] goes hand in hand with making reshaping the default behavior in U. If we decide to go back to the PS16 idea of making the reshape opt-in (which I objected to on the grounds that there would be no motivation for operators to switch over, thus making all this work pretty much a waste of time) then I agree it shouldn\u0027t be a [workaround].\n\nWe can bikeshed whether the opt name/value is positive or negative at impl time. See https://review.opendev.org/#/c/657078/3/nova/conf/workarounds.py@247","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":230,"context_line":".. code::"},{"line_number":231,"context_line":""},{"line_number":232,"context_line":"  [workarounds]"},{"line_number":233,"context_line":"  disable_placement_numa_reporting \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":234,"context_line":""},{"line_number":235,"context_line":""},{"line_number":236,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_4bde846b","line":233,"in_reply_to":"3fa7e38b_9d1fe509","updated":"2020-02-13 13:56:05.000000000","message":"\u003e I agree that this should default to disabled. That\u0027s pretty much\n \u003e what we did for the cpu-resources (PCPU) work: you need to set the\n \u003e new config options \u0027[compute] cpu_shared_set\u0027 and \u0027[compute]\n \u003e cpu_dedicated_set\u0027 in order to trigger the reshape.\n \u003e \n \u003e With that said, is this really a workaround option? I understand\n \u003e that it\u0027s temporary, but don\u0027t workaround options normally default\n \u003e to off? Perhaps something like \u0027[compute] enable_numa_reporting\u0027\n \u003e would be better? We\u0027re going to deprecate whichever one we choose\n \u003e at some point anyway.\n\nLet\u0027s just discuss this by the implementation changes","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":251,"context_line":"   explaining that it would be a performance hit when changing the value. This"},{"line_number":252,"context_line":"   also implies that an operator can go backwards to ask to have a non-NUMA"},{"line_number":253,"context_line":"   host, which would then trigger a reshape asking the resources to be moved"},{"line_number":254,"context_line":"   back to the root compute resource provider."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":""},{"line_number":257,"context_line":"In order to facilitate a transition period, in particular with the context of"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_fdba99dd","line":254,"updated":"2020-02-13 10:05:07.000000000","message":"Are you suuure? That\u0027s going to be a lot of work. Why wouldn\u0027t we just block this operation, i.e. once you\u0027re in the new world there\u0027s no going back","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":251,"context_line":"   explaining that it would be a performance hit when changing the value. This"},{"line_number":252,"context_line":"   also implies that an operator can go backwards to ask to have a non-NUMA"},{"line_number":253,"context_line":"   host, which would then trigger a reshape asking the resources to be moved"},{"line_number":254,"context_line":"   back to the root compute resource provider."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":""},{"line_number":257,"context_line":"In order to facilitate a transition period, in particular with the context of"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_8bc1dc83","line":254,"in_reply_to":"3fa7e38b_38835c57","updated":"2020-02-13 13:56:05.000000000","message":"\u003e thats what we had previously. i was suggesting the alternitive was\n \u003e if we blocked it and you really wanted to go back then drain the\n \u003e host, stop the agent and upsdate the config, remove the compute\n \u003e service, then start the agent again.\n \u003e \n \u003e others want the bidirectional translation however.\n\nI\u0027m not sure it would be a huge work, but again, let\u0027s just see this by the implementation and in case we no longer agree with moving back to \u0027no-NUMA\u0027 inventories, then we could modify this spec.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8124556605db991ec500d17c59ff2f9d2a749044","unresolved":false,"context_lines":[{"line_number":251,"context_line":"   explaining that it would be a performance hit when changing the value. This"},{"line_number":252,"context_line":"   also implies that an operator can go backwards to ask to have a non-NUMA"},{"line_number":253,"context_line":"   host, which would then trigger a reshape asking the resources to be moved"},{"line_number":254,"context_line":"   back to the root compute resource provider."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":""},{"line_number":257,"context_line":"In order to facilitate a transition period, in particular with the context of"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_89c7a446","line":254,"in_reply_to":"3fa7e38b_fdba99dd","updated":"2020-02-13 16:19:54.000000000","message":"\u003e Are you suuure? That\u0027s going to be a lot of work. Why wouldn\u0027t we\n \u003e just block this operation, i.e. once you\u0027re in the new world\n \u003e there\u0027s no going back\n\nNo matter what approach we take (even can_split), a reshaped host is going to have a decreased ability to land NUMA-agnostic flavors. The reversible behavior is for the case where somebody\u0027s hosts/flavors happen to be set up in such a way that the decrease is significant/painful.\n\nAgain, I think \"can we go backwards\" is tied to whether we cut over by default or not. If it\u0027s opt-in, then I agree we can probably get away with no takesey-backseys.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"7ff7435c9c7a5b772e589467b97c275377e90754","unresolved":false,"context_lines":[{"line_number":251,"context_line":"   explaining that it would be a performance hit when changing the value. This"},{"line_number":252,"context_line":"   also implies that an operator can go backwards to ask to have a non-NUMA"},{"line_number":253,"context_line":"   host, which would then trigger a reshape asking the resources to be moved"},{"line_number":254,"context_line":"   back to the root compute resource provider."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":""},{"line_number":257,"context_line":"In order to facilitate a transition period, in particular with the context of"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_38835c57","line":254,"in_reply_to":"3fa7e38b_fdba99dd","updated":"2020-02-13 12:34:16.000000000","message":"thats what we had previously. i was suggesting the alternitive was if we blocked it and you really wanted to go back then drain the host, stop the agent and upsdate the config, remove the compute service, then start the agent again.\n\nothers want the bidirectional translation however.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":255,"context_line":""},{"line_number":256,"context_line":""},{"line_number":257,"context_line":"In order to facilitate a transition period, in particular with the context of"},{"line_number":258,"context_line":"a rolling upgrade in Ussuri and where we want to be super conservative with"},{"line_number":259,"context_line":"any potential regression, we will also provide another configuration option"},{"line_number":260,"context_line":"that will optionally test NUMA-aware hosts against non-NUMA-aware instances"},{"line_number":261,"context_line":"by verifying whether a instance can fit on 1 or multiple NUMA nodes within"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_5dcbad71","line":258,"range":{"start_line":258,"start_character":52,"end_line":258,"end_character":57},"updated":"2020-02-13 10:05:07.000000000","message":"drop this","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":255,"context_line":""},{"line_number":256,"context_line":""},{"line_number":257,"context_line":"In order to facilitate a transition period, in particular with the context of"},{"line_number":258,"context_line":"a rolling upgrade in Ussuri and where we want to be super conservative with"},{"line_number":259,"context_line":"any potential regression, we will also provide another configuration option"},{"line_number":260,"context_line":"that will optionally test NUMA-aware hosts against non-NUMA-aware instances"},{"line_number":261,"context_line":"by verifying whether a instance can fit on 1 or multiple NUMA nodes within"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_3dbcb1d3","line":258,"range":{"start_line":258,"start_character":71,"end_line":258,"end_character":75},"updated":"2020-02-13 10:05:07.000000000","message":"in order to prevent","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":255,"context_line":""},{"line_number":256,"context_line":""},{"line_number":257,"context_line":"In order to facilitate a transition period, in particular with the context of"},{"line_number":258,"context_line":"a rolling upgrade in Ussuri and where we want to be super conservative with"},{"line_number":259,"context_line":"any potential regression, we will also provide another configuration option"},{"line_number":260,"context_line":"that will optionally test NUMA-aware hosts against non-NUMA-aware instances"},{"line_number":261,"context_line":"by verifying whether a instance can fit on 1 or multiple NUMA nodes within"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_ab41380a","line":258,"range":{"start_line":258,"start_character":52,"end_line":258,"end_character":57},"in_reply_to":"3fa7e38b_5dcbad71","updated":"2020-02-13 13:56:05.000000000","message":"Done","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":259,"context_line":"any potential regression, we will also provide another configuration option"},{"line_number":260,"context_line":"that will optionally test NUMA-aware hosts against non-NUMA-aware instances"},{"line_number":261,"context_line":"by verifying whether a instance can fit on 1 or multiple NUMA nodes within"},{"line_number":262,"context_line":"this node."},{"line_number":263,"context_line":""},{"line_number":264,"context_line":".. code::"},{"line_number":265,"context_line":""}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_3d6a714f","line":262,"updated":"2020-02-13 10:05:07.000000000","message":"Did dansmith actually agree to this? :) I recall him being fairly against the double query to placement for the PCPU work and that was only okay because there was a very short period of time where it would actually happen. This seems like a much longer term thing.\n\nI\u0027m also not sure how well this would actually work in practice? How do you determine how to split the guest across NUMA nodes? I imagine unless we try every possible combination (which would result in a *lot* of queries to placement), we\u0027re still going to regress because we wouldn\u0027t be able to satisfy combinations like booting a 4 core instance on a host with 1 core free on node #0 and 3 cores free on node #1.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8124556605db991ec500d17c59ff2f9d2a749044","unresolved":false,"context_lines":[{"line_number":259,"context_line":"any potential regression, we will also provide another configuration option"},{"line_number":260,"context_line":"that will optionally test NUMA-aware hosts against non-NUMA-aware instances"},{"line_number":261,"context_line":"by verifying whether a instance can fit on 1 or multiple NUMA nodes within"},{"line_number":262,"context_line":"this node."},{"line_number":263,"context_line":""},{"line_number":264,"context_line":".. code::"},{"line_number":265,"context_line":""}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_4940cca3","line":262,"in_reply_to":"3fa7e38b_3d6a714f","updated":"2020-02-13 16:19:54.000000000","message":"\u003e Did dansmith actually agree to this? :) I recall him being fairly\n \u003e against the double query to placement for the PCPU work and that\n \u003e was only okay because there was a very short period of time where\n \u003e it would actually happen. This seems like a much longer term thing.\n\nWe only need this until can_split is implemented.\n\n \u003e I\u0027m also not sure how well this would actually work in practice?\n \u003e How do you determine how to split the guest across NUMA nodes? I\n \u003e imagine unless we try every possible combination (which would\n \u003e result in a *lot* of queries to placement), we\u0027re still going to\n \u003e regress because we wouldn\u0027t be able to satisfy combinations like\n \u003e booting a 4 core instance on a host with 1 core free on node #0 and\n \u003e 3 cores free on node #1.\n\nAll true.\n\nThe proposed algorithm just splits \"evenly\" (overflow by 1 for asymmetrical splits; e.g. 5 \u003d\u003e 3/2, but not 4/1). We acknowledge and accept that it\u0027s not perfect. It should work a large percentage of the time, but it will indeed fail to fit the 4\u003d\u003e3/1 example you give. And that\u0027s why we provide the [workaround] to let you stick to the old way if that truly causes a problem in your cloud.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"7ff7435c9c7a5b772e589467b97c275377e90754","unresolved":false,"context_lines":[{"line_number":259,"context_line":"any potential regression, we will also provide another configuration option"},{"line_number":260,"context_line":"that will optionally test NUMA-aware hosts against non-NUMA-aware instances"},{"line_number":261,"context_line":"by verifying whether a instance can fit on 1 or multiple NUMA nodes within"},{"line_number":262,"context_line":"this node."},{"line_number":263,"context_line":""},{"line_number":264,"context_line":".. code::"},{"line_number":265,"context_line":""}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_989370a4","line":262,"in_reply_to":"3fa7e38b_3d6a714f","updated":"2020-02-13 12:34:16.000000000","message":"dansmith suggested this. although it was ment to be an int not a bool as i noted below.\n\ni have done a quick poc which i atttach to the previous version as a coment if you want to take a look but ill proably push a patch later today to implement a small part of it.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"aa897430bf2e1d1a1b9287192c2e270735b8d18a","unresolved":false,"context_lines":[{"line_number":264,"context_line":".. code::"},{"line_number":265,"context_line":""},{"line_number":266,"context_line":"  [workarounds]"},{"line_number":267,"context_line":"  test_numa_hosts_for_agnostic_instances \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":268,"context_line":""},{"line_number":269,"context_line":".. note::"},{"line_number":270,"context_line":"   This option is set to True in Ussuri for upgrade reasons where operators"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_97510b34","line":267,"range":{"start_line":267,"start_character":2,"end_line":267,"end_character":75},"updated":"2020-02-12 20:34:54.000000000","message":"this is not quite correct.\n\nthis was ment to be \n[scheduler]/max_implicit_numa_nodes where setting it to 0 was how you disabled the progressive splitting.\n\nthis option would continue to exist until placement meant could support a similar feature.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":264,"context_line":".. code::"},{"line_number":265,"context_line":""},{"line_number":266,"context_line":"  [workarounds]"},{"line_number":267,"context_line":"  test_numa_hosts_for_agnostic_instances \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":268,"context_line":""},{"line_number":269,"context_line":".. note::"},{"line_number":270,"context_line":"   This option is set to True in Ussuri for upgrade reasons where operators"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_2b0468c4","line":267,"range":{"start_line":267,"start_character":2,"end_line":267,"end_character":75},"in_reply_to":"3fa7e38b_7860f49e","updated":"2020-02-13 13:56:05.000000000","message":"\u003e also the other reason we want this to not be a workaround is only\n \u003e libvirt will support numa in placment initally\n \u003e \n \u003e so if you are deploying with any other virt driver we will want\n \u003e [scheduler]/max_implicit_numa_nodes\u003d0 to be the default so on a\n \u003e powervm cloud we dont query placment for things that wont exisit or\n \u003e an ironic cloud.\n\nOK, you know what, we are very close to the spec deadline, so I\u0027ll change it back to be [scheduler]/max_implicit_sth \u003d 0 and let\u0027s discuss on it by the implementation.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":264,"context_line":".. code::"},{"line_number":265,"context_line":""},{"line_number":266,"context_line":"  [workarounds]"},{"line_number":267,"context_line":"  test_numa_hosts_for_agnostic_instances \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":268,"context_line":""},{"line_number":269,"context_line":".. note::"},{"line_number":270,"context_line":"   This option is set to True in Ussuri for upgrade reasons where operators"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_ab6a987e","line":267,"range":{"start_line":267,"start_character":2,"end_line":267,"end_character":75},"in_reply_to":"3fa7e38b_97510b34","updated":"2020-02-13 13:56:05.000000000","message":"\u003e this is not quite correct.\n \u003e \n \u003e this was ment to be\n \u003e [scheduler]/max_implicit_numa_nodes where setting it to 0 was how\n \u003e you disabled the progressive splitting.\n \u003e \n \u003e this option would continue to exist until placement meant could\n \u003e support a similar feature.\n\nWhat would be the maximum value for this from an operator question ?\n\nSay you have a flavor with 16 vCPUS, should the max limit to be 16 ?","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"9bad72972635d7b234327e68b87058f68021ff50","unresolved":false,"context_lines":[{"line_number":264,"context_line":".. code::"},{"line_number":265,"context_line":""},{"line_number":266,"context_line":"  [workarounds]"},{"line_number":267,"context_line":"  test_numa_hosts_for_agnostic_instances \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":268,"context_line":""},{"line_number":269,"context_line":".. note::"},{"line_number":270,"context_line":"   This option is set to True in Ussuri for upgrade reasons where operators"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_7860f49e","line":267,"range":{"start_line":267,"start_character":2,"end_line":267,"end_character":75},"in_reply_to":"3fa7e38b_97510b34","updated":"2020-02-13 12:40:24.000000000","message":"also the other reason we want this to not be a workaround is only libvirt will support numa in placment initally\n\nso if you are deploying with any other virt driver we will want [scheduler]/max_implicit_numa_nodes\u003d0 to be the default so on a powervm cloud we dont query placment for things that wont exisit or an ironic cloud.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8124556605db991ec500d17c59ff2f9d2a749044","unresolved":false,"context_lines":[{"line_number":264,"context_line":".. code::"},{"line_number":265,"context_line":""},{"line_number":266,"context_line":"  [workarounds]"},{"line_number":267,"context_line":"  test_numa_hosts_for_agnostic_instances \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":268,"context_line":""},{"line_number":269,"context_line":".. note::"},{"line_number":270,"context_line":"   This option is set to True in Ussuri for upgrade reasons where operators"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_49f52c51","line":267,"range":{"start_line":267,"start_character":2,"end_line":267,"end_character":75},"in_reply_to":"3fa7e38b_ab6a987e","updated":"2020-02-13 16:19:54.000000000","message":"You mean when we document guidelines for how the operator should set this? I don\u0027t think the operator should bother thinking about their flavors to decide, since the algorithm will skip any attempts that would put 0 CPUs on any NUMA node.\n\nI think there are two considerations. The first is simple: it should never be higher than the max number of NUMA nodes on any host in your cloud. If it were, those extra queries would always yield zero results, but you still incurred the expense of doing them.\n\nThe second is a bit fuzzier: weigh scheduling performance (number of queries) against \"desire to fit\". If the latter is paramount, an arbitrarily large number is best.\n\n(Realistically, I don\u0027t think the extra placement queries are likely to be noticeably expensive; but feeding lots of extra candidates into the filters/weighers might be.)","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"aa897430bf2e1d1a1b9287192c2e270735b8d18a","unresolved":false,"context_lines":[{"line_number":293,"context_line":"For flavors just asking for, say, vCPUs and memory without asking them to be"},{"line_number":294,"context_line":"NUMA-aware, then the Placement allocations candidates call would be simple::"},{"line_number":295,"context_line":""},{"line_number":296,"context_line":"  resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":297,"context_line":"  \u0026required\u003d!HW_NUMA_ROOT"},{"line_number":298,"context_line":""},{"line_number":299,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":300,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones (given the"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_f704ff1a","line":297,"range":{"start_line":296,"start_character":2,"end_line":297,"end_character":25},"updated":"2020-02-12 20:34:54.000000000","message":"this would be the fallback query and would be the only one we do if you set if you set [scheduler]/max_implicit_numa_node\u003d0\n\notherwise we woudl addtionally query with 1-N numa nodes\nwhere n is minium of the flavor.vcpus and the config option.\n\nwe do not need to detail the exact details fo how the progressive spliting will be done but we should mention it here.\n\nif you want you can refer to it and put it in a seperate section later","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":293,"context_line":"For flavors just asking for, say, vCPUs and memory without asking them to be"},{"line_number":294,"context_line":"NUMA-aware, then the Placement allocations candidates call would be simple::"},{"line_number":295,"context_line":""},{"line_number":296,"context_line":"  resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":297,"context_line":"  \u0026required\u003d!HW_NUMA_ROOT"},{"line_number":298,"context_line":""},{"line_number":299,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":300,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones (given the"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_ebb090fe","line":297,"range":{"start_line":296,"start_character":2,"end_line":297,"end_character":25},"in_reply_to":"3fa7e38b_f704ff1a","updated":"2020-02-13 13:56:05.000000000","message":"Done","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":15334,"name":"Stephen Finucane","display_name":"stephenfin","email":"stephenfin@redhat.com","username":"sfinucan"},"change_message_id":"ead047ea7b827e3a9e464cb1916ebe5a9626322d","unresolved":false,"context_lines":[{"line_number":484,"context_line":"This alternative proposal has largely already been discussed in a"},{"line_number":485,"context_line":"spec but the outcome consensus was that it was very"},{"line_number":486,"context_line":"difficult to implement and potentially not worth the difficulty."},{"line_number":487,"context_line":""},{"line_number":488,"context_line":"Data model impact"},{"line_number":489,"context_line":"-----------------"},{"line_number":490,"context_line":"None"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_9dde050d","line":487,"updated":"2020-02-13 10:05:07.000000000","message":"What about the option where NUMA reporting was something you opted in or out of, and booting instances on a host with a NUMA topology would automatically enable an implicit NUMA topology and those with an explicit NUMA topology would be restricted to hosts with either NUMA reporting enabled or, as a fallback, without it configured? We\u0027ve discussed this before and I think we decided it would be a bad idea but I don\u0027t recall why...","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8124556605db991ec500d17c59ff2f9d2a749044","unresolved":false,"context_lines":[{"line_number":484,"context_line":"This alternative proposal has largely already been discussed in a"},{"line_number":485,"context_line":"spec but the outcome consensus was that it was very"},{"line_number":486,"context_line":"difficult to implement and potentially not worth the difficulty."},{"line_number":487,"context_line":""},{"line_number":488,"context_line":"Data model impact"},{"line_number":489,"context_line":"-----------------"},{"line_number":490,"context_line":"None"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_c90b5c2a","line":487,"in_reply_to":"3fa7e38b_9dde050d","updated":"2020-02-13 16:19:54.000000000","message":"\u003e What about the option where NUMA reporting was something you opted\n \u003e in or out of, and booting instances on a host with a NUMA topology\n \u003e would automatically enable an implicit NUMA topology\n\nIn this case you still have to decide what that topology will be. Is it implicitly hw:numa_nodes\u003d1? Then we have the fitting problem.\n\n \u003e and those with\n \u003e an explicit NUMA topology would be restricted to hosts with either\n \u003e NUMA reporting enabled\n\nThis was going to break pack/spread and server affinity. Maybe that\u0027s acceptable.\n\n \u003e or, as a fallback, without it configured?\n\nI don\u0027t understand this part.\n\nTL;DR we ended up where we are for two reasons:\n- Dealing with a partially-upgraded cloud, we need to be able to land both NUMA-aware and NUMA-agnostic flavors without impacting pack/spread and server affinity. We\u0027re compromising a little on fittability with the \"even split\" algorithm.\n- If we make the cutover opt-in only, nobody\u0027s going to do it; there would be no reason. (Citing the PCPU thing as a precedent isn\u0027t valid: PCPUs offer a tangible benefit to make operators *want* to cut over.)\n\nThis is explained in a bit more detail here: https://review.opendev.org/#/c/552924/17/specs/ussuri/approved/numa-topology-with-rps.rst@516","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":484,"context_line":"This alternative proposal has largely already been discussed in a"},{"line_number":485,"context_line":"spec but the outcome consensus was that it was very"},{"line_number":486,"context_line":"difficult to implement and potentially not worth the difficulty."},{"line_number":487,"context_line":""},{"line_number":488,"context_line":"Data model impact"},{"line_number":489,"context_line":"-----------------"},{"line_number":490,"context_line":"None"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_4b6f6460","line":487,"in_reply_to":"3fa7e38b_9dde050d","updated":"2020-02-13 13:56:05.000000000","message":"\u003e What about the option where NUMA reporting was something you opted\n \u003e in or out of, and booting instances on a host with a NUMA topology\n \u003e would automatically enable an implicit NUMA topology and those with\n \u003e an explicit NUMA topology would be restricted to hosts with either\n \u003e NUMA reporting enabled or, as a fallback, without it configured?\n \u003e We\u0027ve discussed this before and I think we decided it would be a\n \u003e bad idea but I don\u0027t recall why...\n\nLet\u0027s discuss this as an implementation detail.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"aa897430bf2e1d1a1b9287192c2e270735b8d18a","unresolved":false,"context_lines":[{"line_number":511,"context_line":"Performance Impact"},{"line_number":512,"context_line":"------------------"},{"line_number":513,"context_line":""},{"line_number":514,"context_line":"Only when changing the configuration option, a reshape is done."},{"line_number":515,"context_line":""},{"line_number":516,"context_line":"Other deployer impact"},{"line_number":517,"context_line":"---------------------"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_321dc560","line":514,"updated":"2020-02-12 20:34:54.000000000","message":"the progressive spliting may have a perfomance impact on schduling so we will have the [scheduler]/max_implicit_numa_nodes to contol the behavior.\n\nthe numa toplogy filter is one of the most expeincive filster we run so in general this shoudl still be a performace win but its proably worth mentioning.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":511,"context_line":"Performance Impact"},{"line_number":512,"context_line":"------------------"},{"line_number":513,"context_line":""},{"line_number":514,"context_line":"Only when changing the configuration option, a reshape is done."},{"line_number":515,"context_line":""},{"line_number":516,"context_line":"Other deployer impact"},{"line_number":517,"context_line":"---------------------"}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_6b910050","line":514,"in_reply_to":"3fa7e38b_321dc560","updated":"2020-02-13 13:56:05.000000000","message":"Done","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"aa897430bf2e1d1a1b9287192c2e270735b8d18a","unresolved":false,"context_lines":[{"line_number":550,"context_line":"- query Placement \u0027old-style\u0027 by only asking for CPU and RAM resources without"},{"line_number":551,"context_line":"  asking for specific NUMA bits."},{"line_number":552,"context_line":""},{"line_number":553,"context_line":"- query Placement \u0027NUMA-like\u0027 by asking whether NUMA-aware hosts can fit the"},{"line_number":554,"context_line":"  request, including for instances that *aren\u0027t* asking for NUMA specifics."},{"line_number":555,"context_line":"  That\u0027s the reason why we introduced the"},{"line_number":556,"context_line":"  ``test_numa_hosts_for_agnostic_instances`` option that will basically"},{"line_number":557,"context_line":"  allow Nova to query up to N times Placement with N calls, iteratively asking"},{"line_number":558,"context_line":"  for N NUMA nodes then N-1 up to 1 single node, where N is the number of"},{"line_number":559,"context_line":"  vCPUs asked."},{"line_number":560,"context_line":""},{"line_number":561,"context_line":"All the resulting allocation candidates will be merged and passed to the"},{"line_number":562,"context_line":"scheduler filters and weighers so a decision can be made accordingly."}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_d23c71c1","line":559,"range":{"start_line":553,"start_character":2,"end_line":559,"end_character":14},"updated":"2020-02-12 20:34:54.000000000","message":"it would be good to intoduced this earlier.\nas i said test_numa_hosts_for_agnostic_instances\nshould be an int field called  [scheduler]/max_implicit_numa_nodes\n\nnot a bool as this will likely continue for several cycle unless can_split is added or a simlar feature to do this in placement.","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"7e2c54abb3a2b1ca43a4ad5a317cbe3a3a1e97d8","unresolved":false,"context_lines":[{"line_number":550,"context_line":"- query Placement \u0027old-style\u0027 by only asking for CPU and RAM resources without"},{"line_number":551,"context_line":"  asking for specific NUMA bits."},{"line_number":552,"context_line":""},{"line_number":553,"context_line":"- query Placement \u0027NUMA-like\u0027 by asking whether NUMA-aware hosts can fit the"},{"line_number":554,"context_line":"  request, including for instances that *aren\u0027t* asking for NUMA specifics."},{"line_number":555,"context_line":"  That\u0027s the reason why we introduced the"},{"line_number":556,"context_line":"  ``test_numa_hosts_for_agnostic_instances`` option that will basically"},{"line_number":557,"context_line":"  allow Nova to query up to N times Placement with N calls, iteratively asking"},{"line_number":558,"context_line":"  for N NUMA nodes then N-1 up to 1 single node, where N is the number of"},{"line_number":559,"context_line":"  vCPUs asked."},{"line_number":560,"context_line":""},{"line_number":561,"context_line":"All the resulting allocation candidates will be merged and passed to the"},{"line_number":562,"context_line":"scheduler filters and weighers so a decision can be made accordingly."}],"source_content_type":"text/x-rst","patch_set":19,"id":"3fa7e38b_abc61868","line":559,"range":{"start_line":553,"start_character":2,"end_line":559,"end_character":14},"in_reply_to":"3fa7e38b_d23c71c1","updated":"2020-02-13 13:56:05.000000000","message":"Done","commit_id":"565191f33536b6c6c2882fd37ad25323a6b28c78"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8124556605db991ec500d17c59ff2f9d2a749044","unresolved":false,"context_lines":[{"line_number":232,"context_line":".. code::"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"  [workarounds]"},{"line_number":235,"context_line":"  disable_placement_numa_reporting \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":236,"context_line":""},{"line_number":237,"context_line":""},{"line_number":238,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":20,"id":"3fa7e38b_e478bd7d","line":235,"range":{"start_line":235,"start_character":53,"end_line":235,"end_character":57},"updated":"2020-02-13 16:19:54.000000000","message":"No, default to False. We want to enable reshaping by default.","commit_id":"4dba90a39488eb6094c862403b01defdab85c6d4"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"642b95acc3b7a8b673361a5a1a92add5ef4bbcfd","unresolved":false,"context_lines":[{"line_number":232,"context_line":".. code::"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"  [workarounds]"},{"line_number":235,"context_line":"  disable_placement_numa_reporting \u003d \u003cbool\u003e (default True for Ussuri)"},{"line_number":236,"context_line":""},{"line_number":237,"context_line":""},{"line_number":238,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":20,"id":"3fa7e38b_84ff8933","line":235,"range":{"start_line":235,"start_character":53,"end_line":235,"end_character":57},"in_reply_to":"3fa7e38b_e478bd7d","updated":"2020-02-13 16:38:08.000000000","message":"for the record I\u0027m fine both ways.","commit_id":"4dba90a39488eb6094c862403b01defdab85c6d4"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8124556605db991ec500d17c59ff2f9d2a749044","unresolved":false,"context_lines":[{"line_number":266,"context_line":".. code::"},{"line_number":267,"context_line":""},{"line_number":268,"context_line":"  [scheduler]"},{"line_number":269,"context_line":"  max_implicit_numa_nodes \u003d \u003cint\u003e (default 0 for Ussuri)"},{"line_number":270,"context_line":""},{"line_number":271,"context_line":".. note::"},{"line_number":272,"context_line":"   This option is set to 0 in Ussuri for upgrade reasons where operators"}],"source_content_type":"text/x-rst","patch_set":20,"id":"3fa7e38b_c49ca1bf","line":269,"range":{"start_line":269,"start_character":34,"end_line":269,"end_character":56},"updated":"2020-02-13 16:19:54.000000000","message":"I think we need to default to a number that\u0027s *least* likely to result in regressive behavior. Zero means NUMA-agnostic flavors will *never* land on reshaped hosts. That\u0027s bad, especially if we\u0027re reshaping by default.\n\nAs Sean notes, performance should be a net gain even if we do a zillion placement queries because it\u0027s the filters and retries that are expensive.\n\nSo IMO we should make this \"large\" by default. We could even make it \"unlimited\" -- the algorithm would max it out at the number of VCPUs in the flavor. But I say at least 4.","commit_id":"4dba90a39488eb6094c862403b01defdab85c6d4"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"3ec63c24c3a1680f0dc0dd7c486c397504f9686e","unresolved":false,"context_lines":[{"line_number":266,"context_line":".. code::"},{"line_number":267,"context_line":""},{"line_number":268,"context_line":"  [scheduler]"},{"line_number":269,"context_line":"  max_implicit_numa_nodes \u003d \u003cint\u003e (default 0 for Ussuri)"},{"line_number":270,"context_line":""},{"line_number":271,"context_line":".. note::"},{"line_number":272,"context_line":"   This option is set to 0 in Ussuri for upgrade reasons where operators"}],"source_content_type":"text/x-rst","patch_set":20,"id":"3fa7e38b_f40384bb","line":269,"range":{"start_line":269,"start_character":34,"end_line":269,"end_character":56},"in_reply_to":"3fa7e38b_6485adaa","updated":"2020-02-13 22:04:51.000000000","message":"4 was also the number i was thiniking of if we enabeld the spliting by default for what its worth.\n\nthere are number of reasons for this but its the number i cam back to if we did not set it to 0","commit_id":"4dba90a39488eb6094c862403b01defdab85c6d4"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"642b95acc3b7a8b673361a5a1a92add5ef4bbcfd","unresolved":false,"context_lines":[{"line_number":266,"context_line":".. code::"},{"line_number":267,"context_line":""},{"line_number":268,"context_line":"  [scheduler]"},{"line_number":269,"context_line":"  max_implicit_numa_nodes \u003d \u003cint\u003e (default 0 for Ussuri)"},{"line_number":270,"context_line":""},{"line_number":271,"context_line":".. note::"},{"line_number":272,"context_line":"   This option is set to 0 in Ussuri for upgrade reasons where operators"}],"source_content_type":"text/x-rst","patch_set":20,"id":"3fa7e38b_6485adaa","line":269,"range":{"start_line":269,"start_character":34,"end_line":269,"end_character":56},"in_reply_to":"3fa7e38b_c49ca1bf","updated":"2020-02-13 16:38:08.000000000","message":"4 works for me.","commit_id":"4dba90a39488eb6094c862403b01defdab85c6d4"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"6691f3fe646732b6bfb4c07e5437fccf355f6604","unresolved":false,"context_lines":[{"line_number":232,"context_line":".. code::"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"  [compute]"},{"line_number":235,"context_line":"  enable_numa_reporting_to_placement \u003d \u003cbool\u003e (default None for Ussuri)"},{"line_number":236,"context_line":""},{"line_number":237,"context_line":""},{"line_number":238,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_837a1cec","line":235,"range":{"start_line":235,"start_character":2,"end_line":235,"end_character":36},"updated":"2020-02-13 20:06:22.000000000","message":"It needs to be made clear which kinds of flavors would be able to land on which kinds of hosts.\n\n                     ``enable_numa_reporting_to_placement``:\n                         None (or pre-U)  False   True\n NUMA-aware flavor:        yes              no     yes\n NUMA-agnostic flavor:     yes             yes      no\n\nEven better would be an explanation of which queries are invoked in each case, and why the result is what it is. For example\n\n- NUMA-aware flavor lands on `True` due to the complex query with granular groups, affinity (same_subtree), and HW_NUMA_ROOT.\n- NUMA-aware flavor lands on `None/pre-U` due to the \u0027fallback query\u0027.\n- NUMA-agnostic flavor lands on `None/pre-U` and `False` but not on `True` because we did a simple resource query but added !HW_NUMA_ROOT. (This is what you describe in the section starting on L274.)\n\nHowever:\n\n- NUMA-aware flavor doesn\u0027t land on `False` because... why? Without some additional design element, the \u0027fallback query\u0027 would hit such a host, right?","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"2c2e47d466c43a245f2fac256dc33f9f61d009c9","unresolved":false,"context_lines":[{"line_number":232,"context_line":".. code::"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"  [compute]"},{"line_number":235,"context_line":"  enable_numa_reporting_to_placement \u003d \u003cbool\u003e (default None for Ussuri)"},{"line_number":236,"context_line":""},{"line_number":237,"context_line":""},{"line_number":238,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_2f69c921","line":235,"range":{"start_line":235,"start_character":2,"end_line":235,"end_character":36},"in_reply_to":"3fa7e38b_34cd9c5a","updated":"2020-02-14 00:27:58.000000000","message":"ya so we cant do that with placment unless we alter the query. e.g. we use group_policy\u003disolate and put the memroy and cpus in different groups. in that case the numa instance will not be able to land on the non numa host since on a non numa host the ram and cpus would be in the root RP. but i dont think that is what we want.\n\nyou could expcitly add a NON_NUMA tratit to the compute node RP when set to false i guess. that avoid the need for group_policy\u003disolate which i think we should not do.\ngiven that we want to move to alwasy reporting numa eventually i think the COMPUTE_NON_NUMA is a non starter.\n\nso the only viable option i can think of is adding all compute nodes to a placment aggreate and useing member_of\u003d!\u003chard codded uuid5 for non numa hosts\u003e which would work and would not have negitive side effects if we really need something explcit. grouping a random set of RP based on an arbitary reason the client cares about is why aggreates exist in placment os use a forbiden aggreate with the numa aware query seam to make sense.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"34dd0ddea9d2d1e2c37caa809e8cdcc95bd24d7c","unresolved":false,"context_lines":[{"line_number":232,"context_line":".. code::"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"  [compute]"},{"line_number":235,"context_line":"  enable_numa_reporting_to_placement \u003d \u003cbool\u003e (default None for Ussuri)"},{"line_number":236,"context_line":""},{"line_number":237,"context_line":""},{"line_number":238,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_34cd9c5a","line":235,"range":{"start_line":235,"start_character":2,"end_line":235,"end_character":36},"in_reply_to":"3fa7e38b_74c6b46b","updated":"2020-02-13 22:22:14.000000000","message":"\u003e in this case we are relying on filters to prevent that.\n\nEh? What filter would prevent that? Not a filter that exists today.\n\n \u003e specificly we are relying on operators having configured either\n \u003e host aggrates for numa flavors or compute capablities.\n \u003e \n \u003e and failing that we are relying on the numa toplogy filter\n \u003e determine if the host could fit the vm.\n \u003e so basically we are relying on them having configure there cloud to\n \u003e handle this case as they would do today/pre-U.\n\nYeah, none of that is good enough. We need something explicit to enforce the segregation that is the crux of this design.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"0593c7cd00f31893b47fe7d86a05079fcf0af22c","unresolved":false,"context_lines":[{"line_number":232,"context_line":".. code::"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"  [compute]"},{"line_number":235,"context_line":"  enable_numa_reporting_to_placement \u003d \u003cbool\u003e (default None for Ussuri)"},{"line_number":236,"context_line":""},{"line_number":237,"context_line":""},{"line_number":238,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_fdb7e4cf","line":235,"range":{"start_line":235,"start_character":2,"end_line":235,"end_character":36},"in_reply_to":"3fa7e38b_837a1cec","updated":"2020-02-14 13:48:16.000000000","message":"\u003e However:\n \u003e \n \u003e - NUMA-aware flavor doesn\u0027t land on `False` because... why? Without\n \u003e some additional design element, the \u0027fallback query\u0027 would hit such\n \u003e a host, right?\n\n\nYou missed a crucial point when we agreed between Stephen and I : we don\u0027t plan to allow NUMA-aware flavors to land on *intentionally* NUMA-disabled hosts.\n\nThe whole purpose of the failback mechanism was to allow a transition period in Ussuri but now that we have the None value for it, it solves the problem without needing a fallback system.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"9644a48cc4261004e9db8919a07fe7eba8418026","unresolved":false,"context_lines":[{"line_number":232,"context_line":".. code::"},{"line_number":233,"context_line":""},{"line_number":234,"context_line":"  [compute]"},{"line_number":235,"context_line":"  enable_numa_reporting_to_placement \u003d \u003cbool\u003e (default None for Ussuri)"},{"line_number":236,"context_line":""},{"line_number":237,"context_line":""},{"line_number":238,"context_line":"For below, we will tell hosts as \"NUMA-aware\" ones that have this option be"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_74c6b46b","line":235,"range":{"start_line":235,"start_character":2,"end_line":235,"end_character":36},"in_reply_to":"3fa7e38b_837a1cec","updated":"2020-02-13 22:17:11.000000000","message":"yes it would.\nin this case we are relying on filters to prevent that.\n\nspecificly we are relying on operators having configured either host aggrates for numa flavors or compute capablities.\n\nand failing that we are relying on the numa toplogy filter determine if the host could fit the vm.\nso basically we are relying on them having configure there cloud to handle this case as they would do today/pre-U.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"6691f3fe646732b6bfb4c07e5437fccf355f6604","unresolved":false,"context_lines":[{"line_number":246,"context_line":"   which hosts they want to support NUMA-aware instances and which should be"},{"line_number":247,"context_line":"   dedicated for \u0027non-NUMA-aware\u0027 instances. A `nova-status pre-upgrade check`"},{"line_number":248,"context_line":"   command will be provided that will warn them to decide before upgrading to"},{"line_number":249,"context_line":"   Victoria, where the default value will change to ``False``."},{"line_number":250,"context_line":""},{"line_number":251,"context_line":".. note::"},{"line_number":252,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_83c8dc1f","line":249,"range":{"start_line":249,"start_character":23,"end_line":249,"end_character":61},"updated":"2020-02-13 20:06:22.000000000","message":"Hum, I would think there would be no default; we would simply start disallowing ``None``/unspecified to force the operator to set it.\n\nThat\u0027s a decision we don\u0027t have to make right now, though.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"0593c7cd00f31893b47fe7d86a05079fcf0af22c","unresolved":false,"context_lines":[{"line_number":246,"context_line":"   which hosts they want to support NUMA-aware instances and which should be"},{"line_number":247,"context_line":"   dedicated for \u0027non-NUMA-aware\u0027 instances. A `nova-status pre-upgrade check`"},{"line_number":248,"context_line":"   command will be provided that will warn them to decide before upgrading to"},{"line_number":249,"context_line":"   Victoria, where the default value will change to ``False``."},{"line_number":250,"context_line":""},{"line_number":251,"context_line":".. note::"},{"line_number":252,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_bd12ccb1","line":249,"range":{"start_line":249,"start_character":23,"end_line":249,"end_character":61},"in_reply_to":"3fa7e38b_748b1421","updated":"2020-02-14 13:48:16.000000000","message":"Done","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"9644a48cc4261004e9db8919a07fe7eba8418026","unresolved":false,"context_lines":[{"line_number":246,"context_line":"   which hosts they want to support NUMA-aware instances and which should be"},{"line_number":247,"context_line":"   dedicated for \u0027non-NUMA-aware\u0027 instances. A `nova-status pre-upgrade check`"},{"line_number":248,"context_line":"   command will be provided that will warn them to decide before upgrading to"},{"line_number":249,"context_line":"   Victoria, where the default value will change to ``False``."},{"line_number":250,"context_line":""},{"line_number":251,"context_line":".. note::"},{"line_number":252,"context_line":"   Since updating this configuration option will create a reshape when"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_748b1421","line":249,"range":{"start_line":249,"start_character":23,"end_line":249,"end_character":61},"in_reply_to":"3fa7e38b_83c8dc1f","updated":"2020-02-13 22:17:11.000000000","message":"that would have the same effect i guess. and ya we can decide that next cycle.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"6691f3fe646732b6bfb4c07e5437fccf355f6604","unresolved":false,"context_lines":[{"line_number":251,"context_line":".. note::"},{"line_number":252,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":253,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":254,"context_line":"   explaining that it would be a performance hit when changing the value."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":".. note::"},{"line_number":257,"context_line":"   Since we allow a transition period for helping the operators to decide, we"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_06fabab9","line":254,"range":{"start_line":254,"start_character":33,"end_line":254,"end_character":48},"updated":"2020-02-13 20:06:22.000000000","message":"I\u0027ll say again, I don\u0027t believe there will be a noticeable delay. We can revisit this in the implementation, but we should have some actual evidence before we document this.\n\nHowever, we *should* document that, once you set the value, there\u0027s no going back (without major disruption).\n\nThinking about that, though: is there a reason to allow or disallow changing from False to True? Would it even be possible for us to disallow that if we wanted to? I guess it depends how we address the issue noted above: how do we tell the difference between pre-U, `None`, and `False` in the first place?","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"9644a48cc4261004e9db8919a07fe7eba8418026","unresolved":false,"context_lines":[{"line_number":251,"context_line":".. note::"},{"line_number":252,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":253,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":254,"context_line":"   explaining that it would be a performance hit when changing the value."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":".. note::"},{"line_number":257,"context_line":"   Since we allow a transition period for helping the operators to decide, we"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_b47e6c17","line":254,"range":{"start_line":254,"start_character":33,"end_line":254,"end_character":48},"in_reply_to":"3fa7e38b_06fabab9","updated":"2020-02-13 22:17:11.000000000","message":"in this case sylvain is refering the one time start up penalty to do the inital reshape i think. rather then an ongoing performace hit.\n\ne.g. the first time the agent start and does the reshape it will take longer to finish starting up.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"0593c7cd00f31893b47fe7d86a05079fcf0af22c","unresolved":false,"context_lines":[{"line_number":251,"context_line":".. note::"},{"line_number":252,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":253,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":254,"context_line":"   explaining that it would be a performance hit when changing the value."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":".. note::"},{"line_number":257,"context_line":"   Since we allow a transition period for helping the operators to decide, we"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_9d74b005","line":254,"range":{"start_line":254,"start_character":33,"end_line":254,"end_character":48},"in_reply_to":"3fa7e38b_2f8549ab","updated":"2020-02-14 13:48:16.000000000","message":"I\u0027ll just drop this note which is useless.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"34dd0ddea9d2d1e2c37caa809e8cdcc95bd24d7c","unresolved":false,"context_lines":[{"line_number":251,"context_line":".. note::"},{"line_number":252,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":253,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":254,"context_line":"   explaining that it would be a performance hit when changing the value."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":".. note::"},{"line_number":257,"context_line":"   Since we allow a transition period for helping the operators to decide, we"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_f404e47e","line":254,"range":{"start_line":254,"start_character":33,"end_line":254,"end_character":48},"in_reply_to":"3fa7e38b_b47e6c17","updated":"2020-02-13 22:22:14.000000000","message":"\u003e in this case sylvain is refering the one time start up penalty to\n \u003e do the inital reshape i think. rather then an ongoing performace\n \u003e hit.\n\nYes, I know; I\u0027m saying I don\u0027t think that hit will be noticeable. It\u0027s a single call to the placement API. Worst case it affects, what, hundreds of database records on the placement side?","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"4f2cc11397c2e1f12db8d84530b2dfa1f282bd6e","unresolved":false,"context_lines":[{"line_number":251,"context_line":".. note::"},{"line_number":252,"context_line":"   Since updating this configuration option will create a reshape when"},{"line_number":253,"context_line":"   restarting the compute service, we will provide a configuration help"},{"line_number":254,"context_line":"   explaining that it would be a performance hit when changing the value."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":".. note::"},{"line_number":257,"context_line":"   Since we allow a transition period for helping the operators to decide, we"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_2f8549ab","line":254,"range":{"start_line":254,"start_character":33,"end_line":254,"end_character":48},"in_reply_to":"3fa7e38b_f404e47e","updated":"2020-02-14 00:33:07.000000000","message":"right if the only allcoation we are updating are the allcoation of numa affined instance then it should be relitivly cheap to loop over there instance numa toplogy blobs and determin which RPs to create allcoations against.\n\nif we have a non numa instnace on a numa reporting host then we would have to caluate a numa fiting for it and that could be expensive if we need to retry.\n\nare we sugesting that we just hard error if you enable numa reporting on a host with non numa affined instances.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"6691f3fe646732b6bfb4c07e5437fccf355f6604","unresolved":false,"context_lines":[{"line_number":279,"context_line":"them on a NUMA-aware host::"},{"line_number":280,"context_line":""},{"line_number":281,"context_line":"    resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":282,"context_line":"    \u0026required\u003d!HW_NUMA_ROOT"},{"line_number":283,"context_line":""},{"line_number":284,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":285,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones (given the"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_83069ccb","line":282,"updated":"2020-02-13 20:06:22.000000000","message":"tbc, this will be able to land on\n- old hosts\n- new hosts with enable_numa_reporting_to_placement \u003d None\n- new hosts with enable_numa_reporting_to_placement \u003d False\n\nright?","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"0593c7cd00f31893b47fe7d86a05079fcf0af22c","unresolved":false,"context_lines":[{"line_number":279,"context_line":"them on a NUMA-aware host::"},{"line_number":280,"context_line":""},{"line_number":281,"context_line":"    resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":282,"context_line":"    \u0026required\u003d!HW_NUMA_ROOT"},{"line_number":283,"context_line":""},{"line_number":284,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":285,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones (given the"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_7d7d34e3","line":282,"in_reply_to":"3fa7e38b_83069ccb","updated":"2020-02-14 13:48:16.000000000","message":"\u003e tbc, this will be able to land on\n \u003e - old hosts\n \u003e - new hosts with enable_numa_reporting_to_placement \u003d None\n \u003e - new hosts with enable_numa_reporting_to_placement \u003d False\n \u003e \n \u003e right?\n\nCorrect, I just added a table in the next revision to clarify what can land on where, as you proposed.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":11604,"name":"sean mooney","email":"smooney@redhat.com","username":"sean-k-mooney"},"change_message_id":"9644a48cc4261004e9db8919a07fe7eba8418026","unresolved":false,"context_lines":[{"line_number":279,"context_line":"them on a NUMA-aware host::"},{"line_number":280,"context_line":""},{"line_number":281,"context_line":"    resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":282,"context_line":"    \u0026required\u003d!HW_NUMA_ROOT"},{"line_number":283,"context_line":""},{"line_number":284,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":285,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones (given the"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_74747437","line":282,"in_reply_to":"3fa7e38b_83069ccb","updated":"2020-02-13 22:17:11.000000000","message":"yes, HW_NUMA_ROOT was not used in train so !HW_NUMA_ROOT will allow all old hosts and false and None since we are not reporting by default in U.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"6691f3fe646732b6bfb4c07e5437fccf355f6604","unresolved":false,"context_lines":[{"line_number":284,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":285,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones (given the"},{"line_number":286,"context_line":"forbidden ``HW_NUMA_ROOT`` trait)."},{"line_number":287,"context_line":"We\u0027re basically sharding clouds between NUMA-aware hosts and non-NUMA-aware"},{"line_number":288,"context_line":"hosts but that\u0027s not really changing the current behaviour as of now where"},{"line_number":289,"context_line":"operators create aggregates to make sure non-NUMA-aware instances can\u0027t land"},{"line_number":290,"context_line":"on NUMA-aware hosts."}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_03faacb9","line":287,"range":{"start_line":287,"start_character":0,"end_line":287,"end_character":31},"updated":"2020-02-13 20:06:22.000000000","message":"well, we\u0027re giving the operator the *opportunity* to shard, but we\u0027re not forcing it, and we\u0027re not doing it by default.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"0593c7cd00f31893b47fe7d86a05079fcf0af22c","unresolved":false,"context_lines":[{"line_number":284,"context_line":"In this case, even if NUMA-aware hosts have enough resources for this query,"},{"line_number":285,"context_line":"the Placement API won\u0027t provide them but only non-NUMA-aware ones (given the"},{"line_number":286,"context_line":"forbidden ``HW_NUMA_ROOT`` trait)."},{"line_number":287,"context_line":"We\u0027re basically sharding clouds between NUMA-aware hosts and non-NUMA-aware"},{"line_number":288,"context_line":"hosts but that\u0027s not really changing the current behaviour as of now where"},{"line_number":289,"context_line":"operators create aggregates to make sure non-NUMA-aware instances can\u0027t land"},{"line_number":290,"context_line":"on NUMA-aware hosts."}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_dd6d4830","line":287,"range":{"start_line":287,"start_character":0,"end_line":287,"end_character":31},"in_reply_to":"3fa7e38b_03faacb9","updated":"2020-02-14 13:48:16.000000000","message":"Done","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"6691f3fe646732b6bfb4c07e5437fccf355f6604","unresolved":false,"context_lines":[{"line_number":503,"context_line":"---------------------"},{"line_number":504,"context_line":""},{"line_number":505,"context_line":"Operators would want to migrate some instances from hosts to anothers before"},{"line_number":506,"context_line":"enabling NUMA awareness on some of them since they will have to consider"},{"line_number":507,"context_line":"the capacity usage accordingly as they will have to shard their cloud. This"},{"line_number":508,"context_line":"being said, this would only be necessary for clouds that weren\u0027t yet already"},{"line_number":509,"context_line":"dividing NUMA-aware and non-NUMA-aware workloads between hosts thru aggregates."}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_e32c304a","line":506,"range":{"start_line":506,"start_character":0,"end_line":506,"end_character":8},"updated":"2020-02-13 20:06:22.000000000","message":"or explicitly disabling\n\nIOW setting enable_numa_reporting_to_placement to a non-None value","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"6691f3fe646732b6bfb4c07e5437fccf355f6604","unresolved":false,"context_lines":[{"line_number":534,"context_line":"the Ussuri timeframe) where operators can decide which hosts to dedicate to"},{"line_number":535,"context_line":"NUMA-aware workloads. A specific ``nova-status pre-upgrade check`` command"},{"line_number":536,"context_line":"will warn them to do so before upgrading to Victoria."},{"line_number":537,"context_line":""},{"line_number":538,"context_line":""},{"line_number":539,"context_line":"Implementation"},{"line_number":540,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_86ad4aa5","line":537,"updated":"2020-02-13 20:06:22.000000000","message":"Okay, somewhere along the way we lost the explanation of the \u0027fallback query\u0027. that needs to be added back in somewhere.\n\nIIUC with the proposed design it would work like this:\n\n- For NUMA-agnostic flavors (no hw:numa*-isms) we do a single query, non-granular, with !HW_NUMA_ROOT, as described in the section starting on L274. This can land on pre-U, `None`, and `False` hosts, which will behave exactly as today.\n- For NUMA-aware flavors (containing hw:numa*-isms), we will do two queries [1]: The first with granular groups, affinity, and HW_NUMA_ROOT as described in the section starting on L296 will hit `True` (reshaped) hosts. The second will look like the NUMA-agnostic query to hit pre-U and `None` hosts [2].\n\n[1] We have to decide whether we a) always do both and merge the results; or b) do one (which one?) and then only do the second if the first one yields no results. The latter would be likely to violate pack/spread and affinity groups, so my preference is for the former.\n[2] Except, as noted in the comment on L235, that will also hit `False` hosts, so we need to add some design element to prevent that.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"0593c7cd00f31893b47fe7d86a05079fcf0af22c","unresolved":false,"context_lines":[{"line_number":534,"context_line":"the Ussuri timeframe) where operators can decide which hosts to dedicate to"},{"line_number":535,"context_line":"NUMA-aware workloads. A specific ``nova-status pre-upgrade check`` command"},{"line_number":536,"context_line":"will warn them to do so before upgrading to Victoria."},{"line_number":537,"context_line":""},{"line_number":538,"context_line":""},{"line_number":539,"context_line":"Implementation"},{"line_number":540,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_dd46a8a8","line":537,"in_reply_to":"3fa7e38b_86ad4aa5","updated":"2020-02-14 13:48:16.000000000","message":"\u003e Okay, somewhere along the way we lost the explanation of the\n \u003e \u0027fallback query\u0027. that needs to be added back in somewhere.\n \u003e \n \u003e IIUC with the proposed design it would work like this:\n \u003e \n \u003e - For NUMA-agnostic flavors (no hw:numa*-isms) we do a single\n \u003e query, non-granular, with !HW_NUMA_ROOT, as described in the\n \u003e section starting on L274. This can land on pre-U, `None`, and\n \u003e `False` hosts, which will behave exactly as today.\n \u003e - For NUMA-aware flavors (containing hw:numa*-isms), we will do two\n \u003e queries [1]: The first with granular groups, affinity, and\n \u003e HW_NUMA_ROOT as described in the section starting on L296 will hit\n \u003e `True` (reshaped) hosts. The second will look like the\n \u003e NUMA-agnostic query to hit pre-U and `None` hosts [2].\n \u003e \n \u003e [1] We have to decide whether we a) always do both and merge the\n \u003e results; or b) do one (which one?) and then only do the second if\n \u003e the first one yields no results. The latter would be likely to\n \u003e violate pack/spread and affinity groups, so my preference is for\n \u003e the former.\n \u003e [2] Except, as noted in the comment on L235, that will also hit\n \u003e `False` hosts, so we need to add some design element to prevent\n \u003e that.\n\nAgain, I clarified this in the next revision, but I\u0027m also clear that we won\u0027t provide a fallback mechanism given we now have the None value.","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"d62f977ce35b06f2cd32da626592a90e3dcd6770","unresolved":false,"context_lines":[{"line_number":534,"context_line":"the Ussuri timeframe) where operators can decide which hosts to dedicate to"},{"line_number":535,"context_line":"NUMA-aware workloads. A specific ``nova-status pre-upgrade check`` command"},{"line_number":536,"context_line":"will warn them to do so before upgrading to Victoria."},{"line_number":537,"context_line":""},{"line_number":538,"context_line":""},{"line_number":539,"context_line":"Implementation"},{"line_number":540,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":21,"id":"3fa7e38b_920016b3","line":537,"in_reply_to":"3fa7e38b_dd46a8a8","updated":"2020-02-14 16:01:36.000000000","message":"As discussed in IRC [1], we *do* need the fallback query for NUMA-aware flavors, because otherwise they will never land on `None` hosts. But as designed, if we do the fallback query, NUMA-aware flavors will *also* (incorrectly) land on `False` hosts.\n\nSo we agreed to mark `False` hosts with some kind of HW_NO_NUMA trait, which we would forbid in the fallback query for NUMA-aware flavors.\n\nOnce all computes are upgraded to V (or wherever the conf opt is mandatory), the trait would no longer be necessary, so we could rip it out if we wanted to.\n\n[1] http://eavesdrop.openstack.org/irclogs/%23openstack-nova/%23openstack-nova.2020-02-14.log.html#t2020-02-14T15:44:18","commit_id":"69ed0a4508017184daf56c7b00f70ca2ee072d36"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8e17f461856ff6e45e7212314926904e5a84482d","unresolved":false,"context_lines":[{"line_number":254,"context_line":"while ``No`` means that no allocation candidates will be returned."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":"In order to distinghish compute nodes that have the ``False`` value instead of"},{"line_number":257,"context_line":"``None``, we will decorate them with a specific trait name ``HW_NON_NUMA``."},{"line_number":258,"context_line":"Accordingly, we will query Placement by adding this forbidden trait for *not*"},{"line_number":259,"context_line":"getting nodes that operators explicitly don\u0027t want them to support NUMA-aware"},{"line_number":260,"context_line":"flavors."}],"source_content_type":"text/x-rst","patch_set":23,"id":"3fa7e38b_7551c40b","line":257,"range":{"start_line":257,"start_character":27,"end_line":257,"end_character":31},"updated":"2020-02-14 17:04:47.000000000","message":"the former","commit_id":"1a5fb1f0f442cbb8fb1a2914e0d55f25ed05b7a8"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"51b5541b67e2d2718421088fcbd64991d00b36b2","unresolved":false,"context_lines":[{"line_number":254,"context_line":"while ``No`` means that no allocation candidates will be returned."},{"line_number":255,"context_line":""},{"line_number":256,"context_line":"In order to distinghish compute nodes that have the ``False`` value instead of"},{"line_number":257,"context_line":"``None``, we will decorate them with a specific trait name ``HW_NON_NUMA``."},{"line_number":258,"context_line":"Accordingly, we will query Placement by adding this forbidden trait for *not*"},{"line_number":259,"context_line":"getting nodes that operators explicitly don\u0027t want them to support NUMA-aware"},{"line_number":260,"context_line":"flavors."}],"source_content_type":"text/x-rst","patch_set":23,"id":"3fa7e38b_55ada8a1","line":257,"range":{"start_line":257,"start_character":27,"end_line":257,"end_character":31},"in_reply_to":"3fa7e38b_7551c40b","updated":"2020-02-17 10:38:45.000000000","message":"Done","commit_id":"1a5fb1f0f442cbb8fb1a2914e0d55f25ed05b7a8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8e17f461856ff6e45e7212314926904e5a84482d","unresolved":false,"context_lines":[{"line_number":474,"context_line":"want to accept NUMA-aware flavors to land on hosts that have the"},{"line_number":475,"context_line":"``enable_numa_reporting_to_placement`` option set to ``None``. Since we can\u0027t"},{"line_number":476,"context_line":"yet build a ``OR`` query for allocation candidates, we propose to make a"},{"line_number":477,"context_line":"second call to Placement if the request for a NUMA-aware instance can\u0027t be"},{"line_number":478,"context_line":"satisfied. In this specific call (we name it a fallback call), we want to get"},{"line_number":479,"context_line":"all non-reshaped nodes that are *not* explicitly said to not support NUMA."},{"line_number":480,"context_line":"In this case, the request is fairly trivial since we decorated them with the"},{"line_number":481,"context_line":"``HW_NON_NUMA`` trait::"}],"source_content_type":"text/x-rst","patch_set":23,"id":"3fa7e38b_35d34c65","line":478,"range":{"start_line":477,"start_character":25,"end_line":478,"end_character":9},"updated":"2020-02-14 17:04:47.000000000","message":"Still open for debate whether we should do it this way, or always do both queries and merge the results. Let\u0027s mention both possibilities and say we\u0027ll argue about it at impl time:\n\n...second call to Placement, as described below. There are two options:\n\n* Only do the second call if the first yields no results. This is more efficient; but since it will favor reshaped hosts, it may violate pack/spread or server affinity.\n* Do both calls always, merge the results, and allow the filters/weighers to pick. This is less efficient, but won\u0027t regress pack/spread or server affinity.\n\nWe can debate this further at implementation time.","commit_id":"1a5fb1f0f442cbb8fb1a2914e0d55f25ed05b7a8"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"51b5541b67e2d2718421088fcbd64991d00b36b2","unresolved":false,"context_lines":[{"line_number":474,"context_line":"want to accept NUMA-aware flavors to land on hosts that have the"},{"line_number":475,"context_line":"``enable_numa_reporting_to_placement`` option set to ``None``. Since we can\u0027t"},{"line_number":476,"context_line":"yet build a ``OR`` query for allocation candidates, we propose to make a"},{"line_number":477,"context_line":"second call to Placement if the request for a NUMA-aware instance can\u0027t be"},{"line_number":478,"context_line":"satisfied. In this specific call (we name it a fallback call), we want to get"},{"line_number":479,"context_line":"all non-reshaped nodes that are *not* explicitly said to not support NUMA."},{"line_number":480,"context_line":"In this case, the request is fairly trivial since we decorated them with the"},{"line_number":481,"context_line":"``HW_NON_NUMA`` trait::"}],"source_content_type":"text/x-rst","patch_set":23,"id":"3fa7e38b_55a92875","line":478,"range":{"start_line":477,"start_character":25,"end_line":478,"end_character":9},"in_reply_to":"3fa7e38b_35d34c65","updated":"2020-02-17 10:38:45.000000000","message":"Done","commit_id":"1a5fb1f0f442cbb8fb1a2914e0d55f25ed05b7a8"},{"author":{"_account_id":14070,"name":"Eric Fried","email":"openstack@fried.cc","username":"efried"},"change_message_id":"8e17f461856ff6e45e7212314926904e5a84482d","unresolved":false,"context_lines":[{"line_number":485,"context_line":""},{"line_number":486,"context_line":"Then we would get all compute nodes that have the ``None`` value ("},{"line_number":487,"context_line":"including nodes that are still running the Train release in a rolling upgrade"},{"line_number":488,"context_line":"fashion). We couldn\u0027t get NUMA-aware hosts that are reshaped because the"},{"line_number":489,"context_line":"resource classes for ``VCPU`` and ``MEMORY_MB`` aren\u0027t on the same Resource"},{"line_number":490,"context_line":"Provider."},{"line_number":491,"context_line":""},{"line_number":492,"context_line":"Of course, we would get nodes that could potentially *not* accept the"},{"line_number":493,"context_line":"NUMA-aware flavor but we rely on the ``NUMATopologyFilter`` for not selecting"}],"source_content_type":"text/x-rst","patch_set":23,"id":"3fa7e38b_15465023","line":490,"range":{"start_line":488,"start_character":10,"end_line":490,"end_character":9},"updated":"2020-02-14 17:04:47.000000000","message":"Actually the query as written would still allow that, as we\u0027ve discussed previously. We need to forbid the HW_NUMA_ROOT trait as well for this query:\n\n resources\u003dVCPU:\u003cX\u003e,MEMORY\u003d\u003cY\u003e\n \u0026required\u003d!HW_NON_NUMA,!HW_NUMA_ROOT","commit_id":"1a5fb1f0f442cbb8fb1a2914e0d55f25ed05b7a8"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"51b5541b67e2d2718421088fcbd64991d00b36b2","unresolved":false,"context_lines":[{"line_number":485,"context_line":""},{"line_number":486,"context_line":"Then we would get all compute nodes that have the ``None`` value ("},{"line_number":487,"context_line":"including nodes that are still running the Train release in a rolling upgrade"},{"line_number":488,"context_line":"fashion). We couldn\u0027t get NUMA-aware hosts that are reshaped because the"},{"line_number":489,"context_line":"resource classes for ``VCPU`` and ``MEMORY_MB`` aren\u0027t on the same Resource"},{"line_number":490,"context_line":"Provider."},{"line_number":491,"context_line":""},{"line_number":492,"context_line":"Of course, we would get nodes that could potentially *not* accept the"},{"line_number":493,"context_line":"NUMA-aware flavor but we rely on the ``NUMATopologyFilter`` for not selecting"}],"source_content_type":"text/x-rst","patch_set":23,"id":"3fa7e38b_95d80024","line":490,"range":{"start_line":488,"start_character":10,"end_line":490,"end_character":9},"in_reply_to":"3fa7e38b_15465023","updated":"2020-02-17 10:38:45.000000000","message":"Done","commit_id":"1a5fb1f0f442cbb8fb1a2914e0d55f25ed05b7a8"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"cf572781ccc9bc31b74d068c12544701c1fefd6f","unresolved":false,"context_lines":[{"line_number":164,"context_line":".. note ::"},{"line_number":165,"context_line":""},{"line_number":166,"context_line":"    As we said above, we don\u0027t want to support children PCI devices for Ussuri"},{"line_number":167,"context_line":"    at the moment.Other current children RPs for a root compute node, like ones"},{"line_number":168,"context_line":"    for VGPU resources or bandwidth resources would still have their parent be"},{"line_number":169,"context_line":"    the compute node."},{"line_number":170,"context_line":""}],"source_content_type":"text/x-rst","patch_set":24,"id":"3fa7e38b_07c14281","line":167,"range":{"start_line":167,"start_character":17,"end_line":167,"end_character":18},"updated":"2020-02-15 10:13:17.000000000","message":"nit: missing space","commit_id":"7d320517b494a6134f361df51b1dc04b5e4748a9"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"51b5541b67e2d2718421088fcbd64991d00b36b2","unresolved":false,"context_lines":[{"line_number":164,"context_line":".. note ::"},{"line_number":165,"context_line":""},{"line_number":166,"context_line":"    As we said above, we don\u0027t want to support children PCI devices for Ussuri"},{"line_number":167,"context_line":"    at the moment.Other current children RPs for a root compute node, like ones"},{"line_number":168,"context_line":"    for VGPU resources or bandwidth resources would still have their parent be"},{"line_number":169,"context_line":"    the compute node."},{"line_number":170,"context_line":""}],"source_content_type":"text/x-rst","patch_set":24,"id":"3fa7e38b_0438569c","line":167,"range":{"start_line":167,"start_character":17,"end_line":167,"end_character":18},"in_reply_to":"3fa7e38b_07c14281","updated":"2020-02-17 10:38:45.000000000","message":"\u003e nit: missing space\n\nDone with FUP.","commit_id":"7d320517b494a6134f361df51b1dc04b5e4748a9"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"cf572781ccc9bc31b74d068c12544701c1fefd6f","unresolved":false,"context_lines":[{"line_number":480,"context_line":"In this case, the request is fairly trivial since we decorated them with the"},{"line_number":481,"context_line":"``HW_NON_NUMA`` trait::"},{"line_number":482,"context_line":""},{"line_number":483,"context_line":"  resources\u003dVCPU:\u003cX\u003e,MEMORY_MB\u003d\u003cY\u003e"},{"line_number":484,"context_line":"  \u0026required\u003d!HW_NON_NUMA,!HW_NUMA_ROOT"},{"line_number":485,"context_line":""},{"line_number":486,"context_line":"Then we would get all compute nodes that have the ``None`` value ("},{"line_number":487,"context_line":"including nodes that are still running the Train release in a rolling upgrade"}],"source_content_type":"text/x-rst","patch_set":24,"id":"3fa7e38b_673b5699","line":484,"range":{"start_line":483,"start_character":0,"end_line":484,"end_character":38},"updated":"2020-02-15 10:13:17.000000000","message":"Note to myself:\n\n!HW_NON_NUMA: to not land on node that are explicitly reserved for non numa (segregation)\n\n!HW_NUMA_ROOT: to not land on re-shaped numa aware nodes as those should be returned by the original call not the fallback call.\n\nresources: use the old resource syntax as we are targeting old non reshaped computes","commit_id":"7d320517b494a6134f361df51b1dc04b5e4748a9"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"cf572781ccc9bc31b74d068c12544701c1fefd6f","unresolved":false,"context_lines":[{"line_number":597,"context_line":""},{"line_number":598,"context_line":"Feature Liaison"},{"line_number":599,"context_line":"---------------"},{"line_number":600,"context_line":"None"},{"line_number":601,"context_line":""},{"line_number":602,"context_line":"Work Items"},{"line_number":603,"context_line":"----------"}],"source_content_type":"text/x-rst","patch_set":24,"id":"3fa7e38b_07462223","line":600,"updated":"2020-02-15 10:13:17.000000000","message":"you can put yourself here","commit_id":"7d320517b494a6134f361df51b1dc04b5e4748a9"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"51b5541b67e2d2718421088fcbd64991d00b36b2","unresolved":false,"context_lines":[{"line_number":597,"context_line":""},{"line_number":598,"context_line":"Feature Liaison"},{"line_number":599,"context_line":"---------------"},{"line_number":600,"context_line":"None"},{"line_number":601,"context_line":""},{"line_number":602,"context_line":"Work Items"},{"line_number":603,"context_line":"----------"}],"source_content_type":"text/x-rst","patch_set":24,"id":"3fa7e38b_645eea79","line":600,"in_reply_to":"3fa7e38b_07462223","updated":"2020-02-17 10:38:45.000000000","message":"\u003e you can put yourself here\n\n\nDone with FUP.","commit_id":"7d320517b494a6134f361df51b1dc04b5e4748a9"},{"author":{"_account_id":9708,"name":"Balazs Gibizer","display_name":"gibi","email":"gibizer@gmail.com","username":"gibi"},"change_message_id":"cf572781ccc9bc31b74d068c12544701c1fefd6f","unresolved":false,"context_lines":[{"line_number":607,"context_line":"* Scheduler translating flavor extra specs for NUMA properties into Placement"},{"line_number":608,"context_line":"  queries"},{"line_number":609,"context_line":"* ``nova-status pre-upgrade check`` command"},{"line_number":610,"context_line":""},{"line_number":611,"context_line":""},{"line_number":612,"context_line":"Dependencies"},{"line_number":613,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":24,"id":"3fa7e38b_e746661f","line":610,"updated":"2020-02-15 10:13:17.000000000","message":"Do we still need that the NTF works based on allocation candidates instead of based on hosts? I think so as it will  need to select a candidate for the numa aware, numa_nodes\u003d2 case when the candidate allocates from two different numa nodes. I foresee this change (filters works based on allocation candidates) as a significant refactor step during the implementation as i) filter interface needs to be changed, ii) filter scheduler cannot simply collate candidates for the same host using the first of such candidates.","commit_id":"7d320517b494a6134f361df51b1dc04b5e4748a9"},{"author":{"_account_id":7166,"name":"Sylvain Bauza","email":"sbauza@redhat.com","username":"sbauza"},"change_message_id":"51b5541b67e2d2718421088fcbd64991d00b36b2","unresolved":false,"context_lines":[{"line_number":607,"context_line":"* Scheduler translating flavor extra specs for NUMA properties into Placement"},{"line_number":608,"context_line":"  queries"},{"line_number":609,"context_line":"* ``nova-status pre-upgrade check`` command"},{"line_number":610,"context_line":""},{"line_number":611,"context_line":""},{"line_number":612,"context_line":"Dependencies"},{"line_number":613,"context_line":"\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d\u003d"}],"source_content_type":"text/x-rst","patch_set":24,"id":"3fa7e38b_44596e8e","line":610,"in_reply_to":"3fa7e38b_e746661f","updated":"2020-02-17 10:38:45.000000000","message":"\u003e Do we still need that the NTF works based on allocation candidates\n \u003e instead of based on hosts? I think so as it will  need to select a\n \u003e candidate for the numa aware, numa_nodes\u003d2 case when the candidate\n \u003e allocates from two different numa nodes. I foresee this change\n \u003e (filters works based on allocation candidates) as a significant\n \u003e refactor step during the implementation as i) filter interface\n \u003e needs to be changed, ii) filter scheduler cannot simply collate\n \u003e candidates for the same host using the first of such candidates.\n\n\nDone with FUP.","commit_id":"7d320517b494a6134f361df51b1dc04b5e4748a9"}]}
