From 017027bac62eb14824607c1e99f9a2f2582a9258 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Fri, 29 Nov 2024 17:11:45 +0000 Subject: [PATCH 01/50] Added manually messages that are not autogenerated. Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/csharp/templates/Message2.mustache | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ocaml/sdk-gen/csharp/templates/Message2.mustache b/ocaml/sdk-gen/csharp/templates/Message2.mustache index 3dfe4f4503e..b6aa46d5f3e 100644 --- a/ocaml/sdk-gen/csharp/templates/Message2.mustache +++ b/ocaml/sdk-gen/csharp/templates/Message2.mustache @@ -43,6 +43,9 @@ namespace XenAPI LEAF_COALESCE_COMPLETED, LEAF_COALESCE_FAILED, POST_ATTACH_SCAN_FAILED, + WLB_HOST_POWER_OFF, + WLB_HOST_POWER_ON, + WLB_SERVER_TIME_DISCREPANCY, WLB_VM_RELOCATION, {{#message_types}} {{{message_type}}}, @@ -74,6 +77,12 @@ namespace XenAPI return MessageType.LEAF_COALESCE_FAILED; case "POST_ATTACH_SCAN_FAILED": return MessageType.POST_ATTACH_SCAN_FAILED; + case "WLB_HOST_POWER_OFF": + return MessageType.WLB_HOST_POWER_OFF; + case "WLB_HOST_POWER_ON": + return MessageType.WLB_HOST_POWER_ON; + case "WLB_SERVER_TIME_DISCREPANCY": + return MessageType.WLB_SERVER_TIME_DISCREPANCY; case "WLB_VM_RELOCATION": return MessageType.WLB_VM_RELOCATION; {{#message_types}} From 039f61fdf3965fa8123328273fd7d9dd40326fc5 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Fri, 29 Nov 2024 17:12:07 +0000 Subject: [PATCH 02/50] Docs tidy up: - Moved notes on error structure to the Wire protocol. - Moved duplicate file basics.md and wire-protocol.md from ocaml/doc to doc/content/xen-api. - Moved notes on VM boot parameters to doc/content/xen-api/topics/vm-lifecycle.md and removed ocaml/doc/vm-lifecycle.md. Signed-off-by: Konstantina Chremmou --- doc/content/xen-api/basics.md | 148 ++-- doc/content/xen-api/topics/vm-lifecycle.md | 48 +- doc/content/xen-api/wire-protocol.md | 942 ++++++++++++++++----- ocaml/doc/basics.md | 119 --- ocaml/doc/dune | 10 + ocaml/doc/vm-lifecycle.md | 50 -- ocaml/doc/wire-protocol.md | 664 --------------- ocaml/idl/templates/api_errors.mustache | 132 +-- ocaml/idl/templates/toc.mustache | 2 +- 9 files changed, 856 insertions(+), 1259 deletions(-) delete mode 100644 ocaml/doc/basics.md delete mode 100644 ocaml/doc/vm-lifecycle.md delete mode 100644 ocaml/doc/wire-protocol.md diff --git a/doc/content/xen-api/basics.md b/doc/content/xen-api/basics.md index ce4394c6afa..5288090aa34 100644 --- a/doc/content/xen-api/basics.md +++ b/doc/content/xen-api/basics.md @@ -3,14 +3,16 @@ title = "XenAPI Basics" weight = 10 +++ -This document contains a description of the Xen Management API – an interface for +This document contains a description of the Xen Management API - an interface for remotely configuring and controlling virtualised guests running on a Xen-enabled host. -The XenAPI is presented here as a set of Remote Procedure Calls, with a wire -format based upon [XML-RPC](http://xmlrpc.scripting.com). -No specific language bindings are prescribed, -although examples will be given in the python programming language. +The API is presented here as a set of Remote Procedure Calls (RPCs). +There are two supported wire formats, one based upon +[XML-RPC](http://xmlrpc.scripting.com/spec.html) +and one based upon [JSON-RPC](http://www.jsonrpc.org) (v1.0 and v2.0 are both +recognized). No specific language bindings are prescribed, although examples +are given in the Python programming language. Although we adopt some terminology from object-oriented programming, future client language bindings may or may not be object oriented. @@ -21,98 +23,102 @@ specific values. Objects are persistent and exist on the server-side. Clients may obtain opaque references to these server-side objects and then access their fields via get/set RPCs. -For each class we specify a list of fields along with their _types_ and _qualifiers_. A -qualifier is one of: +For each class we specify a list of fields along with their _types_ and +_qualifiers_. A qualifier is one of: -- _RO/runtime_: the field is Read -Only. Furthermore, its value is automatically computed at runtime. -For example: current CPU load and disk IO throughput. -- _RO/constructor_: the field must be manually set -when a new object is created, but is then Read Only for -the duration of the object's life. -For example, the maximum memory addressable by a guest is set -before the guest boots. -- _RW_: the field is Read/Write. For example, the name of a VM. +- _RO/runtime_: the field is Read Only. Furthermore, its value is + automatically computed at runtime. For example, current CPU load and disk IO + throughput. -Types ------ +- _RO/constructor_: the field must be manually set when a new object is + created, but is then Read Only for the duration of the object's life. + For example, the maximum memory addressable by a guest is set + before the guest boots. + +- _RW_: the field is Read/Write. For example, the name of a VM. + +## Types The following types are used to specify methods and fields in the API Reference: -- `string`: Text strings. -- `int`: 64-bit integers. -- `float`: IEEE double-precision floating-point numbers. -- `bool`: Boolean. -- `datetime`: Date and timestamp. -- `c ref`: Reference to an object of class `c`. -- `t set`: Arbitrary-length set of values of type `t`. -- `(k → v) map`: Mapping from values of type `k` to values of type `v`. -- `e enum`: Enumeration type with name `e`. Enums are defined in the API Reference together with classes that use them. - -Note that there are a number of cases where `ref`s are _doubly -linked_ – e.g. a VM has a field called `VIFs` of type -`VIF ref set`; this field lists -the network interfaces attached to a particular VM. Similarly, the VIF -class has a field called `VM` of type `VM ref` which references the VM to which the interface is connected. +- `string`: Text strings. +- `int`: 64-bit integers. +- `float`: IEEE double-precision floating-point numbers. +- `bool`: Boolean. +- `datetime`: Date and timestamp. +- `c ref`: Reference to an object of class `c`. +- `t set`: Arbitrary-length set of values of type `t`. +- `(k -> v) map`: Mapping from values of type `k` to values of type `v`. +- `e enum`: Enumeration type with name `e`. Enums are defined in the API + reference together with classes that use them. + +Note that there are a number of cases where `ref`s are _doubly linked_. +For example, a `VM` has a field called `VIFs` of type `VIF ref set`; +this field lists the network interfaces attached to a particular VM. +Similarly, the `VIF` class has a field called `VM` of type `VM ref` +which references the VM to which the interface is connected. These two fields are _bound together_, in the sense that creating a new VIF causes the `VIFs` field of the corresponding VM object to be updated automatically. -The API reference explicitly lists the fields that are +The API reference lists explicitly the fields that are bound together in this way. It also contains a diagram that shows relationships between classes. In this diagram an edge signifies the -existance of a pair of fields that are bound together, using standard +existence of a pair of fields that are bound together, using standard crows-foot notation to signify the type of relationship (e.g. one-many, many-many). -RPCs associated with fields ---------------------------- +## RPCs associated with fields + +Each field, `f`, has an RPC accessor associated with it that returns `f`'s value: + +- `get_f (r)`: takes a `ref`, `r` that refers to an object and returns the value + of `f`. + +Each field, `f`, with qualifier _RW_ and whose outermost type is `set` has the +following additional RPCs associated with it: -Each field, `f`, has an RPC accessor associated with it -that returns `f`'s value: +- `add_f(r, v)`: adds a new element `v` to the set. + Note that sets cannot contain duplicate values, hence this operation has + no action in the case that `v` is already in the set. -- `get_f (r)`: Takes a `ref`, `r`, that refers to an object and returns the value of `f`. +- `remove_f(r, v)`: removes element `v` from the set. -Each field, `f`, with attribute `RW` and whose outermost type is `set` has the following -additional RPCs associated with it: +Each field, `f`, with qualifier _RW_ and whose outermost type is `map` has the +following additional RPCs associated with it: -- `add_f (r, v)`: Adds a new element `v` to the set. Since sets cannot contain duplicate values this operation has no action in the case -that `v` was already in the set. +- `add_to_f(r, k, v)`: adds new pair `k -> v` to the mapping stored in `f` in + object `r`. Attempting to add a new pair for duplicate key, `k`, fails with a + `MAP_DUPLICATE_KEY` error. -- `remove_f (r, v)`: Removes element `v` from the set. +- `remove_from_f(r, k)`: removes the pair with key `k` + from the mapping stored in `f` in object `r`. -Each field, `f`, with attribute RW and whose outermost type is `map` has the following -additional RPCs associated with it: +Each field whose outermost type is neither `set` nor `map`, but whose +qualifier is _RW_ has an RPC accessor associated with it that sets its value: -- `add_to_f (r, k, v)`: Adds new pair `(k → v)` to the mapping stored in `f` in object `r`. Attempting to add a new pair for duplicate -key, `k`, fails with an `MAP_DUPLICATE_KEY` error. -- `remove_from_f (r, k)`: Removes the pair with key `k` from the mapping stored in `f` in object `r`. +- `set_f(r, v)`: sets the field `f` on object `r` to value `v`. -Each field whose outermost type is neither `set` nor `map`, -but whose attribute is RW has an RPC accessor associated with it -that sets its value: +## RPCs associated with classes -- `set_f (r, v)`: Sets field `f` on object `r` to value `v`. +- Most classes have a _constructor_ RPC named `create` that + takes as parameters all fields marked _RW_ and _RO/constructor_. The result + of this RPC is that a new _persistent_ object is created on the server-side + with the specified field values. -RPCs associated with classes ----------------------------- +- Each class has a `get_by_uuid(uuid)` RPC that returns the object + of that class that has the specified `uuid`. -- Most classes have a _constructor_ RPC named `create` that -takes as parameters all fields marked RW and -RO/constructor. The result of this RPC is that a new _persistent_ object is -created on the server-side with the specified field values. -- Each class has a `get_by_uuid (uuid)` RPC that returns the object -of that class that has the specified UUID. -- Each class that has a `name_label` field has a `get_by_name_label (name_label)` RPC that returns a set of objects of that -class that have the specified `name_label`. -- Most classes have a `destroy (r)` RPC that explicitly deletes the persistent object specified by `r` from the system. This is a -non-cascading delete – if the object being removed is referenced by another -object then the `destroy` call will fail. +- Each class that has a `name_label` field has a + `get_by_name_label(name_label)` RPC that returns a set of objects of that + class that have the specified `name_label`. -Additional RPCs ---------------- +- Most classes have a `destroy(r)` RPC that explicitly deletes + the persistent object specified by `r` from the system. This is a + non-cascading delete - if the object being removed is referenced by another + object then the `destroy` call will fail. -As well as the RPCs enumerated above, most classes have additional RPCs -associated with them. For example, the VM class has RPCs for cloning, +Apart from the RPCs enumerated above, most classes have additional RPCs +associated with them. For example, the `VM` class has RPCs for cloning, suspending, starting etc. Such additional RPCs are described explicitly in the API reference. diff --git a/doc/content/xen-api/topics/vm-lifecycle.md b/doc/content/xen-api/topics/vm-lifecycle.md index 4531acd07f7..7390dc61e80 100644 --- a/doc/content/xen-api/topics/vm-lifecycle.md +++ b/doc/content/xen-api/topics/vm-lifecycle.md @@ -2,6 +2,9 @@ title = "VM Lifecycle" +++ +The following figure shows the states that a VM can be in and the +API calls that can be used to move the VM between these states. + ```mermaid graph halted-- start(paused) -->paused @@ -17,7 +20,48 @@ graph halted-- destroy -->destroyed ``` -The figure above shows the states that a VM can be in and the -API calls that can be used to move the VM between these states. +## VM boot parameters + +The `VM` class contains a number of fields that control the way in which the VM +is booted. With reference to the fields defined in the VM class (see later in +this document), this section outlines the boot options available and the +mechanisms provided for controlling them. + +VM booting is controlled by setting one of the two mutually exclusive groups: +"PV" and "HVM". If `HVM.boot_policy` is an empty string, then paravirtual +domain building and booting will be used; otherwise the VM will be loaded as a +HVM domain, and booted using an emulated BIOS. + +When paravirtual booting is in use, the `PV_bootloader` field indicates the +bootloader to use. It may be "pygrub", in which case the platform's default +installation of pygrub will be used, or a full path within the control domain to +some other bootloader. The other fields, `PV_kernel`, `PV_ramdisk`, `PV_args`, +and `PV_bootloader_args` will be passed to the bootloader unmodified, and +interpretation of those fields is then specific to the bootloader itself, +including the possibility that the bootloader will ignore some or all of +those given values. Finally the paths of all bootable disks are added to the +bootloader commandline (a disk is bootable if its VBD has the bootable flag set). +There may be zero, one, or many bootable disks; the bootloader decides which +disk (if any) to boot from. + +If the bootloader is pygrub, then the menu.lst is parsed, if present in the +guest's filesystem, otherwise the specified kernel and ramdisk are used, or an +autodetected kernel is used if nothing is specified and autodetection is +possible. `PV_args` is appended to the kernel command line, no matter which +mechanism is used for finding the kernel. + +If `PV_bootloader` is empty but `PV_kernel` is specified, then the kernel and +ramdisk values will be treated as paths within the control domain. If both +`PV_bootloader` and `PV_kernel` are empty, then the behaviour is as if +`PV_bootloader` were specified as "pygrub". + +When using HVM booting, `HVM_boot_policy` and `HVM_boot_params` specify the boot +handling. Only one policy is currently defined, "BIOS order". In this case, +`HVM_boot_params` should contain one key-value pair "order" = "N" where N is the +string that will be passed to QEMU. +Optionally `HVM_boot_params` can contain another key-value pair "firmware" +with values "bios" or "uefi" (default is "bios" if absent). +By default Secure Boot is not enabled, it can be enabled when "uefi" is enabled +by setting `VM.platform["secureboot"]` to true. {{% children %}} diff --git a/doc/content/xen-api/wire-protocol.md b/doc/content/xen-api/wire-protocol.md index 689819a88a9..8a4fe9f5012 100644 --- a/doc/content/xen-api/wire-protocol.md +++ b/doc/content/xen-api/wire-protocol.md @@ -3,136 +3,106 @@ title = "Wire Protocol" weight = 20 +++ -API calls are sent over a network to a Xen-enabled host using the -[XML-RPC](http://xmlrpc.scripting.com/spec.html) protocol. On this page, we describe how the higher-level types -used in our API Reference are mapped to primitive XML-RPC types. +API calls are sent over a network to a Xen-enabled host using an RPC protocol. +Here we describe how the higher-level types used in our API Reference are mapped +to primitive RPC types, covering the two supported wire formats +[XML-RPC](http://xmlrpc.scripting.com/spec.html) and [JSON-RPC](http://www.jsonrpc.org). -We specify the signatures of API functions in the following style: - - (VM ref set) VM.get_all () - -This specifies that the function with name `VM.get_all` -takes no parameters and returns a `set` of `VM ref`s. These -types are mapped onto XML-RPC types in a straight-forward manner: - -- `float`s, `bool`s, `datetime`s and `string`s map directly to the XML-RPC - ``, ``, ``, and `` elements. - -- all `ref` types are opaque references, encoded as the - XML-RPC’s `` type. Users of the API should not make - assumptions about the concrete form of these strings and should not - expect them to remain valid after the client’s session with the - server has terminated. - -- fields named `uuid` of type `string` are - mapped to the XML-RPC `` type. The string itself is - the OSF DCE UUID presentation format (as output by - `uuidgen`, etc). - -- `int`s are all assumed to be 64-bit in our API and are encoded as a - string of decimal digits (rather than using XML-RPC’s built-in - 32-bit `` type). - -- values of `enum` types are encoded as strings. For example, a value of - `destroy` of type `enum on_normal_exit`, would be - conveyed as: - - destroy - -- for all our types, `t`, our type `t set` - simply maps to XML-RPC’s `` type, so for example a - value of type `string set` would be transmitted like - this: - - - - - CX8 - PSE36 - FPU - - - +## XML-RPC Protocol -- for types `k` and `v`, our type `(k → v) map` maps onto an XML-RPC ``, with the key as the name of - the struct. Note that the `(k → v) map` type is only valid - when `k` is a `string`, `ref`, or `int`, and in each case the keys of the maps are - stringified as above. For example, the `(string → double) map` containing a the mappings `"Mike" → 2.3` and - `"John" → 1.2` would be represented as: - - - - - Mike - 2.3 - - - John - 1.2 - - - - - -- our `void` type is transmitted as an empty string. - -Note on References vs UUIDs ---------------------------- - -References are opaque types — encoded as XML-RPC strings on the wire — -understood only by the particular server which generated them. Servers -are free to choose any concrete representation they find convenient; -clients should not make any assumptions or attempt to parse the string -contents. References are not guaranteed to be permanent identifiers for -objects; clients should not assume that references generated during one -session are valid for any future session. References do not allow -objects to be compared for equality. Two references to the same object -are not guaranteed to be textually identical. - -UUIDs are intended to be permanent names for objects. They are -guaranteed to be in the OSF DCE UUID presentation format (as output by -`uuidgen`. Clients may store UUIDs on disk and use them to -lookup objects in subsequent sessions with the server. Clients may also -test equality on objects by comparing UUID strings. - -The API provides mechanisms for translating between UUIDs and opaque -references. Each class that contains a UUID field provides: - -- A `get_by_uuid` method that takes a UUID, and - returns an opaque reference to the server-side object that has that - UUID; - -- A `get_uuid` function (a regular “field getter” RPC) - that takes an opaque reference and returns the UUID of the - server-side object that is referenced by it. +We specify the signatures of API functions in the following style: -Return Values/Status Codes --------------------------- +```python +(VM ref set) VM.get_all() +``` + +This specifies that the function with name `VM.get_all` takes +no parameters and returns a `set` of `VM ref`. +These types are mapped onto XML-RPC types in a straight-forward manner: + +- the types `float`, `bool`, `datetime`, and `string` map directly to the XML-RPC + ``, ``, ``, and `` elements. + +- all `ref` types are opaque references, encoded as the + XML-RPC's `` type. Users of the API should not make assumptions + about the concrete form of these strings and should not expect them to + remain valid after the client's session with the server has terminated. + +- fields named `uuid` of type `string` are mapped to + the XML-RPC `` type. The string itself is the OSF + DCE UUID presentation format (as output by `uuidgen`). + +- `int` is assumed to be 64-bit in our API and is encoded as a string + of decimal digits (rather than using XML-RPC's built-in 32-bit `` type). + +- values of `enum` types are encoded as strings. For example, the value + `destroy` of `enum on_normal_exit`, would be conveyed as: + +```xml + destroy +``` + +- for all our types, `t`, our type `t set` simply maps to XML-RPC's `` + type, so, for example, a value of type `string set` would be transmitted like + this: + +```xml + + + CX8 + PSE36 + FPU + + +``` + +- for types `k` and `v`, our type `(k -> v) map` maps onto an + XML-RPC ``, with the key as the name of the struct. Note that the + `(k -> v) map` type is only valid when `k` is a `string`, `ref`, or + `int`, and in each case the keys of the maps are stringified as + above. For example, the `(string -> float) map` containing the mappings + _Mike -> 2.3_ and _John -> 1.2_ would be represented as: + +```xml + + + + Mike + 2.3 + + + John + 1.2 + + + +``` + +- our `void` type is transmitted as an empty string. + +### XML-RPC Return Values and Status Codes The return value of an RPC call is an XML-RPC ``. -- The first element of the struct is named `"Status"`; it - contains a string value indicating whether the result of the call - was a `"Success"` or a `"Failure"`. +- The first element of the struct is named `Status`; it contains a string value + indicating whether the result of the call was a `Success` or a `Failure`. -If `"Status"` was set to `"Success"` then the Struct -contains a second element named `"Value"`: +If the `Status` is `Success` then the struct contains a second element named +`Value`: -- The element of the struct named `"Value"` contains the - function’s return value. +- The element of the struct named `Value` contains the function's return value. -In the case where `"Status"` is set to `"Failure"` -then the struct contains a second element named -`"ErrorDescription"`: +If the `Status` is `Failure` then the struct contains a second element named +`ErrorDescription`: -- The element of the struct named `"ErrorDescription"` - contains an array of string values. The first element of the array - is an error code; the remainder of the array are strings - representing error parameters relating to that code. +- The element of the struct named `ErrorDescription` contains an array of string + values. The first element of the array is an error code; the rest of the + elements are strings representing error parameters relating to that code. -For example, an XML-RPC return value from the -`host.get_resident_VMs` function above may look like this: +For example, an XML-RPC return value from the `host.get_resident_VMs` function +may look like this: +```xml Status @@ -151,149 +121,675 @@ For example, an XML-RPC return value from the +``` + +## JSON-RPC Protocol -Making XML-RPC Calls -==================== +We specify the signatures of API functions in the following style: -Transport Layer ---------------- +```python +(VM ref set) VM.get_all() +``` -The following transport layers are currently supported: +This specifies that the function with name `VM.get_all` takes no parameters and +returns a `set` of `VM ref`. These types are mapped onto JSON-RPC types in the +following manner: -- HTTPS for remote administration +- the types `float` and `bool` map directly to the JSON types `number` and + `boolean`, while `datetime` and `string` are represented as the JSON `string` + type. -- HTTP over Unix domain sockets for local administration +- all `ref` types are opaque references, encoded as the JSON `string` type. + Users of the API should not make assumptions about the concrete form of these + strings and should not expect them to remain valid after the client's session + with the server has terminated. -Session Layer -------------- +- fields named `uuid` of type `string` are mapped to the JSON `string` type. The + string itself is the OSF DCE UUID presentation format (as output by `uuidgen`). -The XML-RPC interface is session-based; before you can make arbitrary -RPC calls you must login and initiate a session. For example: +- `int` is assumed to be 64-bit in our API and is encoded as a JSON `number` + without decimal point or exponent, preserved as a string. - (session ref) session.login_with_password(string uname, string pwd, string version, string originator) +- values of `enum` types are encoded as the JSON `string` type. For example, the + value `destroy` of `enum on_normal_exit`, would be conveyed as: -Where `uname` and `password` refer to your -username and password respectively, as defined by the Xen administrator. -The `session ref` returned by `session.login_with_password` is passed to subequent RPC -calls as an authentication token. +```xml + "destroy" +``` -A session can be terminated with the `session.logout` function: +- for all our types, `t`, our type `t set` simply maps to the JSON `array` + type, so, for example, a value of type `string set` would be transmitted like + this: + +```json + [ "CX8", "PSE36", "FPU" ] +``` - (void) session.logout (session ref) +- for types `k` and `v`, our type `(k -> v) map` maps onto a JSON object which + contains members with name `k` and value `v`. Note that the + `(k -> v) map` type is only valid when `k` is a `string`, `ref`, or + `int`, and in each case the keys of the maps are stringified as + above. For example, the `(string -> float) map` containing the mappings + _Mike -> 2.3_ and _John -> 1.2_ would be represented as: -Synchronous and Asynchronous invocation ---------------------------------------- +```json + { + "Mike": 2.3, + "John": 1.2 + } +``` -Each method call (apart from methods on `session` and `task` objects and -“getters” and “setters” derived from fields) can be made either -synchronously or asynchronously. A synchronous RPC call blocks until the -return value is received; the return value of a synchronous RPC call is -exactly as specified above. +- our `void` type is transmitted as an empty string. -Only synchronous API calls are listed explicitly in this document. All -asynchronous versions are in the special `Async` namespace. -For example, synchronous call `VM.clone (...)` has an asynchronous counterpart, -`Async.VM.clone (...)`, that is non-blocking. +Both versions 1.0 and 2.0 of the JSON-RPC wire format are recognised and, +depending on your client library, you can use either of them. -Instead of returning its result directly, an asynchronous RPC call -returns a task ID (of type `task ref`); this identifier is subsequently used to -track the status of a running asynchronous RPC. Note that an asychronous -call may fail immediately, before a task has even been -created. To represent this eventuality, the returned `task ref` -is wrapped in an XML-RPC struct with a `Status`, -`ErrorDescription` and `Value` fields, exactly as -specified above. +### JSON-RPC v1.0 -The `task ref` is provided in the `Value` field if -`Status` is set to `Success`. +#### JSON-RPC v1.0 Requests -The RPC call +An API call is represented by sending a single JSON object to the server, which +contains the members `method`, `params`, and `id`. - (task ref set) task.get_all (session ref) +- `method`: A JSON `string` containing the name of the function to be invoked. -returns a set of all task IDs known to the system. The status (including -any returned result and error codes) of these tasks can then be queried -by accessing the fields of the Task object in the usual way. Note that, -in order to get a consistent snapshot of a task’s state, it is advisable -to call the `get_record` function. +- `params`: A JSON `array` of values, which represents the parameters of the + function to be invoked. + +- `id`: A JSON `string` or `integer` representing the call id. Note that, + diverging from the JSON-RPC v1.0 specification the API does not accept + _notification_ requests (requests without responses), i.e. the id cannot be + `null`. + +For example, the body of a JSON-RPC v1.0 request to retrieve the resident VMs of +a host may look like this: -Example interactive session -=========================== +```json + { + "method": "host.get_resident_VMs", + "params": [ + "OpaqueRef:74f1a19cd-b660-41e3-a163-10f03e0eae67", + "OpaqueRef:08c34fc9-f418-4f09-8274-b9cb25cd8550" + ], + "id": "xyz" + } +``` -This section describes how an interactive session might look, using the -python XML-RPC client library. +In the above example, the first element of the `params` array is the reference +of the open session to the host, while the second is the host reference. -First, initialise python and import the library `xmlrpc.client`: +#### JSON-RPC v1.0 Return Values - $ python - ... - >>> import xmlrpc.client +The return value of a JSON-RPC v1.0 call is a single JSON object containing +the members `result`, `error`, and `id`. -Create a python object referencing the remote server: +- `result`: If the call is successful, it is a JSON value (`string`, `array` + etc.) representing the return value of the invoked function. If an error has + occurred, it is `null`. - >>> xen = xmlrpc.client.Server("https://localhost:443") +- `error`: If the call is successful, it is `null`. If the call has failed, it + a JSON `array` of `string` values. The first element of the array is an error + code; the remainder of the array are strings representing error parameters + relating to that code. -Acquire a session reference by logging in with a username and password -(error-handling ommitted for brevity; the session reference is returned -under the key `'Value'` in the resulting dictionary) +- `id`: The call id. It is a JSON `string` or `integer` and it is the same id + as the request it is responding to. - >>> session = xen.session.login_with_password("user", "passwd")['Value'] +For example, a JSON-RPC v1.0 return value from the `host.get_resident_VMs` +function may look like this: -When serialised, this call looks like the following: +```json + { + "result": [ + "OpaqueRef:604f51e7-630f-4412-83fa-b11c6cf008ab", + "OpaqueRef:670d08f5-cbeb-4336-8420-ccd56390a65f" + ], + "error": null, + "id": "xyz" + } +``` - - - session.login_with_password - - - user - - - passwd - - - +while the return value of the same call made on a logged out session may look +like this: + +```json + { + "result": null, + "error": [ + "SESSION_INVALID", + "OpaqueRef:93f1a23cd-a640-41e3-b163-10f86e0eae67" + ], + "id": "xyz" + } +``` + +### JSON-RPC v2.0 + +#### JSON-RPC v2.0 Requests + +An API call is represented by sending a single JSON object to the server, which +contains the members `jsonrpc`, `method`, `params`, and `id`. + +- `jsonrpc`: A JSON `string` specifying the version of the JSON-RPC protocol. It + is exactly "2.0". + +- `method`: A JSON `string` containing the name of the function to be invoked. + +- `params`: A JSON `array` of values, which represents the parameters of the + function to be invoked. Although the JSON-RPC v2.0 specification allows this + member to be ommitted, in practice all API calls accept at least one parameter. + +- `id`: A JSON `string` or `integer` representing the call id. Note that, + diverging from the JSON-RPC v2.0 specification it cannot be null. Neither can + it be ommitted because the API does not accept _notification_ requests + (requests without responses). + +For example, the body of a JSON-RPC v2.0 request to retrieve the VMs resident on +a host may may look like this: + +```json + { + "jsonrpc": "2.0", + "method": "host.get_resident_VMs", + "params": [ + "OpaqueRef:c90cd28f-37ec-4dbf-88e6-f697ccb28b39", + "OpaqueRef:08c34fc9-f418-4f09-8274-b9cb25cd8550" + ], + "id": 3 + } +``` + +As before, the first element of the `parameter` array is the reference +of the open session to the host, while the second is the host reference. + +#### JSON-RPC v2.0 Return Values + +The return value of a JSON-RPC v2.0 call is a single JSON object containing the +members `jsonrpc`, either `result` or `error` depending on the outcome of the +call, and `id`. + +- `jsonrpc`: A JSON `string` specifying the version of the JSON-RPC protocol. It + is exactly "2.0". + +- `result`: If the call is successful, it is a JSON value (`string`, `array` etc.) + representing the return value of the invoked function. If an error has + occurred, it does not exist. + +- `error`: If the call is successful, it does not exist. If the call has failed, + it is a single structured JSON object (see below). + +- `id`: The call id. It is a JSON `string` or `integer` and it is the same id + as the request it is responding to. + +The `error` object contains the members `code`, `message`, and `data`. + +- `code`: The API does not make use of this member and only retains it for + compliance with the JSON-RPC v2.0 specification. It is a JSON `integer` + which has a non-zero value. + +- `message`: A JSON `string` representing an API error code. + +- `data`: A JSON array of `string` values representing error parameters + relating to the aforementioned API error code. + +For example, a JSON-RPC v2.0 return value from the `host.get_resident_VMs` +function may look like this: + +```json + { + "jsonrpc": "2.0", + "result": [ + "OpaqueRef:604f51e7-630f-4412-83fa-b11c6cf008ab", + "OpaqueRef:670d08f5-cbeb-4336-8420-ccd56390a65f" + ], + "id": 3 + } +``` + +while the return value of the same call made on a logged out session may look +like this: + +```json + { + "jsonrpc": "2.0", + "error": { + "code": 1, + "message": "SESSION_INVALID", + "data": [ + "OpaqueRef:c90cd28f-37ec-4dbf-88e6-f697ccb28b39" + ] + }, + "id": 3 + } +``` + +## Errors + +When a low-level transport error occurs, or a request is malformed at the HTTP +or RPC level, the server may send an HTTP 500 error response, or the client +may simulate the same. The client must be prepared to handle these errors, +though they may be treated as fatal. + +For example, the following malformed request when using the XML-RPC protocol: + +```sh +$curl -D - -X POST https://server -H 'Content-Type: application/xml' \ + -d ' + + session.logout + ' +``` + +results to the following response: + +```sh +HTTP/1.1 500 Internal Error +content-length: 297 +content-type:text/html +connection:close +cache-control:no-cache, no-store + +

HTTP 500 internal server error

An unexpected error occurred; + please wait a while and try again. If the problem persists, please contact your + support representative.

Additional information

Xmlrpc.Parse_error(&quo +t;close_tag", "open_tag", _) +``` + +When using the JSON-RPC protocol: + +```sh +$curl -D - -X POST https://server/jsonrpc -H 'Content-Type: application/json' \ + -d '{ + "jsonrpc": "2.0", + "method": "session.login_with_password", + "id": 0 + }' +``` + +the response is: + +```sh +HTTP/1.1 500 Internal Error +content-length: 308 +content-type:text/html +connection:close +cache-control:no-cache, no-store + +

HTTP 500 internal server error

An unexpected error occurred; + please wait a while and try again. If the problem persists, please contact your + support representative.

Additional information

Jsonrpc.Malformed_metho +d_request("{jsonrpc=...,method=...,id=...}") +``` + +All other failures are reported with a more structured error response, to +allow better automatic response to failures, proper internationalization of +any error message, and easier debugging. + +On the wire, these are transmitted like this when using the XML-RPC protocol: + +```xml + + + Status + Failure + + + ErrorDescription + + + + MAP_DUPLICATE_KEY + Customer + eSpiel Inc. + eSpiel Incorporated + + + + + +``` + +Note that `ErrorDescription` value is an array of string values. The +first element of the array is an error code; the remainder of the array are +strings representing error parameters relating to that code. In this case, +the client has attempted to add the mapping _Customer -> +eSpiel Incorporated_ to a Map, but it already contains the mapping +_Customer -> eSpiel Inc._, hence the request has failed. + +When using the JSON-RPC protocol v2.0, the above error is transmitted as: + +```json +{ + "jsonrpc": "2.0", + "error": { + "code": 1, + "message": "MAP_DUPLICATE_KEY", + "data": [ + "Customer", + "eSpiel Inc.", + "eSpiel Incorporated" + ] + }, + "id": 3 +} +``` + +Finally, when using the JSON-RPC protocol v1.0: + +```json +{ + "result": null, + "error": [ + "MAP_DUPLICATE_KEY", + "Customer", + "eSpiel Inc.", + "eSpiel Incorporated" + ], + "id": "xyz" +} +``` + +Each possible error code is documented in the last section of the API reference. + +## Note on References vs UUIDs + +References are opaque types - encoded as XML-RPC and JSON-RPC strings on the +wire - understood only by the particular server which generated them. Servers +are free to choose any concrete representation they find convenient; clients +should not make any assumptions or attempt to parse the string contents. +References are not guaranteed to be permanent identifiers for objects; clients +should not assume that references generated during one session are valid for any +future session. References do not allow objects to be compared for equality. Two +references to the same object are not guaranteed to be textually identical. + +UUIDs are intended to be permanent identifiers for objects. They are +guaranteed to be in the OSF DCE UUID presentation format (as output by `uuidgen`). +Clients may store UUIDs on disk and use them to look up objects in subsequent sessions +with the server. Clients may also test equality on objects by comparing UUID strings. + +The API provides mechanisms for translating between UUIDs and opaque references. +Each class that contains a UUID field provides: + +- A `get_by_uuid` method that takes a UUID and returns an opaque reference + to the server-side object that has that UUID; + +- A `get_uuid` function (a regular "field getter" RPC) that takes an opaque reference + and returns the UUID of the server-side object that is referenced by it. + +## Making RPC Calls + +### Transport Layer -Next, the user may acquire a list of all the VMs known to the system: -(Note the call takes the session reference as the only parameter) +The following transport layers are currently supported: - >>> all_vms = xen.VM.get_all(session)['Value'] - >>> all_vms - ['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4'] +- HTTP/HTTPS for remote administration +- HTTP over Unix domain sockets for local administration -The VM references here have the form `OpaqueRef:X`, though -they may not be that simple in the future, and you should treat them as -opaque strings. _Templates_ are VMs with the -`is_a_template` field set to `true`. We can find the subset -of template VMs using a command like the following: +### Session Layer - >>> all_templates = filter(lambda x: xen.VM.get_is_a_template(session, x)['Value'], all_vms) +The RPC interface is session-based; before you can make arbitrary RPC calls +you must login and initiate a session. For example: -Once a reference to a VM has been acquired a lifecycle operation may be -invoked: +```python + (session ref) session.login_with_password(string uname, string pwd, + string version, string originator) +``` - >>> xen.VM.start(session, all_templates[0], False, False) - {'Status': 'Failure', 'ErrorDescription': ['VM_IS_TEMPLATE', 'OpaqueRef:X']} +where `uname` and `password` refer to your username and password, as defined by +the Xen administrator, while `version` and `originator` are optional. The +`session ref` returned by `session.login_with_password` is passed +to subequent RPC calls as an authentication token. Note that a session +reference obtained by a login request to the XML-RPC backend can be used in +subsequent requests to the JSON-RPC backend, and vice-versa. -In this case the `start` message has been rejected, because -the VM is a template, and so an error response has been returned. These -high-level errors are returned as structured data (rather than as -XML-RPC faults), allowing them to be internationalised. +A session can be terminated with the `session.logout` function: -Rather than querying fields individually, whole _records_ -may be returned at once. To retrieve the record of a single object as a -python dictionary: +```python + void session.logout(session ref session_id) +``` + +### Synchronous and Asynchronous Invocation + +Each method call (apart from methods on the `Session` and `Task` objects and +"getters" and "setters" derived from fields) can be made either synchronously or +asynchronously. A synchronous RPC call blocks until the +return value is received; the return value of a synchronous RPC call is +exactly as specified above. + +Only synchronous API calls are listed explicitly in this document. +All their asynchronous counterparts are in the special `Async` namespace. +For example, the synchronous call `VM.clone(...)` has an asynchronous +counterpart, `Async.VM.clone(...)`, that is non-blocking. + +Instead of returning its result directly, an asynchronous RPC call +returns an identifier of type `task ref` which is subsequently used +to track the status of a running asynchronous RPC. + +Note that an asychronous call may fail immediately, before a task has even been +created. When using the XML-RPC wire protocol, this eventuality is represented +by wrapping the returned `task ref` in an XML-RPC struct with a `Status`, +`ErrorDescription`, and `Value` fields, exactly as specified above; the +`task ref` is provided in the `Value` field if `Status` is set to `Success`. +When using the JSON-RPC protocol, the `task ref` is wrapped in a response JSON +object as specified above and it is provided by the value of the `result` member +of a successful call. + +The RPC call + +```python + (task ref set) Task.get_all(session ref session_id) +``` + +returns a set of all task identifiers known to the system. The status (including any +returned result and error codes) of these can then be queried by accessing the +fields of the `Task` object in the usual way. Note that, in order to get a +consistent snapshot of a task's state, it is advisable to call the `get_record` +function. + +## Example interactive session + +This section describes how an interactive session might look, using python +XML-RPC and JSON-RPC client libraries. + +First, initialise python: + +```bash +$ python3 +>>> +``` + +### Using the XML-RPC Protocol + +Import the library `xmlrpc.client` and create a +python object referencing the remote server as shown below: + +```python +>>> import xmlrpc.client +>>> xen = xmlrpc.client.ServerProxy("https://localhost:443") +``` + +Note that you may need to disable SSL certificate validation to establish the +connection, this can be done as follows: + +```python +>>> import ssl +>>> ctx = ssl._create_unverified_context() +>>> xen = xmlrpc.client.ServerProxy("https://localhost:443", context=ctx) +``` + +Acquire a session reference by logging in with a username and password; the +session reference is returned under the key `Value` in the resulting dictionary +(error-handling ommitted for brevity): + +```python +>>> session = xen.session.login_with_password("user", "passwd", +... "version", "originator")['Value'] +``` + +This is what the call looks like when serialized + +```xml + + + session.login_with_password + + user + passwd + version + originator + + +``` + +Next, the user may acquire a list of all the VMs known to the system (note the +call takes the session reference as the only parameter): + +```python +>>> all_vms = xen.VM.get_all(session)['Value'] +>>> all_vms +['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4' ] +``` + +The VM references here have the form `OpaqueRef:X` (though they may not be +that simple in reality) and you should treat them as opaque strings. +_Templates_ are VMs with the `is_a_template` field set to `true`. We can +find the subset of template VMs using a command like the following: + +```python +>>> all_templates = filter(lambda x: xen.VM.get_is_a_template(session, x)['Value'], + all_vms) +``` + +Once a reference to a VM has been acquired, a lifecycle operation may be invoked: + +```python +>>> xen.VM.start(session, all_templates[0], False, False) +{'Status': 'Failure', 'ErrorDescription': ['VM_IS_TEMPLATE', 'OpaqueRef:X']} +``` + +In this case the `start` message has been rejected, because the VM is +a template, and so an error response has been returned. These high-level +errors are returned as structured data (rather than as XML-RPC faults), +allowing them to be internationalized. + +Rather than querying fields individually, whole _records_ may be returned at once. +To retrieve the record of a single object as a python dictionary: + +```python +>>> record = xen.VM.get_record(session, all_templates[0])['Value'] +>>> record['power_state'] +'Halted' +>>> record['name_label'] +'Windows 10 (64-bit)' +``` + +To retrieve all the VM records in a single call: - >>> record = xen.VM.get_record(session, all_templates[0])['Value'] - >>> record['power_state'] - 'Halted' - >>> record['name_label'] - 'XenSource P2V Server' +```python +>>> records = xen.VM.get_all_records(session)['Value'] +>>> list(records.keys()) +['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4' ] +>>> records['OpaqueRef:1']['name_label'] +'Red Hat Enterprise Linux 7' +``` + +### Using the JSON-RPC Protocol + +For this example we are making use of the package `jsonrpcclient` and the +`requests` library due to their simplicity, although other packages can also be +used. + +First, import the `requests` and `jsonrpcclient` libraries: + +```python +>>> import requests +>>> import jsonrpcclient +``` + +Now we construct a utility method to make using these libraries easier: + +```python +>>> def jsonrpccall(method, params): +... r = requests.post("https://localhost:443/jsonrpc", +... json=jsonrpcclient.request(method, params=params), +... verify=False) +... p = jsonrpcclient.parse(r.json()) +... if isinstance(p, jsonrpcclient.Ok): +... return p.result +... raise Exception(p.message, p.data) +``` + +Acquire a session reference by logging in with a username and password: + +```python +>>> session = jsonrpccall("session.login_with_password", +... ("user", "password", "version", "originator")) +``` + +`jsonrpcclient` uses the JSON-RPC protocol v2.0, so this is what the serialized +request looks like: + +```json + { + "jsonrpc": "2.0", + "method": "session.login_with_password", + "params": ["user", "passwd", "version", "originator"], + "id": 0 + } +``` + +Next, the user may acquire a list of all the VMs known to the system (note the +call takes the session reference as the only parameter): + +```python +>>> all_vms = jsonrpccall("VM.get_all", (session,)) +>>> all_vms +['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4' ] +``` + +The VM references here have the form `OpaqueRef:X` (though they may not be +that simple in reality) and you should treat them as opaque strings. +_Templates_ are VMs with the `is_a_template` field set to `true`. We can +find the subset of template VMs using a command like the following: + +```python +>>> all_templates = filter( +... lambda x: jsonrpccall("VM.get_is_a_template", (session, x)), +... all_vms) +``` + +Once a reference to a VM has been acquired, a lifecycle operation may be invoked: + +```python +>>> try: +... jsonrpccall("VM.start", (session, next(all_templates), False, False)) +... except Exception as e: +... e +... +Exception('VM_IS_TEMPLATE', ['OpaqueRef:1', 'start']) +``` + +In this case the `start` message has been rejected because the VM is +a template, hence an error response has been returned. These high-level +errors are returned as structured data, allowing them to be internationalized. + +Rather than querying fields individually, whole _records_ may be returned at once. +To retrieve the record of a single object as a python dictionary: + +```python +>>> record = jsonrpccall("VM.get_record", (session, next(all_templates))) +>>> record['power_state'] +'Halted' +>>> record['name_label'] +'Windows 10 (64-bit)' +``` To retrieve all the VM records in a single call: - >>> records = xen.VM.get_all_records(session)['Value'] - >>> records.keys() - ['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4' ] - >>> records['OpaqueRef:1']['name_label'] - 'RHEL 4.1 Autoinstall Template' +```python +>>> records = jsonrpccall("VM.get_all_records", (session,)) +>>> records.keys() +['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4' ] +>>> records['OpaqueRef:1']['name_label'] +'Red Hat Enterprise Linux 7' +``` diff --git a/ocaml/doc/basics.md b/ocaml/doc/basics.md deleted file mode 100644 index 783797051f0..00000000000 --- a/ocaml/doc/basics.md +++ /dev/null @@ -1,119 +0,0 @@ -# API Basics - -This document defines the XenServer Management API - an interface for remotely -configuring and controlling virtualized guests running on a Xen-enabled host. - -The API is presented here as a set of Remote Procedure Calls (RPCs). There are -two supported wire formats, one based upon [XML-RPC](http://xmlrpc.scripting.com/spec.html) -and one based upon [JSON-RPC](http://www.jsonrpc.org) (v1.0 and v2.0 are both -recognized). No specific language bindings are prescribed, although examples -will be given in the python programming language. - -Although we adopt some terminology from object-oriented programming, -future client language bindings may or may not be object oriented. -The API reference uses the terminology _classes_ and _objects_. -For our purposes a _class_ is simply a hierarchical namespace; -an _object_ is an instance of a class with its fields set to -specific values. Objects are persistent and exist on the server-side. -Clients may obtain opaque references to these server-side objects and then -access their fields via get/set RPCs. - -For each class we specify a list of fields along with their _types_ and -_qualifiers_. A qualifier is one of: - -* `RO/runtime`: the field is Read Only. Furthermore, its value is - automatically computed at runtime. For example, current CPU load and disk IO - throughput. - -* `RO/constructor`: the field must be manually set when a new object is - created, but is then Read Only for the duration of the object's life. - For example, the maximum memory addressable by a guest is set - before the guest boots. - -* `RW`: the field is Read/Write. For example, the name of a VM. - -## Types - -The following types are used to specify methods and fields in the API Reference: - -* `string`: Text strings. -* `int`: 64-bit integers. -* `float`: IEEE double-precision floating-point numbers. -* `bool`: Boolean. -* `datetime`: Date and timestamp. -* `c ref`: Reference to an object of class `c`. -* `t set`: Arbitrary-length set of values of type `t`. -* `(k -> v) map`: Mapping from values of type `k` to values of type `v`. -* `e enum`: Enumeration type with name `e`. Enums are defined in the - API reference together with classes that use them. - -Note that there are a number of cases where `ref`s are _doubly linked_. -For example, a `VM` has a field called `VIFs` of type `VIF ref set`; -this field lists the network interfaces attached to a particular VM. -Similarly, the `VIF` class has a field called `VM` of type `VM ref` -which references the VM to which the interface is connected. -These two fields are _bound together_, in the sense that -creating a new VIF causes the `VIFs` field of the corresponding -VM object to be updated automatically. - -The API reference lists explicitly the fields that are -bound together in this way. It also contains a diagram that shows -relationships between classes. In this diagram an edge signifies the -existence of a pair of fields that are bound together, using standard -crows-foot notation to signify the type of relationship (e.g. -one-many, many-many). - -## RPCs associated with fields - -Each field, `f`, has an RPC accessor associated with it that returns `f`'s value: - -* `get_f (r)`: takes a `ref`, `r` that refers to an object and returns the value - of `f`. - -Each field, `f`, with qualifier `RW` and whose outermost type is `set` has the -following additional RPCs associated with it: - -* `add_f(r, v)`: adds a new element `v` to the set. - Note that sets cannot contain duplicate values, hence this operation has - no action in the case that `v` is already in the set. - -* `remove_f(r, v)`: removes element `v` from the set. - -Each field, `f`, with qualifier `RW` and whose outermost type is `map` has the -following additional RPCs associated with it: - -* `add_to_f(r, k, v)`: adds new pair `k -> v` to the mapping stored in `f` in - object`r`. Attempting to add a new pair for duplicate key, `k`, fails with a - `MAP_DUPLICATE_KEY` error. - -* `remove_from_f(r, k)`: removes the pair with key `k` - from the mapping stored in `f` in object `r`. - -Each field whose outermost type is neither `set` nor `map`, but whose -qualifier is `RW` has an RPC accessor associated with it that sets its value: - -* `set_f(r, v)`: sets the field `f` on object `r` to value `v`. - -## RPCs associated with classes - -* Most classes have a _constructor_ RPC named `create` that - takes as parameters all fields marked `RW` and `RO/constructor`. The result - of this RPC is that a new _persistent_ object is created on the server-side - with the specified field values. - -* Each class has a `get_by_uuid(uuid)` RPC that returns the object - of that class that has the specified `uuid`. - -* Each class that has a `name_label` field has a - `get_by_name_label(name_label)` RPC that returns a set of objects of that - class that have the specified `name_label`. - -* Most classes have a `destroy(r)` RPC that explicitly deletes - the persistent object specified by `r` from the system. This is a - non-cascading delete - if the object being removed is referenced by another - object then the `destroy` call will fail. - -Apart from the RPCs enumerated above, some classes have additional RPCs -associated with them. For example, the `VM` class has RPCs for cloning, -suspending, starting etc. Such additional RPCs are described explicitly -in the API reference. diff --git a/ocaml/doc/dune b/ocaml/doc/dune index aa5077ef404..9c4bb6cd474 100644 --- a/ocaml/doc/dune +++ b/ocaml/doc/dune @@ -60,3 +60,13 @@ doc-convert.sh ) ) + +(install + (package xapi) + (section share_root) + (files + (glob_files (../../doc/content/xen-api/basics.md with_prefix markdown)) + (glob_files (../../doc/content/xen-api/wire-protocol.md with_prefix markdown)) + (glob_files (../../doc/content/xen-api/topics/vm-lifecycle.md with_prefix markdown)) + ) +) diff --git a/ocaml/doc/vm-lifecycle.md b/ocaml/doc/vm-lifecycle.md deleted file mode 100644 index 31f31889f36..00000000000 --- a/ocaml/doc/vm-lifecycle.md +++ /dev/null @@ -1,50 +0,0 @@ -# VM Lifecycle - -The following diagram shows the states that a VM can be in -and the API calls that can be used to move the VM between these states. - -![VM lifecycle](vm-lifecycle.png "VM Lifecycle") - -## VM boot parameters - -The `VM` class contains a number of fields that control the way in which the VM -is booted. With reference to the fields defined in the VM class (see later in -this document), this section outlines the boot options available and the -mechanisms provided for controlling them. - -VM booting is controlled by setting one of the two mutually exclusive groups: -"PV" and "HVM". If `HVM.boot_policy` is an empty string, then paravirtual -domain building and booting will be used; otherwise the VM will be loaded as a -HVM domain, and booted using an emulated BIOS. - -When paravirtual booting is in use, the `PV_bootloader` field indicates the -bootloader to use. It may be "pygrub", in which case the platform's default -installation of pygrub will be used, or a full path within the control domain to -some other bootloader. The other fields, `PV_kernel`, `PV_ramdisk`, `PV_args`, -and `PV_bootloader_args` will be passed to the bootloader unmodified, and -interpretation of those fields is then specific to the bootloader itself, -including the possibility that the bootloader will ignore some or all of -those given values. Finally the paths of all bootable disks are added to the -bootloader commandline (a disk is bootable if its VBD has the bootable flag set). -There may be zero, one, or many bootable disks; the bootloader decides which -disk (if any) to boot from. - -If the bootloader is pygrub, then the menu.lst is parsed, if present in the -guest's filesystem, otherwise the specified kernel and ramdisk are used, or an -autodetected kernel is used if nothing is specified and autodetection is -possible. `PV_args` is appended to the kernel command line, no matter which -mechanism is used for finding the kernel. - -If `PV_bootloader` is empty but `PV_kernel` is specified, then the kernel and -ramdisk values will be treated as paths within the control domain. If both -`PV_bootloader` and `PV_kernel` are empty, then the behaviour is as if -`PV_bootloader` were specified as "pygrub". - -When using HVM booting, `HVM_boot_policy` and `HVM_boot_params` specify the boot -handling. Only one policy is currently defined, "BIOS order". In this case, -`HVM_boot_params` should contain one key-value pair "order" = "N" where N is the -string that will be passed to QEMU. -Optionally `HVM_boot_params` can contain another key-value pair "firmware" -with values "bios" or "uefi" (default is "bios" if absent). -By default Secure Boot is not enabled, it can be enabled when "uefi" is enabled by setting -`VM.platform["secureboot"]` to true. diff --git a/ocaml/doc/wire-protocol.md b/ocaml/doc/wire-protocol.md deleted file mode 100644 index 26b911bd2c7..00000000000 --- a/ocaml/doc/wire-protocol.md +++ /dev/null @@ -1,664 +0,0 @@ -# Wire Protocol for Remote API Calls - -API calls are sent over a network to a Xen-enabled host using an RPC protocol. -Here we describe how the higher-level types used in our API Reference are mapped -to primitive RPC types, covering the two supported wire formats -[XML-RPC](http://xmlrpc.scripting.com/spec.html) and [JSON-RPC](http://www.jsonrpc.org). - -## XML-RPC Protocol - -We specify the signatures of API functions in the following style: - -```python -(VM ref set) VM.get_all() -``` - -This specifies that the function with name `VM.get_all` takes -no parameters and returns a `set` of `VM ref`. -These types are mapped onto XML-RPC types in a straight-forward manner: - -* the types `float`, `bool`, `datetime`, and `string` map directly to the XML-RPC - ``, ``, ``, and `` elements. - -* all `ref` types are opaque references, encoded as the - XML-RPC's `` type. Users of the API should not make assumptions - about the concrete form of these strings and should not expect them to - remain valid after the client's session with the server has terminated. - -* fields named `uuid` of type `string` are mapped to - the XML-RPC `` type. The string itself is the OSF - DCE UUID presentation format (as output by `uuidgen`). - -* `int` is assumed to be 64-bit in our API and is encoded as a string - of decimal digits (rather than using XML-RPC's built-in 32-bit `` type). - -* values of `enum` types are encoded as strings. For example, the value - `destroy` of `enum on_normal_exit`, would be conveyed as: - -```xml - destroy -``` - -* for all our types, `t`, our type `t set` simply maps to XML-RPC's `` - type, so, for example, a value of type `string set` would be transmitted like - this: - -```xml - - - CX8 - PSE36 - FPU - - -``` - -* for types `k` and `v`, our type `(k -> v) map` maps onto an - XML-RPC ``, with the key as the name of the struct. Note that the - `(k -> v) map` type is only valid when `k` is a `string`, `ref`, or - `int`, and in each case the keys of the maps are stringified as - above. For example, the `(string -> float) map` containing the mappings - _Mike -> 2.3_ and _John -> 1.2_ would be represented as: - -```xml - - - - Mike - 2.3 - - - John - 1.2 - - - -``` - -* our `void` type is transmitted as an empty string. - -### XML-RPC Return Values and Status Codes - -The return value of an RPC call is an XML-RPC ``. - -* The first element of the struct is named `Status`; it contains a string value - indicating whether the result of the call was a `Success` or a `Failure`. - -If the `Status` is `Success` then the struct contains a second element named -`Value`: - -* The element of the struct named `Value` contains the function's return value. - -If the `Status` is `Failure` then the struct contains a second element named -`ErrorDescription`: - -* The element of the struct named `ErrorDescription` contains an array of string - values. The first element of the array is an error code; the rest of the - elements are strings representing error parameters relating to that code. - -For example, an XML-RPC return value from the `host.get_resident_VMs` function -may look like this: - -```xml - - - Status - Success - - - Value - - - - 81547a35-205c-a551-c577-00b982c5fe00 - 61c85a22-05da-b8a2-2e55-06b0847da503 - 1d401ec4-3c17-35a6-fc79-cee6bd9811fe - - - - - -``` - -## JSON-RPC Protocol - -We specify the signatures of API functions in the following style: - -```python -(VM ref set) VM.get_all() -``` - -This specifies that the function with name `VM.get_all` takes no parameters and -returns a `set` of `VM ref`. These types are mapped onto JSON-RPC types in the -following manner: - -* the types `float` and `bool` map directly to the JSON types `number` and - `boolean`, while `datetime` and `string` are represented as the JSON `string` - type. - -* all `ref` types are opaque references, encoded as the JSON `string` type. - Users of the API should not make assumptions about the concrete form of these - strings and should not expect them to remain valid after the client's session - with the server has terminated. - -* fields named `uuid` of type `string` are mapped to the JSON `string` type. The - string itself is the OSF DCE UUID presentation format (as output by `uuidgen`). - -* `int` is assumed to be 64-bit in our API and is encoded as a JSON `number` - without decimal point or exponent, preserved as a string. - -* values of `enum` types are encoded as the JSON `string` type. For example, the - value `destroy` of `enum on_normal_exit`, would be conveyed as: - -```xml - "destroy" -``` - -* for all our types, `t`, our type `t set` simply maps to the JSON `array` - type, so, for example, a value of type `string set` would be transmitted like - this: - -```json - [ "CX8", "PSE36", "FPU" ] -``` - -* for types `k` and `v`, our type `(k -> v) map` maps onto a JSON object which - contains members with name `k` and value `v`. Note that the - `(k -> v) map` type is only valid when `k` is a `string`, `ref`, or - `int`, and in each case the keys of the maps are stringified as - above. For example, the `(string -> float) map` containing the mappings - _Mike -> 2.3_ and _John -> 1.2_ would be represented as: - -```json - { - "Mike": 2.3, - "John": 1.2 - } -``` - -* our `void` type is transmitted as an empty string. - -Both versions 1.0 and 2.0 of the JSON-RPC wire format are recognized and, -depending on your client library, you can use either of them. - -### JSON-RPC v1.0 - -#### JSON-RPC v1.0 Requests - -An API call is represented by sending a single JSON object to the server, which -contains the members `method`, `params`, and `id`. - -* `method`: A JSON `string` containing the name of the function to be invoked. - -* `params`: A JSON `array` of values, which represents the parameters of the - function to be invoked. - -* `id`: A JSON `string` or `integer` representing the call id. Note that, - diverging from the JSON-RPC v1.0 specification the API does not accept - _notification_ requests (requests without responses), i.e. the id cannot be - `null`. - -For example, a JSON-RPC v1.0 request to retrieve the resident VMs of a host may -look like this: - -```json - { - "method": "host.get_resident_VMs", - "params": [ - "OpaqueRef:74f1a19cd-b660-41e3-a163-10f03e0eae67", - "OpaqueRef:08c34fc9-f418-4f09-8274-b9cb25cd8550" - ], - "id": "xyz" - } -``` - -In the above example, the first element of the `params` array is the reference -of the open session to the host, while the second is the host reference. - -#### JSON-RPC v1.0 Return Values - -The return value of a JSON-RPC v1.0 call is a single JSON object containing -the members `result`, `error`, and `id`. - -* `result`: If the call is successful, it is a JSON value (`string`, `array` - etc.) representing the return value of the invoked function. If an error has - occurred, it is `null`. - -* `error`: If the call is successful, it is `null`. If the call has failed, it - a JSON `array` of `string` values. The first element of the array is an error - code; the remainder of the array are strings representing error parameters - relating to that code. - -* `id`: The call id. It is a JSON `string` or `integer` and it is the same id - as the request it is responding to. - -For example, a JSON-RPC v1.0 return value from the `host.get_resident_VMs` -function may look like this: - -```json - { - "result": [ - "OpaqueRef:604f51e7-630f-4412-83fa-b11c6cf008ab", - "OpaqueRef:670d08f5-cbeb-4336-8420-ccd56390a65f" - ], - "error": null, - "id": "xyz" - } -``` - -while the return value of the same call made on a logged out session may look -like this: - -```json - { - "result": null, - "error": [ - "SESSION_INVALID", - "OpaqueRef:93f1a23cd-a640-41e3-b163-10f86e0eae67" - ], - "id": "xyz" - } -``` - -### JSON-RPC v2.0 - -#### JSON-RPC v2.0 Requests - -An API call is represented by sending a single JSON object to the server, which -contains the members `jsonrpc`, `method`, `params`, and `id`. - -* `jsonrpc`: A JSON `string` specifying the version of the JSON-RPC protocol. It - is exactly "2.0". - -* `method`: A JSON `string` containing the name of the function to be invoked. - -* `params`: A JSON `array` of values, which represents the parameters of the - function to be invoked. Although the JSON-RPC v2.0 specification allows this - member to be ommitted, in practice all API calls accept at least one parameter. - -* `id`: A JSON `string` or `integer` representing the call id. Note that, - diverging from the JSON-RPC v2.0 specification it cannot be null. Neither can - it be ommitted because the API does not accept _notification_ requests - (requests without responses). - -For example, a JSON-RPC v2.0 request to retrieve the VMs resident on a host may -may look like this: - -```json - { - "jsonrpc": "2.0", - "method": "host.get_resident_VMs", - "params": [ - "OpaqueRef:c90cd28f-37ec-4dbf-88e6-f697ccb28b39", - "OpaqueRef:08c34fc9-f418-4f09-8274-b9cb25cd8550" - ], - "id": 3 - } -``` - -As before, the first element of the `parameter` array is the reference -of the open session to the host, while the second is the host reference. - -#### JSON-RPC v2.0 Return Values - -The return value of a JSON-RPC v2.0 call is a single JSON object containing the -members `jsonrpc`, either `result` or `error` depending on the outcome of the -call, and `id`. - -* `jsonrpc`: A JSON `string` specifying the version of the JSON-RPC protocol. It - is exactly "2.0". - -* `result`: If the call is successful, it is a JSON value (`string`, `array` etc.) - representing the return value of the invoked function. If an error has - occurred, it does not exist. - -* `error`: If the call is successful, it does not exist. If the call has failed, - it is a single structured JSON object (see below). - -* `id`: The call id. It is a JSON `string` or `integer` and it is the same id - as the request it is responding to. - -The `error` object contains the members `code`, `message`, and `data`. - -* `code`: The API does not make use of this member and only retains it for - compliance with the JSON-RPC v2.0 specification. It is a JSON `integer` - which has a non-zero value. - -* `message`: A JSON `string` representing an API error code. - -* `data`: A JSON array of `string` values representing error parameters - relating to the aforementioned API error code. - -For example, a JSON-RPC v2.0 return value from the `host.get_resident_VMs` -function may look like this: - -```json - { - "jsonrpc": "2.0", - "result": [ - "OpaqueRef:604f51e7-630f-4412-83fa-b11c6cf008ab", - "OpaqueRef:670d08f5-cbeb-4336-8420-ccd56390a65f" - ], - "id": 3 - } -``` - -while the return value of the same call made on a logged out session may look -like this: - -```json - { - "jsonrpc": "2.0", - "error": { - "code": 1, - "message": "SESSION_INVALID", - "data": [ - "OpaqueRef:c90cd28f-37ec-4dbf-88e6-f697ccb28b39" - ] - }, - "id": 3 - } -``` - -## Note on References vs UUIDs - -References are opaque types - encoded as XML-RPC and JSON-RPC strings on the -wire - understood only by the particular server which generated them. Servers -are free to choose any concrete representation they find convenient; clients -should not make any assumptions or attempt to parse the string contents. -References are not guaranteed to be permanent identifiers for objects; clients -should not assume that references generated during one session are valid for any -future session. References do not allow objects to be compared for equality. Two -references to the same object are not guaranteed to be textually identical. - -UUIDs are intended to be permanent identifiers for objects. They are -guaranteed to be in the OSF DCE UUID presentation format (as output by `uuidgen`). -Clients may store UUIDs on disk and use them to look up objects in subsequent sessions -with the server. Clients may also test equality on objects by comparing UUID strings. - -The API provides mechanisms for translating between UUIDs and opaque references. -Each class that contains a UUID field provides: - -* A `get_by_uuid` method that takes a UUID and returns an opaque reference - to the server-side object that has that UUID; - -* A `get_uuid` function (a regular "field getter" RPC) that takes an opaque reference - and returns the UUID of the server-side object that is referenced by it. - -## Making RPC Calls - -### Transport Layer - -The following transport layers are currently supported: - -* HTTP/HTTPS for remote administration -* HTTP over Unix domain sockets for local administration - -### Session Layer - -The RPC interface is session-based; before you can make arbitrary RPC calls -you must login and initiate a session. For example: - -```python - (session ref) session.login_with_password(string uname, string pwd, - string version, string originator) -``` - -where `uname` and `password` refer to your username and password, as defined by -the Xen administrator, while `version` and `originator` are optional. The -`session ref` returned by `session.login_with_password` is passed -to subequent RPC calls as an authentication token. Note that a session -reference obtained by a login request to the XML-RPC backend can be used in -subsequent requests to the JSON-RPC backend, and vice-versa. - -A session can be terminated with the `session.logout` function: - -```python - void session.logout(session ref session_id) -``` - -### Synchronous and Asynchronous Invocation - -Each method call (apart from methods on the `Session` and `Task` objects and -"getters" and "setters" derived from fields) can be made either synchronously or -asynchronously. A synchronous RPC call blocks until the -return value is received; the return value of a synchronous RPC call is -exactly as specified above. - -Only synchronous API calls are listed explicitly in this document. -All their asynchronous counterparts are in the special `Async` namespace. -For example, the synchronous call `VM.clone(...)` has an asynchronous -counterpart, `Async.VM.clone(...)`, that is non-blocking. - -Instead of returning its result directly, an asynchronous RPC call -returns an identifier of type `task ref` which is subsequently used -to track the status of a running asynchronous RPC. - -Note that an asychronous call may fail immediately, before a task has even been -created. When using the XML-RPC wire protocol, this eventuality is represented -by wrapping the returned `task ref` in an XML-RPC struct with a `Status`, -`ErrorDescription`, and `Value` fields, exactly as specified above; the -`task ref` is provided in the `Value` field if `Status` is set to `Success`. -When using the JSON-RPC protocol, the `task ref` is wrapped in a response JSON -object as specified above and it is provided by the value of the `result` member -of a successful call. - -The RPC call - -```python - (task ref set) Task.get_all(session ref session_id) -``` - -returns a set of all task identifiers known to the system. The status (including any -returned result and error codes) of these can then be queried by accessing the -fields of the `Task` object in the usual way. Note that, in order to get a -consistent snapshot of a task's state, it is advisable to call the `get_record` -function. - -## Example interactive session - -This section describes how an interactive session might look, using python -XML-RPC and JSON-RPC client libraries. - -First, initialise python: - -```bash -$ python3 ->>> -``` - -### Using the XML-RPC Protocol - -Import the library `xmlrpc.client` and create a -python object referencing the remote server as shown below: - -```python ->>> import xmlrpc.client ->>> xen = xmlrpc.client.ServerProxy("https://localhost:443") -``` - -Note that you may need to disable SSL certificate validation to establish the -connection, this can be done as follows: - -```python ->>> import ssl ->>> ctx = ssl._create_unverified_context() ->>> xen = xmlrpc.client.ServerProxy("https://localhost:443", context=ctx) -``` - -Acquire a session reference by logging in with a username and password; the -session reference is returned under the key `Value` in the resulting dictionary -(error-handling ommitted for brevity): - -```python ->>> session = xen.session.login_with_password("user", "passwd", -... "version", "originator")['Value'] -``` - -This is what the call looks like when serialized - -```xml - - - session.login_with_password - - user - passwd - version - originator - - -``` - -Next, the user may acquire a list of all the VMs known to the system (note the -call takes the session reference as the only parameter): - -```python ->>> all_vms = xen.VM.get_all(session)['Value'] ->>> all_vms -['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4' ] -``` - -The VM references here have the form `OpaqueRef:X` (though they may not be -that simple in reality) and you should treat them as opaque strings. -_Templates_ are VMs with the `is_a_template` field set to `true`. We can -find the subset of template VMs using a command like the following: - -```python ->>> all_templates = filter(lambda x: xen.VM.get_is_a_template(session, x)['Value'], - all_vms) -``` - -Once a reference to a VM has been acquired, a lifecycle operation may be invoked: - -```python ->>> xen.VM.start(session, all_templates[0], False, False) -{'Status': 'Failure', 'ErrorDescription': ['VM_IS_TEMPLATE', 'OpaqueRef:X']} -``` - -In this case the `start` message has been rejected, because the VM is -a template, and so an error response has been returned. These high-level -errors are returned as structured data (rather than as XML-RPC faults), -allowing them to be internationalized. - -Rather than querying fields individually, whole _records_ may be returned at once. -To retrieve the record of a single object as a python dictionary: - -```python ->>> record = xen.VM.get_record(session, all_templates[0])['Value'] ->>> record['power_state'] -'Halted' ->>> record['name_label'] -'Windows 10 (64-bit)' -``` - -To retrieve all the VM records in a single call: - -```python ->>> records = xen.VM.get_all_records(session)['Value'] ->>> list(records.keys()) -['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4' ] ->>> records['OpaqueRef:1']['name_label'] -'Red Hat Enterprise Linux 7' -``` - -### Using the JSON-RPC Protocol - -For this example we are making use of the package `jsonrpcclient` and the -`requests` library due to their simplicity, although other packages can also be -used. - -First, import the `requests` and `jsonrpcclient` libraries: - -```python ->>> import requests ->>> import jsonrpcclient -``` - -Now we construct a utility method to make using these libraries easier: - -```python ->>> def jsonrpccall(method, params): -... r = requests.post("https://localhost:443/jsonrpc", -... json=jsonrpcclient.request(method, params=params), -... verify=False) -... p = jsonrpcclient.parse(r.json()) -... if isinstance(p, jsonrpcclient.Ok): -... return p.result -... raise Exception(p.message, p.data) -``` - -Acquire a session reference by logging in with a username and password: - -```python ->>> session = jsonrpccall("session.login_with_password", -... ("user", "password", "version", "originator")) -``` - -`jsonrpcclient` uses the JSON-RPC protocol v2.0, so this is what the serialized -request looks like: - -```json - { - "jsonrpc": "2.0", - "method": "session.login_with_password", - "params": ["user", "passwd", "version", "originator"], - "id": 0 - } -``` - -Next, the user may acquire a list of all the VMs known to the system (note the -call takes the session reference as the only parameter): - -```python ->>> all_vms = jsonrpccall("VM.get_all", (session,)) ->>> all_vms -['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4' ] -``` - -The VM references here have the form `OpaqueRef:X` (though they may not be -that simple in reality) and you should treat them as opaque strings. -_Templates_ are VMs with the `is_a_template` field set to `true`. We can -find the subset of template VMs using a command like the following: - -```python ->>> all_templates = filter( -... lambda x: jsonrpccall("VM.get_is_a_template", (session, x)), -... all_vms) -``` - -Once a reference to a VM has been acquired, a lifecycle operation may be invoked: - -```python ->>> try: -... jsonrpccall("VM.start", (session, next(all_templates), False, False)) -... except Exception as e: -... e -... -Exception('VM_IS_TEMPLATE', ['OpaqueRef:1', 'start']) -``` - -In this case the `start` message has been rejected because the VM is -a template, hence an error response has been returned. These high-level -errors are returned as structured data, allowing them to be internationalized. - -Rather than querying fields individually, whole _records_ may be returned at once. -To retrieve the record of a single object as a python dictionary: - -```python ->>> record = jsonrpccall("VM.get_record", (session, next(all_templates))) ->>> record['power_state'] -'Halted' ->>> record['name_label'] -'Windows 10 (64-bit)' -``` - -To retrieve all the VM records in a single call: - -```python ->>> records = jsonrpccall("VM.get_all_records", (session,)) ->>> records.keys() -['OpaqueRef:1', 'OpaqueRef:2', 'OpaqueRef:3', 'OpaqueRef:4' ] ->>> records['OpaqueRef:1']['name_label'] -'Red Hat Enterprise Linux 7' -``` diff --git a/ocaml/idl/templates/api_errors.mustache b/ocaml/idl/templates/api_errors.mustache index 384a737d9a2..3bc2cd44a00 100644 --- a/ocaml/idl/templates/api_errors.mustache +++ b/ocaml/idl/templates/api_errors.mustache @@ -1,135 +1,9 @@ -# Error Handling +# Error Codes -When a low-level transport error occurs, or a request is malformed at the HTTP -or RPC level, the server may send an HTTP 500 error response, or the client -may simulate the same. The client must be prepared to handle these errors, -though they may be treated as fatal. - -For example, the following malformed request when using the XML-RPC protocol: - -```sh -$curl -D - -X POST https://server -H 'Content-Type: application/xml' \ -> -d ' -> -> session.logout -> ' -``` - -results to the following response: - -```sh -HTTP/1.1 500 Internal Error -content-length: 297 -content-type:text/html -connection:close -cache-control:no-cache, no-store - -

HTTP 500 internal server error

An unexpected error occurred; - please wait a while and try again. If the problem persists, please contact your - support representative.

Additional information

Xmlrpc.Parse_error(&quo -t;close_tag", "open_tag", _) -``` - -When using the JSON-RPC protocol: - -```sh -$curl -D - -X POST https://server/jsonrpc -H 'Content-Type: application/json' \ -> -d '{ -> "jsonrpc": "2.0", -> "method": "session.login_with_password", -> "id": 0 -> }' -``` - -the response is: - -```sh -HTTP/1.1 500 Internal Error -content-length: 308 -content-type:text/html -connection:close -cache-control:no-cache, no-store - -

HTTP 500 internal server error

An unexpected error occurred; - please wait a while and try again. If the problem persists, please contact your - support representative.

Additional information

Jsonrpc.Malformed_metho -d_request("{jsonrpc=...,method=...,id=...}") -``` - -All other failures are reported with a more structured error response, to -allow better automatic response to failures, proper internationalization of -any error message, and easier debugging. - -On the wire, these are transmitted like this when using the XML-RPC protocol: - -```xml - - - Status - Failure - - - ErrorDescription - - - - MAP_DUPLICATE_KEY - Customer - eSpiel Inc. - eSpiel Incorporated - - - - - -``` - -Note that `ErrorDescription` value is an array of string values. The -first element of the array is an error code; the remainder of the array are -strings representing error parameters relating to that code. In this case, -the client has attempted to add the mapping _Customer -> -eSpiel Incorporated_ to a Map, but it already contains the mapping -_Customer -> eSpiel Inc._, hence the request has failed. - -When using the JSON-RPC protocol v2.0, the above error is transmitted as: - -```json -{ - "jsonrpc": "2.0", - "error": { - "code": 1, - "message": "MAP_DUPLICATE_KEY", - "data": [ - "Customer", - "eSpiel Inc.", - "eSpiel Incorporated" - ] - }, - "id": 3 -} -``` - -Finally, when using the JSON-RPC protocol v1.0: - -```json -{ - "result": null, - "error": [ - "MAP_DUPLICATE_KEY", - "Customer", - "eSpiel Inc.", - "eSpiel Incorporated" - ], - "id": "xyz" -} -``` - -Each possible error code is documented in the following section. - -## Error Codes +The following is a list of all possible errors that can be issued by API calls. {{#errors}} -### {{{error_code}}} +## {{{error_code}}} {{{error_description}}} diff --git a/ocaml/idl/templates/toc.mustache b/ocaml/idl/templates/toc.mustache index 126bf2922e6..eca5b54db8b 100644 --- a/ocaml/idl/templates/toc.mustache +++ b/ocaml/idl/templates/toc.mustache @@ -11,5 +11,5 @@ - title: "Class: {{{name}}}" url: @root@management-api/class-{{{name_lower}}}.html {{/classes}} - - title: Error Handling + - title: Error Codes url: @root@management-api/api-errors.html From 785f1b9aa2e21af8ae980048a1edfff14a2b6e20 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Fri, 7 Jul 2023 14:57:50 +0000 Subject: [PATCH 03/50] Add VM_metrics to metadata export This includes the current_domain_type field, which is important for live imports, including those during a cross-pool live migration. Signed-off-by: Rob Hoes --- ocaml/xapi/export.ml | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/ocaml/xapi/export.ml b/ocaml/xapi/export.ml index a81ec647225..f14b7551885 100644 --- a/ocaml/xapi/export.ml +++ b/ocaml/xapi/export.ml @@ -140,7 +140,8 @@ let rec update_table ~__context ~include_snapshots ~preserve_power_state && Db.is_valid_ref __context vdi then add_vdi vdi ; - (* Add also the guest metrics *) + (* Add also the metrics and guest metrics *) + add vm.API.vM_metrics ; add vm.API.vM_guest_metrics ; (* Add the hosts links *) add vm.API.vM_resident_on ; @@ -264,7 +265,7 @@ let make_vm ?(with_snapshot_metadata = false) ~preserve_power_state table ; API.vM_resident_on= lookup table (Ref.string_of vm.API.vM_resident_on) ; API.vM_affinity= lookup table (Ref.string_of vm.API.vM_affinity) ; API.vM_consoles= [] - ; API.vM_metrics= Ref.null + ; API.vM_metrics= lookup table (Ref.string_of vm.API.vM_metrics) ; API.vM_guest_metrics= lookup table (Ref.string_of vm.API.vM_guest_metrics) ; API.vM_protection_policy= Ref.null ; API.vM_bios_strings= vm.API.vM_bios_strings @@ -277,6 +278,15 @@ let make_vm ?(with_snapshot_metadata = false) ~preserve_power_state table ; snapshot= API.rpc_of_vM_t vm } +(** Convert a VM metrics reference to an obj *) +let make_vmm table __context self = + let vmm = Db.VM_metrics.get_record ~__context ~self in + { + cls= Datamodel_common._vm_metrics + ; id= Ref.string_of (lookup table (Ref.string_of self)) + ; snapshot= API.rpc_of_vM_metrics_t vmm + } + (** Convert a guest-metrics reference to an obj *) let make_gm table __context self = let gm = Db.VM_guest_metrics.get_record ~__context ~self in @@ -506,6 +516,10 @@ let make_all ~with_snapshot_metadata ~preserve_power_state table __context = (make_vm ~with_snapshot_metadata ~preserve_power_state table __context) (filter table (Db.VM.get_all ~__context)) in + let vmms = + List.map (make_vmm table __context) + (filter table (Db.VM_metrics.get_all ~__context)) + in let gms = List.map (make_gm table __context) (filter table (Db.VM_guest_metrics.get_all ~__context)) @@ -566,6 +580,7 @@ let make_all ~with_snapshot_metadata ~preserve_power_state table __context = [ hosts ; vms + ; vmms ; gms ; vbds ; vifs From 077e8d11e3c000530c4c97a9f25aedd1463d3c8a Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Thu, 13 Jul 2023 15:46:19 +0000 Subject: [PATCH 04/50] Add VM_metrics to metadata import Signed-off-by: Rob Hoes --- ocaml/xapi/import.ml | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/ocaml/xapi/import.ml b/ocaml/xapi/import.ml index a1aaa306f53..edc39a7673c 100644 --- a/ocaml/xapi/import.ml +++ b/ocaml/xapi/import.ml @@ -730,6 +730,15 @@ module VM : HandlerTools = struct then Db.VM.set_suspend_SR ~__context ~self:vm ~value:Ref.null ; Db.VM.set_parent ~__context ~self:vm ~value:vm_record.API.vM_parent ; + ( try + let vmm = lookup vm_record.API.vM_metrics state.table in + (* We have VM_metrics in the imported metadata, so use it, and destroy + the record created by VM.create_from_record above. *) + let replaced_vmm = Db.VM.get_metrics ~__context ~self:vm in + Db.VM.set_metrics ~__context ~self:vm ~value:vmm ; + Db.VM_metrics.destroy ~__context ~self:replaced_vmm + with _ -> () + ) ; ( try let gm = lookup vm_record.API.vM_guest_metrics state.table in Db.VM.set_guest_metrics ~__context ~self:vm ~value:gm @@ -805,6 +814,40 @@ module GuestMetrics : HandlerTools = struct state.table <- (x.cls, x.id, Ref.string_of gm) :: state.table end +(** Create the VM metrics *) +module Metrics : HandlerTools = struct + type precheck_t = OK + + let precheck __context _config _rpc _session_id _state _x = OK + + let handle_dry_run __context _config _rpc _session_id state x _precheck_result + = + let dummy_gm = Ref.make () in + state.table <- (x.cls, x.id, Ref.string_of dummy_gm) :: state.table + + let handle __context _config _rpc _session_id state x _precheck_result = + let vmm_record = API.vM_metrics_t_of_rpc x.snapshot in + let vmm = Ref.make () in + Db.VM_metrics.create ~__context ~ref:vmm + ~uuid:(Uuidx.to_string (Uuidx.make ())) + ~memory_actual:vmm_record.API.vM_metrics_memory_actual + ~vCPUs_number:vmm_record.API.vM_metrics_VCPUs_number + ~vCPUs_utilisation:vmm_record.API.vM_metrics_VCPUs_utilisation + ~vCPUs_CPU:vmm_record.API.vM_metrics_VCPUs_CPU + ~vCPUs_params:vmm_record.API.vM_metrics_VCPUs_params + ~vCPUs_flags:vmm_record.API.vM_metrics_VCPUs_flags + ~state:vmm_record.API.vM_metrics_state + ~start_time:vmm_record.API.vM_metrics_start_time + ~install_time:vmm_record.API.vM_metrics_install_time + ~last_updated:vmm_record.API.vM_metrics_last_updated + ~other_config:vmm_record.API.vM_metrics_other_config + ~hvm:vmm_record.API.vM_metrics_hvm + ~nested_virt:vmm_record.API.vM_metrics_nested_virt + ~nomigrate:vmm_record.API.vM_metrics_nomigrate + ~current_domain_type:vmm_record.API.vM_metrics_current_domain_type ; + state.table <- (x.cls, x.id, Ref.string_of vmm) :: state.table +end + (** If we're restoring VM metadata only then lookup the SR by uuid. If we can't find the SR then we will still try to match VDIs later (except CDROMs) *) module SR : HandlerTools = struct @@ -1910,6 +1953,7 @@ module HostHandler = MakeHandler (Host) module SRHandler = MakeHandler (SR) module VDIHandler = MakeHandler (VDI) module GuestMetricsHandler = MakeHandler (GuestMetrics) +module MetricsHandler = MakeHandler (Metrics) module VMHandler = MakeHandler (VM) module NetworkHandler = MakeHandler (Net) module GPUGroupHandler = MakeHandler (GPUGroup) @@ -1928,6 +1972,7 @@ let handlers = ; (Datamodel_common._sr, SRHandler.handle) ; (Datamodel_common._vdi, VDIHandler.handle) ; (Datamodel_common._vm_guest_metrics, GuestMetricsHandler.handle) + ; (Datamodel_common._vm_metrics, MetricsHandler.handle) ; (Datamodel_common._vm, VMHandler.handle) ; (Datamodel_common._network, NetworkHandler.handle) ; (Datamodel_common._gpu_group, GPUGroupHandler.handle) From 33b6315fa885f070409258ef83da8e4ae182903e Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Fri, 7 Jul 2023 14:24:15 +0000 Subject: [PATCH 05/50] Cross-pool live migration: move CPU check to the target host The target host of a live migration is running by definition the same or a newer version of the software compared to the source host. As CPU checks are often extended or even changed in software updates, it is best to perform such checks on the target host. However, currently it is the source host that does these checks. This patch moves the check to the target host. A cross-pool live migration always begins with a dry-run VM-metadata import on the target host. This is where checks for free memory and GPU capacity are already carried out. The metadata import handler is extended to accept a `check_cpu` query parameter to signal that a CPU check is needed. This is included in the import call done in `assert_can_migrate` on the source host, and the old CPU check in there is dropped. Source hosts without this patch will still perform the CPU checks themselves, so we do not compromise safety. NOTE: This is a rebase of the initial work from 07a2a71b6b40 that had to be reverted with Ming's suggestion to skip CPUID check on 'unspecified' snapshots implemented. Signed-off-by: Rob Hoes --- ocaml/xapi/cpuid_helpers.ml | 38 ++++++++++++++++++++--------- ocaml/xapi/cpuid_helpers.mli | 8 +++---- ocaml/xapi/import.ml | 41 ++++++++++++++++++++++++++++---- ocaml/xapi/importexport.ml | 4 +++- ocaml/xapi/message_forwarding.ml | 2 +- ocaml/xapi/xapi_dr.ml | 1 + ocaml/xapi/xapi_vm.ml | 2 +- ocaml/xapi/xapi_vm_helpers.ml | 4 ++-- ocaml/xapi/xapi_vm_migrate.ml | 15 ++++++------ 9 files changed, 82 insertions(+), 33 deletions(-) diff --git a/ocaml/xapi/cpuid_helpers.ml b/ocaml/xapi/cpuid_helpers.ml index 571a7f073b6..1bf6731efad 100644 --- a/ocaml/xapi/cpuid_helpers.ml +++ b/ocaml/xapi/cpuid_helpers.ml @@ -47,9 +47,9 @@ let threads_per_core = Map_check.(field "threads_per_core" int) let vendor = Map_check.(field "vendor" string) -let get_flags_for_vm ~__context vm cpu_info = +let get_flags_for_vm ~__context domain_type cpu_info = let features_field = - match Helpers.domain_type ~__context ~self:vm with + match domain_type with | `hvm | `pv_in_pvh | `pvh -> features_hvm | `pv -> @@ -79,36 +79,51 @@ let next_boot_cpu_features ~__context ~vm = Map_check.getf features_field_boot pool_cpu_info |> Xenops_interface.CPU_policy.to_string -let get_host_cpu_info ~__context ~vm:_ ~host ?remote () = +let get_host_cpu_info ~__context ~host ?remote () = match remote with | None -> Db.Host.get_cpu_info ~__context ~self:host | Some (rpc, session_id) -> Client.Client.Host.get_cpu_info ~rpc ~session_id ~self:host -let get_host_compatibility_info ~__context ~vm ~host ?remote () = - get_host_cpu_info ~__context ~vm ~host ?remote () - |> get_flags_for_vm ~__context vm +let get_host_compatibility_info ~__context ~domain_type ~host ?remote () = + get_host_cpu_info ~__context ~host ?remote () + |> get_flags_for_vm ~__context domain_type (* Compare the CPU on which the given VM was last booted to the CPU of the given host. *) -let assert_vm_is_compatible ~__context ~vm ~host ?remote () = +let assert_vm_is_compatible ~__context ~vm ~host = + let vm_ref, vm_rec, domain_type = + match vm with + | `db self -> + ( self + , Db.VM.get_record ~__context ~self + , Helpers.domain_type ~__context ~self + ) + | `import (vm_rec, dt) -> + (* Ref.null, because the VM to be imported does not yet have a ref *) + (Ref.null, vm_rec, Helpers.check_domain_type dt) + in let fail msg = raise (Api_errors.Server_error ( Api_errors.vm_incompatible_with_this_host - , [Ref.string_of vm; Ref.string_of host; msg] + , [Ref.string_of vm_ref; Ref.string_of host; msg] ) ) in - if Db.VM.get_power_state ~__context ~self:vm <> `Halted then + if vm_rec.API.vM_power_state <> `Halted then ( + let host_uuid = Db.Host.get_uuid ~__context ~self:host in + debug "Checking CPU compatibility of %s VM %s with host %s" + (Record_util.domain_type_to_string domain_type) + vm_rec.API.vM_uuid host_uuid ; let open Xapi_xenops_queue in let module Xenopsd = (val make_client (default_xenopsd ()) : XENOPS) in let dbg = Context.string_of_task __context in try let host_cpu_vendor, host_cpu_features = - get_host_compatibility_info ~__context ~vm ~host ?remote () + get_host_compatibility_info ~__context ~domain_type ~host () in - let vm_cpu_info = Db.VM.get_last_boot_CPU_flags ~__context ~self:vm in + let vm_cpu_info = vm_rec.API.vM_last_boot_CPU_flags in if List.mem_assoc cpu_info_vendor_key vm_cpu_info then ( (* Check the VM was last booted on a CPU with the same vendor as this host's CPU. *) let vm_cpu_vendor = List.assoc cpu_info_vendor_key vm_cpu_info in @@ -141,3 +156,4 @@ let assert_vm_is_compatible ~__context ~vm ~host ?remote () = fail "Host does not have new leveling feature keys - not comparing VM's \ flags" + ) diff --git a/ocaml/xapi/cpuid_helpers.mli b/ocaml/xapi/cpuid_helpers.mli index 4d5f091d7f6..ff672b884a2 100644 --- a/ocaml/xapi/cpuid_helpers.mli +++ b/ocaml/xapi/cpuid_helpers.mli @@ -16,11 +16,12 @@ val next_boot_cpu_features : __context:Context.t -> vm:[`VM] API.Ref.t -> string val assert_vm_is_compatible : __context:Context.t - -> vm:[`VM] API.Ref.t + -> vm:[`db of [`VM] API.Ref.t | `import of API.vM_t * API.domain_type] -> host:[`host] API.Ref.t - -> ?remote:(Rpc.call -> Rpc.response Client.Id.t) * [< `session] Ref.t - -> unit -> unit +(** Checks whether the CPU vendor and features used by the VM are compatible + with the given host. The VM can be one that is currently in the DB, or a record + coming from a metadata import as used for cross-pool migration. *) val vendor : string Map_check.field @@ -42,7 +43,6 @@ val features_hvm_host : [`host] Xenops_interface.CPU_policy.t Map_check.field val get_host_cpu_info : __context:Context.t - -> vm:[`VM] API.Ref.t -> host:[`host] API.Ref.t -> ?remote:(Rpc.call -> Rpc.response Client.Id.t) * [< `session] Ref.t -> unit diff --git a/ocaml/xapi/import.ml b/ocaml/xapi/import.ml index edc39a7673c..2a9d37b7972 100644 --- a/ocaml/xapi/import.ml +++ b/ocaml/xapi/import.ml @@ -50,6 +50,7 @@ type metadata_options = { * - If the migration is for real, we will expect the VM export code on the source host to have mapped the VDI locations onto their * mirrored counterparts which are present on this host. *) live: bool + ; check_cpu: bool ; (* An optional src VDI -> destination VDI rewrite list *) vdi_map: (string * string) list } @@ -75,6 +76,13 @@ type config = { let is_live config = match config.import_type with Metadata_import {live; _} -> live | _ -> false +let needs_cpu_check config = + match config.import_type with + | Metadata_import {check_cpu; _} -> + check_cpu + | _ -> + false + (** List of (datamodel classname * Reference in export * Reference in database) *) type table = (string * string * string) list @@ -253,8 +261,8 @@ let assert_can_restore_backup ~__context rpc session_id (x : header) = import_vms let assert_can_live_import __context vm_record = + let host = Helpers.get_localhost ~__context in let assert_memory_available () = - let host = Helpers.get_localhost ~__context in let host_mem_available = Memory_check.host_compute_free_memory_with_maximum_compression ~__context ~host None @@ -416,7 +424,7 @@ module VM : HandlerTools = struct | Skip | Clean_import of API.vM_t - let precheck __context config _rpc _session_id _state x = + let precheck __context config _rpc _session_id state x = let vm_record = get_vm_record x.snapshot in let is_default_template = vm_record.API.vM_is_default_template @@ -511,6 +519,28 @@ module VM : HandlerTools = struct | Replace (_, vm_record) | Clean_import vm_record -> if is_live config then assert_can_live_import __context vm_record ; + ( if needs_cpu_check config then + let vmm_record = + find_in_export + (Ref.string_of vm_record.API.vM_metrics) + state.export + |> API.vM_metrics_t_of_rpc + in + let host = Helpers.get_localhost ~__context in + (* Don't check CPUID on 'unspecified' snapshots *) + match + ( vm_record.API.vM_is_a_snapshot + , vmm_record.API.vM_metrics_current_domain_type + ) + with + | true, `unspecified -> + (* A snapshot which was taken from a shutdown VM. *) + () + | _, dt -> + Cpuid_helpers.assert_vm_is_compatible ~__context + ~vm:(`import (vm_record, dt)) + ~host + ) ; import_action | _ -> import_action @@ -2310,13 +2340,14 @@ let metadata_handler (req : Request.t) s _ = let force = find_query_flag req.Request.query "force" in let dry_run = find_query_flag req.Request.query "dry_run" in let live = find_query_flag req.Request.query "live" in + let check_cpu = find_query_flag req.Request.query "check_cpu" in let vdi_map = read_map_params "vdi" req.Request.query in info "VM.import_metadata: force = %b; full_restore = %b dry_run = %b; \ - live = %b; vdi_map = [ %s ]" - force full_restore dry_run live + live = %b; check_cpu = %b; vdi_map = [ %s ]" + force full_restore dry_run live check_cpu (String.concat "; " (List.map (fun (a, b) -> a ^ "=" ^ b) vdi_map)) ; - let metadata_options = {dry_run; live; vdi_map} in + let metadata_options = {dry_run; live; vdi_map; check_cpu} in let config = {import_type= Metadata_import metadata_options; full_restore; force} in diff --git a/ocaml/xapi/importexport.ml b/ocaml/xapi/importexport.ml index b6f784dc55c..a210bda04d6 100644 --- a/ocaml/xapi/importexport.ml +++ b/ocaml/xapi/importexport.ml @@ -249,6 +249,7 @@ type vm_export_import = { ; dry_run: bool ; live: bool ; send_snapshots: bool + ; check_cpu: bool } (* Copy VM metadata to a remote pool *) @@ -269,11 +270,12 @@ let remote_metadata_export_import ~__context ~rpc ~session_id ~remote_address match which with | `All -> [] - | `Only {live; dry_run; send_snapshots; _} -> + | `Only {live; dry_run; send_snapshots; check_cpu; _} -> [ Printf.sprintf "live=%b" live ; Printf.sprintf "dry_run=%b" dry_run ; Printf.sprintf "export_snapshots=%b" send_snapshots + ; Printf.sprintf "check_cpu=%b" check_cpu ] in let params = Printf.sprintf "restore=%b" restore :: params in diff --git a/ocaml/xapi/message_forwarding.ml b/ocaml/xapi/message_forwarding.ml index 6423e8d7be3..63b27076a1a 100644 --- a/ocaml/xapi/message_forwarding.ml +++ b/ocaml/xapi/message_forwarding.ml @@ -2473,7 +2473,7 @@ functor try bool_of_string (List.assoc "force" options) with _ -> false in if not force then - Cpuid_helpers.assert_vm_is_compatible ~__context ~vm ~host () ; + Cpuid_helpers.assert_vm_is_compatible ~__context ~vm:(`db vm) ~host ; let source_host = Db.VM.get_resident_on ~__context ~self:vm in with_vm_operation ~__context ~self:vm ~doc:"VM.pool_migrate" ~op:`pool_migrate ~strict:(not force) (fun () -> diff --git a/ocaml/xapi/xapi_dr.ml b/ocaml/xapi/xapi_dr.ml index dfe563ec204..3c87d848263 100644 --- a/ocaml/xapi/xapi_dr.ml +++ b/ocaml/xapi/xapi_dr.ml @@ -279,6 +279,7 @@ let recover_vms ~__context ~vms ~session_to ~force = { Import.dry_run= false ; Import.live= false + ; check_cpu= false ; vdi_map= [] (* we expect the VDI metadata to be present *) } in diff --git a/ocaml/xapi/xapi_vm.ml b/ocaml/xapi/xapi_vm.ml index 3acd99a763e..d45afa4282c 100644 --- a/ocaml/xapi/xapi_vm.ml +++ b/ocaml/xapi/xapi_vm.ml @@ -566,7 +566,7 @@ let resume ~__context ~vm ~start_paused ~force = ) ; let host = Helpers.get_localhost ~__context in if not force then - Cpuid_helpers.assert_vm_is_compatible ~__context ~vm ~host () ; + Cpuid_helpers.assert_vm_is_compatible ~__context ~vm:(`db vm) ~host ; (* Update CPU feature set, which will be passed to xenopsd *) Xapi_xenops.resume ~__context ~self:vm ~start_paused ~force diff --git a/ocaml/xapi/xapi_vm_helpers.ml b/ocaml/xapi/xapi_vm_helpers.ml index b7596bfbc67..d7f36c8f4de 100644 --- a/ocaml/xapi/xapi_vm_helpers.ml +++ b/ocaml/xapi/xapi_vm_helpers.ml @@ -628,7 +628,7 @@ let assert_matches_control_domain_affinity ~__context ~self ~host = let assert_enough_pcpus ~__context ~self ~host ?remote () = let vcpus = Db.VM.get_VCPUs_max ~__context ~self in let pcpus = - Cpuid_helpers.get_host_cpu_info ~__context ~vm:self ~host ?remote () + Cpuid_helpers.get_host_cpu_info ~__context ~host ?remote () |> Map_check.getf Cpuid_helpers.cpu_count |> Int64.of_int in @@ -699,7 +699,7 @@ let assert_can_boot_here ~__context ~self ~host ~snapshot ~do_cpuid_check assert_hardware_platform_support ~__context ~vm:self ~host:(Helpers.LocalObject host) ; if do_cpuid_check then - Cpuid_helpers.assert_vm_is_compatible ~__context ~vm:self ~host () ; + Cpuid_helpers.assert_vm_is_compatible ~__context ~vm:(`db self) ~host ; if do_sr_check then assert_can_see_SRs ~__context ~self ~host ; assert_can_see_networks ~__context ~self ~host ; diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index 4ac14efa270..397243a54e5 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -581,7 +581,7 @@ let intra_pool_vdi_remap ~__context vm vdi_map = vdis_and_callbacks let inter_pool_metadata_transfer ~__context ~remote ~vm ~vdi_map ~vif_map - ~vgpu_map ~dry_run ~live ~copy = + ~vgpu_map ~dry_run ~live ~copy ~check_cpu = List.iter (fun vdi_record -> let vdi = vdi_record.local_vdi_reference in @@ -617,7 +617,7 @@ let inter_pool_metadata_transfer ~__context ~remote ~vm ~vdi_map ~vif_map ) vgpu_map ; let vm_export_import = - {Importexport.vm; dry_run; live; send_snapshots= not copy} + {Importexport.vm; dry_run; live; send_snapshots= not copy; check_cpu} in finally (fun () -> @@ -1176,6 +1176,9 @@ let migrate_send' ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~vgpu_map let remote = remote_of_dest ~__context dest in (* Copy mode means we don't destroy the VM on the source host. We also don't copy over the RRDs/messages *) + let force = + try bool_of_string (List.assoc "force" options) with _ -> false + in let copy = try bool_of_string (List.assoc "copy" options) with _ -> false in let compress = use_compression ~__context options localhost remote.dest_host @@ -1463,6 +1466,7 @@ let migrate_send' ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~vgpu_map in inter_pool_metadata_transfer ~__context ~remote ~vm ~vdi_map ~vif_map ~vgpu_map ~dry_run:false ~live:true ~copy + ~check_cpu:(not force) in let vm = List.hd vms in let () = @@ -1818,12 +1822,6 @@ let assert_can_migrate ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~options (Api_errors.Server_error (Api_errors.host_disabled, [Ref.string_of remote.dest_host]) ) ; - (* Check that the VM's required CPU features are available on the host *) - if not force then - Cpuid_helpers.assert_vm_is_compatible ~__context ~vm - ~host:remote.dest_host - ~remote:(remote.rpc, remote.session) - () ; (* Check that the destination has enough pCPUs *) Xapi_vm_helpers.assert_enough_pcpus ~__context ~self:vm ~host:remote.dest_host @@ -1865,6 +1863,7 @@ let assert_can_migrate ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~options not (inter_pool_metadata_transfer ~__context ~remote ~vm ~vdi_map ~vif_map ~vgpu_map ~dry_run:true ~live:true ~copy + ~check_cpu:(not force) = [] ) then From b0f93fc3e3045d2c2f7acce5904dbe95145b99cc Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Thu, 27 Jul 2023 11:19:03 +0000 Subject: [PATCH 06/50] CA-380580: cross-pool migration: no CPU checks for halted VMs CPU checks are needed only for running VMs that are being migrated, to check for compatibility with the remote-host's CPUs. NOTE: This is the rebase of the initial work from 3d039f3259a3 that had to be reverted with the fix from df7cbfddffe4 incorporated. Signed-off-by: Rob Hoes --- ocaml/xapi/xapi_vm_migrate.ml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/xapi_vm_migrate.ml b/ocaml/xapi/xapi_vm_migrate.ml index 397243a54e5..3b561e370ab 100644 --- a/ocaml/xapi/xapi_vm_migrate.ml +++ b/ocaml/xapi/xapi_vm_migrate.ml @@ -1194,6 +1194,7 @@ let migrate_send' ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~vgpu_map We look at the VDIs of the VM, the VDIs of all of the snapshots, and any suspend-image VDIs. *) let vm_uuid = Db.VM.get_uuid ~__context ~self:vm in + let power_state = Db.VM.get_power_state ~__context ~self:vm in let vbds = Db.VM.get_VBDs ~__context ~self:vm in let vifs = Db.VM.get_VIFs ~__context ~self:vm in let snapshots = Db.VM.get_snapshots ~__context ~self:vm in @@ -1249,9 +1250,10 @@ let migrate_send' ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~vgpu_map in let suspends_vdis = List.fold_left - (fun acc vm -> - if Db.VM.get_power_state ~__context ~self:vm = `Suspended then - let vdi = Db.VM.get_suspend_VDI ~__context ~self:vm in + (fun acc vm_or_snapshot -> + if Db.VM.get_power_state ~__context ~self:vm_or_snapshot = `Suspended + then + let vdi = Db.VM.get_suspend_VDI ~__context ~self:vm_or_snapshot in let sr = Db.VDI.get_SR ~__context ~self:vdi in if is_intra_pool @@ -1259,7 +1261,7 @@ let migrate_send' ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~vgpu_map then acc else - get_vdi_mirror __context vm vdi false :: acc + get_vdi_mirror __context vm_or_snapshot vdi false :: acc else acc ) @@ -1466,7 +1468,7 @@ let migrate_send' ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~vgpu_map in inter_pool_metadata_transfer ~__context ~remote ~vm ~vdi_map ~vif_map ~vgpu_map ~dry_run:false ~live:true ~copy - ~check_cpu:(not force) + ~check_cpu:((not force) && power_state <> `Halted) in let vm = List.hd vms in let () = @@ -1863,7 +1865,7 @@ let assert_can_migrate ~__context ~vm ~dest ~live:_ ~vdi_map ~vif_map ~options not (inter_pool_metadata_transfer ~__context ~remote ~vm ~vdi_map ~vif_map ~vgpu_map ~dry_run:true ~live:true ~copy - ~check_cpu:(not force) + ~check_cpu:((not force) && power_state <> `Halted) = [] ) then From a540ac83579381583438df85d6c54ee38b866de8 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 12 Dec 2024 10:29:21 +0000 Subject: [PATCH 07/50] CA-403633: Keep vPCI devices in the same order QEMU orders devices by the time of plugging. Parallelizing them introduces randomness, which breaks the assumption that devices are ordered in a deterministic way. Serialize all PCI and VUSB plugs to restore behaviour. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenops_server.ml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index e5d8016bedb..f4c784faa11 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1630,12 +1630,11 @@ let rec atomics_of_operation = function ] ; [VM_create_device_model (id, false)] (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so the - following operations occur after creating the device models *) - ; parallel_concat "Devices.plug (qemu)" ~id - [ - List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other - ; List.map (fun vusb -> VUSB_plug vusb.Vusb.id) vusbs - ] + following operations occur after creating the device models. + The order of PCI devices depends on the order they are plugged, they + must be kept serialized. *) + ; List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other + ; List.map (fun vusb -> VUSB_plug vusb.Vusb.id) vusbs (* At this point the domain is considered survivable. *) ; [VM_set_domain_action_request (id, None)] ] @@ -1698,10 +1697,10 @@ let rec atomics_of_operation = function ) ; [VM_create_device_model (id, true)] (* PCI and USB devices are hot-plugged into HVM guests via QEMU, so - the following operations occur after creating the device models *) - ; parallel_map "PCIs.plug" ~id pcis_other (fun pci -> - [PCI_plug (pci.Pci.id, true)] - ) + the following operations occur after creating the device models. + The order of PCI devices depends on the order they are plugged, they + must be kept serialized. *) + ; List.map (fun pci -> PCI_plug (pci.Pci.id, true)) pcis_other ] |> List.concat | VM_poweroff (id, timeout) -> From 6f6cd81bf8b7508162de6e641e72737126020873 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Mon, 16 Dec 2024 16:46:35 +0800 Subject: [PATCH 08/50] CA-403767: verifyPeer can't use root CA for appliance cert check It is expected to use root CA certficate to verify an appliance's server certificate for a xapi outgoing TLS connection. Prior to this change, the related stunnel configurations are: "verifyPeer=yes", and "checkHost=". The 'verifyPeer' option of stunnel doesn't treat the CA bundle as root CA certificates. The 'checkHost' option of stunnel only checks the host name against the one in server certificate. In other words, the issue is that the root CA based checking doesn't work for appliance. This change adds 'verifyChain' for the appliance to ensure the outgoing TLS connection from xapi will verify the appliance's server certificates by real root CA certificate. Signed-off-by: Ming Lu --- ocaml/libs/stunnel/stunnel.ml | 40 +++++++++++++++++------------------ 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/ocaml/libs/stunnel/stunnel.ml b/ocaml/libs/stunnel/stunnel.ml index 8d319b4b80d..6b7d42608e7 100644 --- a/ocaml/libs/stunnel/stunnel.ml +++ b/ocaml/libs/stunnel/stunnel.ml @@ -218,29 +218,29 @@ let config_file ?(accept = None) config host port = | None -> [] | Some {sni; verify; cert_bundle_path} -> - [ - "" - ; "# use SNI to request a specific cert. CAfile contains" - ; "# public certs of all hosts in the pool and must contain" - ; "# the cert of the server we connect to" - ; (match sni with None -> "" | Some s -> sprintf "sni = %s" s) - ; ( match verify with + List.rev_append + ( match verify with | VerifyPeer -> - "" + ["verifyPeer=yes"] | CheckHost -> - sprintf "checkHost=%s" host - ) - ; "verifyPeer=yes" - ; sprintf "CAfile=%s" cert_bundle_path - ; ( match Sys.readdir crl_path with - | [||] -> - "" - | _ -> - sprintf "CRLpath=%s" crl_path - | exception _ -> - "" + [sprintf "checkHost=%s" host; "verifyChain=yes"] ) - ] + [ + "" + ; "# use SNI to request a specific cert. CAfile contains" + ; "# public certs of all hosts in the pool and must contain" + ; "# the cert of the server we connect to" + ; (match sni with None -> "" | Some s -> sprintf "sni = %s" s) + ; sprintf "CAfile=%s" cert_bundle_path + ; ( match Sys.readdir crl_path with + | [||] -> + "" + | _ -> + sprintf "CRLpath=%s" crl_path + | exception _ -> + "" + ) + ] ) ; [""] ] From 36b9e1a72499d40223cbff746e2867ef57d0fff1 Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 16 Dec 2024 10:30:55 +0000 Subject: [PATCH 09/50] Simplify event generation predicate in Xapi_event Precompute a table of object names for which events should be propagated. This avoids the list querying done every time the database queues an event. Signed-off-by: Colin James --- ocaml/xapi/xapi_event.ml | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/ocaml/xapi/xapi_event.ml b/ocaml/xapi/xapi_event.ml index 600d2859dd3..cdc82ca20d8 100644 --- a/ocaml/xapi/xapi_event.ml +++ b/ocaml/xapi/xapi_event.ml @@ -757,28 +757,27 @@ let inject ~__context ~_class ~_ref = (* Internal interface ****************************************************) -let event_add ?snapshot ty op reference = - let objs = - List.filter - (fun x -> x.Datamodel_types.gen_events) - (Dm_api.objects_of_api Datamodel.all_api) +let generate_events_for = + let table = Hashtbl.create 64 in + let add_object ({name; gen_events; _} : Datamodel_types.obj) = + (* Record only the names of objects that should generate events. *) + if gen_events then + Hashtbl.replace table name () in - let objs = List.map (fun x -> x.Datamodel_types.name) objs in - if List.mem ty objs then ( + Dm_api.objects_of_api Datamodel.all_api |> List.iter add_object ; + Hashtbl.mem table + +let event_add ?snapshot ty op reference = + let add () = + let id = Int64.to_string !Next.id in let ts = string_of_float (Unix.time ()) in + let ty = String.lowercase_ascii ty in let op = op_of_string op in - let ev = - { - id= Int64.to_string !Next.id - ; ts - ; ty= String.lowercase_ascii ty - ; op - ; reference - ; snapshot - } - in + let ev = {id; ts; ty; op; reference; snapshot} in From.add ev ; Next.add ev - ) + in + if generate_events_for ty then + add () let register_hooks () = Xapi_database.Db_action_helper.events_register event_add From ea6f35529e55e4a017d6f9047cc1697a667667a5 Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 16 Dec 2024 12:25:44 +0000 Subject: [PATCH 10/50] Update comment to include implicit invariant Signed-off-by: Colin James --- ocaml/idl/datamodel.ml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ocaml/idl/datamodel.ml b/ocaml/idl/datamodel.ml index 83d5d1740c3..33eb339dfa1 100644 --- a/ocaml/idl/datamodel.ml +++ b/ocaml/idl/datamodel.ml @@ -10448,7 +10448,18 @@ let all_system = * updated must come first. The second field will be automatically * kept * up-to-date. *) -(** These are the pairs of (object, field) which are bound together in the database schema *) +(** + These are the pairs of (object, field) which are bound together in + the database schema. + + It is assumed that, for any entry (p, p'), neither p nor p' + appears in any other entry. It may be the case that p = p', which + is the only instance where some object-field pair may appear more + than once. + + This is implicitly assumed by other code which treats this list - + and its symmetric closure - as an association list + without duplicate keys. *) let all_relations = [ (* snapshots *) From a4e55f52d859be2a6935752980fe25c82d83ef7f Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 13 Dec 2024 12:03:58 +0000 Subject: [PATCH 11/50] IH-747 - xapi/import: Don't lie in VDI import logs Instead of logging errors like this and then immediately handling them: ``` [error|1141 |VM metadata import |import] Found no VDI with location = dedeeb44-62b3-460e-b55c-6de45ba10cc0: treating as fatal and abandoning import [debug|1141 |VM metadata import |import] Ignoring missing disk Ref:4 - this will be mirrored during a real live migration. ``` Log once in the handler: ``` [ warn|VM metadata import |import] Ignoring missing disk Ref:16 - this will be mirrored during a real live migration. (Suppressed error: 'Found no VDI with location = c208b47c-cf87-495f-bd3c-a4bc8167ef83') ``` Signed-off-by: Andrii Sultanov --- ocaml/xapi/import.ml | 50 +++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/ocaml/xapi/import.ml b/ocaml/xapi/import.ml index 2a9d37b7972..39e069b2768 100644 --- a/ocaml/xapi/import.ml +++ b/ocaml/xapi/import.ml @@ -939,7 +939,7 @@ module VDI : HandlerTools = struct | Found_iso of API.ref_VDI | Found_no_iso | Found_disk of API.ref_VDI - | Found_no_disk of exn + | Found_no_disk of string * exn | Skip | Create of API.vDI_t @@ -1090,27 +1090,31 @@ module VDI : HandlerTools = struct | Some vdi -> Found_disk vdi | None -> - error "Found no VDI with location = %s: %s" - vdi_record.API.vDI_location - ( if config.force then - "ignoring error because '--force' is set" - else - "treating as fatal and abandoning import" - ) ; - if config.force then - Skip - else if exists vdi_record.API.vDI_SR state.table then + let error_string = + Printf.sprintf "Found no VDI with location = %s%s" + vdi_record.API.vDI_location + ( if config.force then + ": ignoring error because '--force' is set" + else + "" + ) + in + if config.force then ( + warn "%s" error_string ; Skip + ) else if exists vdi_record.API.vDI_SR state.table then let sr = lookup vdi_record.API.vDI_SR state.table in Found_no_disk - (Api_errors.Server_error - ( Api_errors.vdi_location_missing - , [Ref.string_of sr; vdi_record.API.vDI_location] - ) + ( error_string + , Api_errors.Server_error + ( Api_errors.vdi_location_missing + , [Ref.string_of sr; vdi_record.API.vDI_location] + ) ) else Found_no_disk - (Api_errors.Server_error - (Api_errors.vdi_content_id_missing, []) + ( error_string + , Api_errors.Server_error + (Api_errors.vdi_content_id_missing, []) ) ) ) @@ -1123,18 +1127,19 @@ module VDI : HandlerTools = struct state.table <- (x.cls, x.id, Ref.string_of vdi) :: state.table | Found_no_iso -> () (* VDI will be ejected. *) - | Found_no_disk e -> ( + | Found_no_disk (error_string, e) -> ( match config.import_type with | Metadata_import {live= true; _} -> (* We expect the disk to be missing during a live migration dry run. *) - debug + info "Ignoring missing disk %s - this will be mirrored during a real \ - live migration." - x.id ; + live migration. (Suppressed error: '%s')" + x.id error_string ; (* Create a dummy disk in the state table so the VBD import has a disk to look up. *) let dummy_vdi = Ref.make () in state.table <- (x.cls, x.id, Ref.string_of dummy_vdi) :: state.table | _ -> + error "%s - treating as fatal and abandoning import" error_string ; raise e ) | Skip -> @@ -1161,7 +1166,8 @@ module VDI : HandlerTools = struct with Not_found -> () ) Xapi_globs.vdi_other_config_sync_keys - | Found_no_disk e -> + | Found_no_disk (error_string, e) -> + error "%s - treating as fatal and abandoning import" error_string ; raise e | Create vdi_record -> (* Make a new VDI for streaming data into; adding task-id to sm-config on VDI.create so SM backend can see this is an import *) From cfc6594e1d2a70660a7800882b07892af63a3577 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 13 Dec 2024 15:41:53 +0000 Subject: [PATCH 12/50] IH-747 - database: Add an internal get_by_uuid_opt method Instead of raising an exception in case of an error like get_by_uuid, return None to be handled gracefully later. Do not expose it in the datamodel. This will later be used when an object is checked to exist before its creation (during migration, for example), and so its absence is expected - no need to raise a backtrace and pollute the logs with errors. Signed-off-by: Andrii Sultanov --- ocaml/database/db_cache_impl.ml | 15 +++++++++ ocaml/database/db_interface.ml | 5 +++ ocaml/database/db_rpc_client_v1.ml | 4 +++ ocaml/database/db_rpc_client_v2.ml | 7 +++++ ocaml/database/db_rpc_common_v1.ml | 2 ++ ocaml/database/db_rpc_common_v2.ml | 1 + ocaml/idl/ocaml_backend/gen_db_actions.ml | 37 ++++++++++++++++++++++- 7 files changed, 70 insertions(+), 1 deletion(-) diff --git a/ocaml/database/db_cache_impl.ml b/ocaml/database/db_cache_impl.ml index b4f23b0af00..4c4f33b728a 100644 --- a/ocaml/database/db_cache_impl.ml +++ b/ocaml/database/db_cache_impl.ml @@ -240,6 +240,21 @@ let db_get_by_uuid t tbl uuid_val = | _ -> raise (Too_many_values (tbl, "", uuid_val)) +let db_get_by_uuid_opt t tbl uuid_val = + match + read_field_where t + { + table= tbl + ; return= Db_names.ref + ; where_field= Db_names.uuid + ; where_value= uuid_val + } + with + | [r] -> + Some r + | _ -> + None + (** Return reference fields from tbl that matches specified name_label field *) let db_get_by_name_label t tbl label = read_field_where t diff --git a/ocaml/database/db_interface.ml b/ocaml/database/db_interface.ml index 834c12cd8a1..081abc687bd 100644 --- a/ocaml/database/db_interface.ml +++ b/ocaml/database/db_interface.ml @@ -56,6 +56,11 @@ module type DB_ACCESS = sig (** [db_get_by_uuid tbl uuid] returns the single object reference associated with [uuid] *) + val db_get_by_uuid_opt : Db_ref.t -> string -> string -> string option + (** [db_get_by_uuid_opt tbl uuid] returns [Some obj] with the single object + reference associated with [uuid] if one exists and [None] otherwise, + instead of raising an exception like [get_by_uuid] *) + val db_get_by_name_label : Db_ref.t -> string -> string -> string list (** [db_get_by_name_label tbl label] returns the list of object references associated with [label] *) diff --git a/ocaml/database/db_rpc_client_v1.ml b/ocaml/database/db_rpc_client_v1.ml index ecde5c4060b..7adbcd6bbed 100644 --- a/ocaml/database/db_rpc_client_v1.ml +++ b/ocaml/database/db_rpc_client_v1.ml @@ -88,6 +88,10 @@ functor do_remote_call marshall_db_get_by_uuid_args unmarshall_db_get_by_uuid_response "db_get_by_uuid" (t, u) + let db_get_by_uuid_opt _ t u = + do_remote_call marshall_db_get_by_uuid_args + unmarshall_db_get_by_uuid_opt_response "db_get_by_uuid_opt" (t, u) + let db_get_by_name_label _ t l = do_remote_call marshall_db_get_by_name_label_args unmarshall_db_get_by_name_label_response "db_get_by_name_label" (t, l) diff --git a/ocaml/database/db_rpc_client_v2.ml b/ocaml/database/db_rpc_client_v2.ml index 3c85dd82fcf..3a32b3149e9 100644 --- a/ocaml/database/db_rpc_client_v2.ml +++ b/ocaml/database/db_rpc_client_v2.ml @@ -77,6 +77,13 @@ functor | _ -> raise Remote_db_server_returned_bad_message + let db_get_by_uuid_opt _ t u = + match process (Request.Db_get_by_uuid (t, u)) with + | Response.Db_get_by_uuid_opt y -> + y + | _ -> + raise Remote_db_server_returned_bad_message + let db_get_by_name_label _ t l = match process (Request.Db_get_by_name_label (t, l)) with | Response.Db_get_by_name_label y -> diff --git a/ocaml/database/db_rpc_common_v1.ml b/ocaml/database/db_rpc_common_v1.ml index 1966595938f..cced73dd9ca 100644 --- a/ocaml/database/db_rpc_common_v1.ml +++ b/ocaml/database/db_rpc_common_v1.ml @@ -194,6 +194,8 @@ let marshall_db_get_by_uuid_response s = XMLRPC.To.string s let unmarshall_db_get_by_uuid_response xml = XMLRPC.From.string xml +let unmarshall_db_get_by_uuid_opt_response xml = unmarshall_stringopt xml + (* db_get_by_name_label *) let marshall_db_get_by_name_label_args (s1, s2) = marshall_2strings (s1, s2) diff --git a/ocaml/database/db_rpc_common_v2.ml b/ocaml/database/db_rpc_common_v2.ml index 5ecf1b3e797..4cd9d7541ab 100644 --- a/ocaml/database/db_rpc_common_v2.ml +++ b/ocaml/database/db_rpc_common_v2.ml @@ -59,6 +59,7 @@ module Response = struct | Find_refs_with_filter of string list | Read_field_where of string list | Db_get_by_uuid of string + | Db_get_by_uuid_opt of string option | Db_get_by_name_label of string list | Create_row of unit | Delete_row of unit diff --git a/ocaml/idl/ocaml_backend/gen_db_actions.ml b/ocaml/idl/ocaml_backend/gen_db_actions.ml index 06f54f228ba..86e9f426883 100644 --- a/ocaml/idl/ocaml_backend/gen_db_actions.ml +++ b/ocaml/idl/ocaml_backend/gen_db_actions.ml @@ -93,6 +93,9 @@ let dm_to_string tys : O.Module.t = "fun x -> x |> SecretString.rpc_of_t |> Rpc.string_of_rpc" | DT.Record _ -> failwith "record types never stored in the database" + | DT.Option (DT.Ref _ as ty) -> + String.concat "" + ["fun s -> set "; OU.alias_of_ty ty; "(Option.to_list s)"] | DT.Option _ -> failwith "option types never stored in the database" in @@ -148,6 +151,13 @@ let string_to_dm tys : O.Module.t = "SecretString.of_string" | DT.Record _ -> failwith "record types never stored in the database" + | DT.Option (DT.Ref _ as ty) -> + String.concat "" + [ + "fun s -> match set " + ; OU.alias_of_ty ty + ; " s with [] -> None | x::_ -> Some x" + ] | DT.Option _ -> failwith "option types never stored in the database" in @@ -515,7 +525,32 @@ let db_action api : O.Module.t = (Escaping.escape_obj obj.DT.name) (OU.escape name) in - _string_to_dm ^ "." ^ OU.alias_of_ty result_ty ^ " (" ^ query ^ ")" + let func = + _string_to_dm + ^ "." + ^ OU.alias_of_ty result_ty + ^ " (" + ^ query + ^ ")" + in + let query_opt = + Printf.sprintf "DB.db_get_by_uuid_opt __t \"%s\" %s" + (Escaping.escape_obj obj.DT.name) + (OU.escape name) + in + String.concat "\n\t\t" + ([func] + @ [ + String.concat "\n\t\t " + (["and get_by_uuid_opt ~__context ~uuid ="] + @ open_db_module + @ [ + Printf.sprintf "Option.map %s.%s (%s)" _string_to_dm + (OU.alias_of_ty result_ty) query_opt + ] + ) + ] + ) | _ -> failwith "GetByUuid call should have only one parameter and a result!" From 81c2a6af0b4965e50ebb0e34850b402a7ed4c487 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Fri, 13 Dec 2024 12:06:40 +0000 Subject: [PATCH 13/50] IH-747 - xapi/import: Use get_by_uuid_opt to not log backtraces when failure is expected Migration logs are always full of exceptions that are expected and immediately handled: ``` [error|backtrace] SR.get_by_uuid D:8651cc0c9fb6 failed with exception Db_exn.Read_missing_uuid("SR", "", "a94bf103-0169-6d70-8874-334261f5098e") [error|backtrace] Raised Db_exn.Read_missing_uuid("SR", "", "a94bf103-0169-6d70-8874-334261f5098e") [error|backtrace] 1/9 xapi Raised at file ocaml/database/db_cache_impl.ml, line 237 [error|backtrace] 2/9 xapi Called from file ocaml/xapi/db_actions.ml, line 13309 [error|backtrace] 3/9 xapi Called from file ocaml/xapi/rbac.ml, line 188 [error|backtrace] 4/9 xapi Called from file ocaml/xapi/rbac.ml, line 197 [error|backtrace] 5/9 xapi Called from file ocaml/xapi/server_helpers.ml, line 74 [error|backtrace] 6/9 xapi Called from file ocaml/xapi/server_helpers.ml, line 96 [error|backtrace] 7/9 xapi Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 24 [error|backtrace] 8/9 xapi Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 39 [error|backtrace] 9/9 xapi Called from file ocaml/libs/log/debug.ml, line 250 [ warn|import] Failed to find SR with UUID: a94bf103-0169-6d70-8874-334261f5098e content-type: user - will still try to find individual VDIs [....] [debug|import] Importing 1 VM_guest_metrics(s) [debug|import] Importing 1 VM_metrics(s) [debug|import] Importing 1 VM(s) [debug|import] Importing 1 network(s) [debug|import] Importing 0 GPU_group(s) [debug|import] Importing 1 VBD(s) [error|backtrace] VBD.get_by_uuid D:3a12311e8be4 failed with exception Db_exn.Read_missing_uuid("VBD", "", "026d61e9-ed8a-fc72-7fd3-77422585baff") [error|backtrace] Raised Db_exn.Read_missing_uuid("VBD", "", "026d61e9-ed8a-fc72-7fd3-77422585baff") [error|backtrace] 1/9 xapi Raised at file ocaml/database/db_cache_impl.ml, line 237 [error|backtrace] 2/9 xapi Called from file ocaml/xapi/db_actions.ml, line 14485 [error|backtrace] 3/9 xapi Called from file ocaml/xapi/rbac.ml, line 188 [error|backtrace] 4/9 xapi Called from file ocaml/xapi/rbac.ml, line 197 [error|backtrace] 5/9 xapi Called from file ocaml/xapi/server_helpers.ml, line 74 [error|backtrace] 6/9 xapi Called from file ocaml/xapi/server_helpers.ml, line 96 [error|backtrace] 7/9 xapi Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 24 [error|backtrace] 8/9 xapi Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 39 [error|backtrace] 9/9 xapi Called from file ocaml/libs/log/debug.ml, line 250 [debug|import] Importing 1 VIF(s) [error|backtrace] VIF.get_by_uuid D:2bc78449e0bc failed with exception Db_exn.Read_missing_uuid("VIF", "", "7d14aee4-47a4-e271-4f64-fe9f9ef6d50b") [error|backtrace] Raised Db_exn.Read_missing_uuid("VIF", "", "7d14aee4-47a4-e271-4f64-fe9f9ef6d50b") [error|backtrace] 1/9 xapi Raised at file ocaml/database/db_cache_impl.ml, line 237 [error|backtrace] 2/9 xapi Called from file ocaml/xapi/db_actions.ml, line 10813 [error|backtrace] 3/9 xapi Called from file ocaml/xapi/rbac.ml, line 188 [error|backtrace] 4/9 xapi Called from file ocaml/xapi/rbac.ml, line 197 [error|backtrace] 5/9 xapi Called from file ocaml/xapi/server_helpers.ml, line 74 [error|backtrace] 6/9 xapi Called from file ocaml/xapi/server_helpers.ml, line 96 [error|backtrace] 7/9 xapi Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 24 [error|backtrace] 8/9 xapi Called from file ocaml/libs/xapi-stdext/lib/xapi-stdext-pervasives/pervasiveext.ml, line 39 [error|backtrace] 9/9 xapi Called from file ocaml/libs/log/debug.ml, line 250 ``` Use an internal get_by_uuid_opt call and match on the Option instead, with the logs looking much clearer: ``` [debug|import] Importing 1 host(s) [debug|import] Importing 2 SR(s) [ warn|import] Failed to find SR with UUID: 8568e308-c61c-3b10-3953-45606cfecede content-type: - will still try to find individual VDIs [ warn|import] Failed to find SR with UUID: 40e9e252-46ac-ed3d-7a4c-6db175212195 content-type: user - will still try to find individual VDIs [...] [debug|import] Importing 2 VM_guest_metrics(s) [debug|import] Importing 2 VM(s) [debug|import] Importing 1 network(s) [debug|import] Importing 1 GPU_group(s) [debug|import] Importing 4 VBD(s) [ info|import] Did not find an already existing VBD with the same uuid=569d0e60-6a89-d1fa-2ed6-38b8eebe9065, try to create a new one [ info|import] Did not find an already existing VBD with the same uuid=533306da-cff1-7ada-71f7-2c4de8a0065b, try to create a new one [ info|import] Did not find an already existing VBD with the same uuid=f9dec620-0180-f67f-6711-7f9e5222a682, try to create a new one [ info|import] Did not find an already existing VBD with the same uuid=05e55076-b559-9b49-c247-e7850984ddae, try to create a new one [debug|import] Importing 2 VIF(s) [ info|import] Did not find an already existing VIF with the same uuid=a5a731d5-622c-5ca5-5b2a-a0053a11ef07, try to create a new one [ info|import] Did not find an already existing VIF with the same uuid=1738bf20-8d16-0d69-48cd-8f3d9e7ea791, try to create a new one ``` Signed-off-by: Andrii Sultanov --- ocaml/xapi/import.ml | 481 +++++++++++++++++++++++-------------------- 1 file changed, 262 insertions(+), 219 deletions(-) diff --git a/ocaml/xapi/import.ml b/ocaml/xapi/import.ml index 39e069b2768..3dd311d72e9 100644 --- a/ocaml/xapi/import.ml +++ b/ocaml/xapi/import.ml @@ -887,27 +887,31 @@ module SR : HandlerTools = struct | Will_use_SR of API.ref_SR | SR_not_needed - let precheck __context config rpc session_id _state x = + let precheck __context config _rpc _session_id _state x = let sr_record = API.sR_t_of_rpc x.snapshot in match config.import_type with | Metadata_import _ -> ( - try + match (* Look up the existing SR record *) - let sr = - Client.SR.get_by_uuid ~rpc ~session_id ~uuid:sr_record.API.sR_uuid - in - Found_SR sr - with _ -> - let msg = - match sr_record.API.sR_content_type with - | "iso" -> - "- will eject disk" (* Will be handled specially in handle_vdi *) - | _ -> - "- will still try to find individual VDIs" - in - warn "Failed to find SR with UUID: %s content-type: %s %s" - sr_record.API.sR_uuid sr_record.API.sR_content_type msg ; - Found_no_SR + (* Use an internal DB call - this avoids raising an exception and logging + the backtrace internally in case of a (reasonably expected) absence of + the object with this UUID *) + Db.SR.get_by_uuid_opt ~__context ~uuid:sr_record.API.sR_uuid + with + | Some sr -> + Found_SR sr + | None -> + let msg = + match sr_record.API.sR_content_type with + | "iso" -> + "- will eject disk" + (* Will be handled specially in handle_vdi *) + | _ -> + "- will still try to find individual VDIs" + in + warn "Failed to find SR with UUID: %s content-type: %s %s" + sr_record.API.sR_uuid sr_record.API.sR_content_type msg ; + Found_no_SR ) | Full_import sr -> if sr_record.API.sR_content_type = "iso" then @@ -1365,80 +1369,94 @@ end module VBD : HandlerTools = struct type precheck_t = Found_VBD of API.ref_VBD | Skip | Create of API.vBD_t - let precheck __context config rpc session_id state x = + let precheck __context config _rpc _session_id state x = let vbd_record = API.vBD_t_of_rpc x.snapshot in let get_vbd () = - Client.VBD.get_by_uuid ~rpc ~session_id ~uuid:vbd_record.API.vBD_uuid + (* Use an internal DB call - this avoids raising an exception and logging + the backtrace internally in case of a (reasonably expected) absence of + the object with this UUID *) + Db.VBD.get_by_uuid_opt ~__context ~uuid:vbd_record.API.vBD_uuid in - let vbd_exists () = - try - ignore (get_vbd ()) ; - true - with _ -> false + let vbd_opt = + (* If there's already a VBD with the same UUID and we're preserving UUIDs, use that one. *) + if config.full_restore then ( + match get_vbd () with + | Some x -> + Some x + | None -> + info + "Did not find an already existing VBD with the same uuid=%s, try \ + to create a new one" + vbd_record.API.vBD_uuid ; + None + ) else + None in - if config.full_restore && vbd_exists () then - let vbd = get_vbd () in - Found_VBD vbd - else - let vm = - log_reraise - ("Failed to find VBD's VM: " ^ Ref.string_of vbd_record.API.vBD_VM) - (lookup vbd_record.API.vBD_VM) - state.table - in - (* If the VBD is supposed to be attached to a PV guest (which doesn't support - currently_attached empty drives) then throw a fatal error. *) - let original_vm = - get_vm_record - (find_in_export (Ref.string_of vbd_record.API.vBD_VM) state.export) - in - (* Note: the following is potentially inaccurate: the find out whether a running or - * suspended VM has booted HVM, we must consult the VM metrics, but those aren't - * available in the exported metadata. *) - let has_qemu = Helpers.will_have_qemu_from_record original_vm in - (* In the case of dry_run live migration, don't check for - missing disks as CDs will be ejected before the real migration. *) - let dry_run, live = - match config.import_type with - | Metadata_import {dry_run; live; _} -> - (dry_run, live) - | _ -> - (false, false) - in - ( if - vbd_record.API.vBD_currently_attached - && not (exists vbd_record.API.vBD_VDI state.table) - then - (* It's only ok if it's a CDROM attached to an HVM guest, or it's part of SXM and we know the sender would eject it. *) - let will_eject = - dry_run && live && original_vm.API.vM_power_state <> `Suspended - in - if not (vbd_record.API.vBD_type = `CD && (has_qemu || will_eject)) + match vbd_opt with + | Some vbd -> + Found_VBD vbd + | None -> ( + let vm = + log_reraise + ("Failed to find VBD's VM: " ^ Ref.string_of vbd_record.API.vBD_VM) + (lookup vbd_record.API.vBD_VM) + state.table + in + (* If the VBD is supposed to be attached to a PV guest (which doesn't support + currently_attached empty drives) then throw a fatal error. *) + let original_vm = + get_vm_record + (find_in_export (Ref.string_of vbd_record.API.vBD_VM) state.export) + in + (* Note: the following is potentially inaccurate: the find out whether a running or + * suspended VM has booted HVM, we must consult the VM metrics, but those aren't + * available in the exported metadata. *) + let has_qemu = Helpers.will_have_qemu_from_record original_vm in + (* In the case of dry_run live migration, don't check for + missing disks as CDs will be ejected before the real migration. *) + let dry_run, live = + match config.import_type with + | Metadata_import {dry_run; live; _} -> + (dry_run, live) + | _ -> + (false, false) + in + ( if + vbd_record.API.vBD_currently_attached + && not (exists vbd_record.API.vBD_VDI state.table) then - raise (IFailure Attached_disks_not_found) - ) ; - let vbd_record = {vbd_record with API.vBD_VM= vm} in - match - (vbd_record.API.vBD_type, exists vbd_record.API.vBD_VDI state.table) - with - | `CD, false | `Floppy, false -> - if has_qemu || original_vm.API.vM_power_state <> `Suspended then - Create {vbd_record with API.vBD_VDI= Ref.null; API.vBD_empty= true} - (* eject *) - else - Create vbd_record - | `Disk, false -> - (* omit: cannot have empty disks *) - warn - "Cannot import VM's disk: was it an .iso attached as a disk rather \ - than CD?" ; - Skip - | _, true -> - Create - { - vbd_record with - API.vBD_VDI= lookup vbd_record.API.vBD_VDI state.table - } + (* It's only ok if it's a CDROM attached to an HVM guest, or it's part of SXM and we know the sender would eject it. *) + let will_eject = + dry_run && live && original_vm.API.vM_power_state <> `Suspended + in + if not (vbd_record.API.vBD_type = `CD && (has_qemu || will_eject)) + then + raise (IFailure Attached_disks_not_found) + ) ; + let vbd_record = {vbd_record with API.vBD_VM= vm} in + match + (vbd_record.API.vBD_type, exists vbd_record.API.vBD_VDI state.table) + with + | `CD, false | `Floppy, false -> + if has_qemu || original_vm.API.vM_power_state <> `Suspended then + Create + {vbd_record with API.vBD_VDI= Ref.null; API.vBD_empty= true} + (* eject *) + else + Create vbd_record + | `Disk, false -> + (* omit: cannot have empty disks *) + warn + "Cannot import VM's disk: was it an .iso attached as a disk \ + rather than CD?" ; + Skip + | _, true -> + Create + { + vbd_record with + API.vBD_VDI= lookup vbd_record.API.vBD_VDI state.table + } + ) let handle_dry_run __context _config _rpc _session_id state x precheck_result = @@ -1493,88 +1511,99 @@ end module VIF : HandlerTools = struct type precheck_t = Found_VIF of API.ref_VIF | Create of API.vIF_t - let precheck __context config rpc session_id state x = + let precheck __context config _rpc _session_id state x = let vif_record = API.vIF_t_of_rpc x.snapshot in let get_vif () = - Client.VIF.get_by_uuid ~rpc ~session_id ~uuid:vif_record.API.vIF_uuid + (* Use an internal DB call - this avoids raising an exception and logging + the backtrace internally in case of a (reasonably expected) absence of + the object with this UUID *) + Db.VIF.get_by_uuid_opt ~__context ~uuid:vif_record.API.vIF_uuid in - let vif_exists () = - try - ignore (get_vif ()) ; - true - with _ -> false + let vif_opt = + if config.full_restore then ( + (* If there's already a VIF with the same UUID and we're preserving UUIDs, use that one. *) + match get_vif () with + | Some x -> + Some x + | None -> + info + "Did not find an already existing VIF with the same uuid=%s, try \ + to create a new one" + vif_record.API.vIF_uuid ; + None + ) else + None in - if config.full_restore && vif_exists () then - (* If there's already a VIF with the same UUID and we're preserving UUIDs, use that one. *) - let vif = get_vif () in - Found_VIF vif - else - (* If not restoring a full backup then blank the MAC so it is regenerated *) - let vif_record = - { - vif_record with - API.vIF_MAC= - (if config.full_restore then vif_record.API.vIF_MAC else "") - } - in - (* Determine the VM to which we're going to attach this VIF. *) - let vm = - log_reraise - ("Failed to find VIF's VM: " ^ Ref.string_of vif_record.API.vIF_VM) - (lookup vif_record.API.vIF_VM) - state.table - in - (* Determine the network to which we're going to attach this VIF. *) - let net = - (* If we find the cross-pool migration key, attach the VIF to that network... *) - if - List.mem_assoc Constants.storage_migrate_vif_map_key - vif_record.API.vIF_other_config - then - Ref.of_string - (List.assoc Constants.storage_migrate_vif_map_key - vif_record.API.vIF_other_config - ) - else - (* ...otherwise fall back to looking up the network from the state table. *) + match vif_opt with + | Some vif -> + Found_VIF vif + | None -> + (* If not restoring a full backup then blank the MAC so it is regenerated *) + let vif_record = + { + vif_record with + API.vIF_MAC= + (if config.full_restore then vif_record.API.vIF_MAC else "") + } + in + (* Determine the VM to which we're going to attach this VIF. *) + let vm = log_reraise - ("Failed to find VIF's Network: " - ^ Ref.string_of vif_record.API.vIF_network - ) - (lookup vif_record.API.vIF_network) + ("Failed to find VIF's VM: " ^ Ref.string_of vif_record.API.vIF_VM) + (lookup vif_record.API.vIF_VM) state.table - in - (* Make sure we remove the cross-pool migration VIF mapping key from the other_config - * before creating a VIF - otherwise we'll risk sending this key on to another pool - * during a future cross-pool migration and it won't make sense. *) - let other_config = - List.filter - (fun (k, _) -> k <> Constants.storage_migrate_vif_map_key) - vif_record.API.vIF_other_config - in - (* Construct the VIF record we're going to try to create locally. *) - let vif_record = - if Pool_features.is_enabled ~__context Features.VIF_locking then - vif_record - else if vif_record.API.vIF_locking_mode = `locked then + in + (* Determine the network to which we're going to attach this VIF. *) + let net = + (* If we find the cross-pool migration key, attach the VIF to that network... *) + if + List.mem_assoc Constants.storage_migrate_vif_map_key + vif_record.API.vIF_other_config + then + Ref.of_string + (List.assoc Constants.storage_migrate_vif_map_key + vif_record.API.vIF_other_config + ) + else + (* ...otherwise fall back to looking up the network from the state table. *) + log_reraise + ("Failed to find VIF's Network: " + ^ Ref.string_of vif_record.API.vIF_network + ) + (lookup vif_record.API.vIF_network) + state.table + in + (* Make sure we remove the cross-pool migration VIF mapping key from the other_config + * before creating a VIF - otherwise we'll risk sending this key on to another pool + * during a future cross-pool migration and it won't make sense. *) + let other_config = + List.filter + (fun (k, _) -> k <> Constants.storage_migrate_vif_map_key) + vif_record.API.vIF_other_config + in + (* Construct the VIF record we're going to try to create locally. *) + let vif_record = + if Pool_features.is_enabled ~__context Features.VIF_locking then + vif_record + else if vif_record.API.vIF_locking_mode = `locked then + { + vif_record with + API.vIF_locking_mode= `network_default + ; API.vIF_ipv4_allowed= [] + ; API.vIF_ipv6_allowed= [] + } + else + {vif_record with API.vIF_ipv4_allowed= []; API.vIF_ipv6_allowed= []} + in + let vif_record = { vif_record with - API.vIF_locking_mode= `network_default - ; API.vIF_ipv4_allowed= [] - ; API.vIF_ipv6_allowed= [] + API.vIF_VM= vm + ; API.vIF_network= net + ; API.vIF_other_config= other_config } - else - {vif_record with API.vIF_ipv4_allowed= []; API.vIF_ipv6_allowed= []} - in - let vif_record = - { - vif_record with - API.vIF_VM= vm - ; API.vIF_network= net - ; API.vIF_other_config= other_config - } - in - Create vif_record + in + Create vif_record let handle_dry_run __context _config _rpc _session_id state x precheck_result = @@ -1710,74 +1739,88 @@ end module VGPU : HandlerTools = struct type precheck_t = Found_VGPU of API.ref_VGPU | Create of API.vGPU_t - let precheck __context config rpc session_id state x = + let precheck __context config _rpc _session_id state x = let vgpu_record = API.vGPU_t_of_rpc x.snapshot in let get_vgpu () = - Client.VGPU.get_by_uuid ~rpc ~session_id ~uuid:vgpu_record.API.vGPU_uuid + (* Use an internal DB call - this avoids raising an exception and logging + the backtrace internally in case of a (reasonably expected) absence of + the object with this UUID *) + Db.VGPU.get_by_uuid_opt ~__context ~uuid:vgpu_record.API.vGPU_uuid in - let vgpu_exists () = - try - ignore (get_vgpu ()) ; - true - with _ -> false + let vgpu_opt = + if config.full_restore then ( + (* If there's already a VGPU with the same UUID and we're preserving UUIDs, use that one. *) + match get_vgpu () with + | Some x -> + Some x + | None -> + info + "Did not find an already existing VGPU with the same uuid=%s, \ + try to create a new one" + vgpu_record.API.vGPU_uuid ; + None + ) else + None in - if config.full_restore && vgpu_exists () then - let vgpu = get_vgpu () in - Found_VGPU vgpu - else - let vm = - log_reraise - ("Failed to find VGPU's VM: " ^ Ref.string_of vgpu_record.API.vGPU_VM) - (lookup vgpu_record.API.vGPU_VM) - state.table - in - let group = - (* If we find the cross-pool migration key, attach the vgpu to the provided gpu_group... *) - if - List.mem_assoc Constants.storage_migrate_vgpu_map_key - vgpu_record.API.vGPU_other_config - then - Ref.of_string - (List.assoc Constants.storage_migrate_vgpu_map_key - vgpu_record.API.vGPU_other_config + match vgpu_opt with + | Some vgpu -> + Found_VGPU vgpu + | None -> + let vm = + log_reraise + ("Failed to find VGPU's VM: " + ^ Ref.string_of vgpu_record.API.vGPU_VM ) - else - (* ...otherwise fall back to looking up the vgpu from the state table. *) + (lookup vgpu_record.API.vGPU_VM) + state.table + in + let group = + (* If we find the cross-pool migration key, attach the vgpu to the provided gpu_group... *) + if + List.mem_assoc Constants.storage_migrate_vgpu_map_key + vgpu_record.API.vGPU_other_config + then + Ref.of_string + (List.assoc Constants.storage_migrate_vgpu_map_key + vgpu_record.API.vGPU_other_config + ) + else + (* ...otherwise fall back to looking up the vgpu from the state table. *) + log_reraise + ("Failed to find VGPU's GPU group: " + ^ Ref.string_of vgpu_record.API.vGPU_GPU_group + ) + (lookup vgpu_record.API.vGPU_GPU_group) + state.table + in + let _type = log_reraise - ("Failed to find VGPU's GPU group: " - ^ Ref.string_of vgpu_record.API.vGPU_GPU_group + ("Failed to find VGPU's type: " + ^ Ref.string_of vgpu_record.API.vGPU_type ) - (lookup vgpu_record.API.vGPU_GPU_group) + (lookup vgpu_record.API.vGPU_type) state.table - in - let _type = - log_reraise - ("Failed to find VGPU's type: " - ^ Ref.string_of vgpu_record.API.vGPU_type - ) - (lookup vgpu_record.API.vGPU_type) - state.table - in - (* Make sure we remove the cross-pool migration VGPU mapping key from the other_config - * before creating a VGPU - otherwise we'll risk sending this key on to another pool - * during a future cross-pool migration and it won't make sense. *) - let other_config = - List.filter - (fun (k, _) -> k <> Constants.storage_migrate_vgpu_map_key) - vgpu_record.API.vGPU_other_config - in - let vgpu_record = - { - vgpu_record with - API.vGPU_VM= vm - ; API.vGPU_GPU_group= group - ; API.vGPU_type= _type - ; API.vGPU_other_config= other_config - } - in - if is_live config then - assert_can_live_import_vgpu ~__context vgpu_record ; - Create vgpu_record + in + (* Make sure we remove the cross-pool migration VGPU mapping key from the other_config + * before creating a VGPU - otherwise we'll risk sending this key on to another pool + * during a future cross-pool migration and it won't make sense. *) + let other_config = + List.filter + (fun (k, _) -> k <> Constants.storage_migrate_vgpu_map_key) + vgpu_record.API.vGPU_other_config + in + let vgpu_record = + { + vgpu_record with + API.vGPU_VM= vm + ; API.vGPU_GPU_group= group + ; API.vGPU_type= _type + ; API.vGPU_other_config= other_config + } + in + if is_live config then + assert_can_live_import_vgpu ~__context vgpu_record ; + Create vgpu_record let handle_dry_run __context _config _rpc _session_id state x precheck_result = From c8af62d8e675d4471b1b7983925088fe8d768de0 Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 16 Dec 2024 14:30:03 +0000 Subject: [PATCH 14/50] Simplify Eventgen Signed-off-by: Colin James --- ocaml/xapi/eventgen.ml | 380 ++++++++++++++++++----------------------- 1 file changed, 166 insertions(+), 214 deletions(-) diff --git a/ocaml/xapi/eventgen.ml b/ocaml/xapi/eventgen.ml index 46ffd833866..b19be7b33e1 100644 --- a/ocaml/xapi/eventgen.ml +++ b/ocaml/xapi/eventgen.ml @@ -11,248 +11,200 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. *) -module D = Debug.Make (struct let name = "sql" end) +open Debug.Make (struct let name = "sql" end) -open D - -type getrecord = unit -> Rpc.t +type get_record = unit -> Rpc.t let get_record_table : - (string, __context:Context.t -> self:string -> getrecord) Hashtbl.t = - Hashtbl.create 20 + (string, __context:Context.t -> self:string -> get_record) Hashtbl.t = + Hashtbl.create 64 -let find_get_record x ~__context ~self () : Rpc.t option = +let find_get_record obj_name ~__context ~self () : Rpc.t option = Option.map - (fun x -> x ~__context ~self ()) - (Hashtbl.find_opt get_record_table x) + (fun f -> f ~__context ~self ()) + (Hashtbl.find_opt get_record_table obj_name) + +(* If a record is modified, events must be emitted for related objects' records. + We collect a list of related objects by querying the (Ref _)-typed + fields of the input object against the relations encoded by the datamodel. -(* If a record is created or destroyed, then - for any (Ref _) field which is one end of a relationship, need to send - modified events for all those other objects. *) -(* we build a hashtable of these references and then look them up by object on each db write: *) + The result of this function is a list of pairs [(object, field);, ...]. + Note that the field component refers to a field in the input object, + not the related object. *) let compute_object_references_to_follow (obj_name : string) = + let module DT = Datamodel_types in let api = Datamodel.all_api in - let objs = Dm_api.objects_of_api api in - let obj = List.find (fun obj -> obj.Datamodel_types.name = obj_name) objs in - let relations = Dm_api.relations_of_api api in - let symmetric = List.concat_map (fun (a, b) -> [(a, b); (b, a)]) relations in - let set = Xapi_stdext_std.Listext.List.setify symmetric in - List.concat_map - (function - | { - Datamodel_types.ty= Datamodel_types.Ref _ - ; Datamodel_types.field_name - ; _ - } -> - let this_end = (obj.Datamodel_types.name, field_name) in - if List.mem_assoc this_end set then - let other_end = List.assoc this_end set in - let other_obj = fst other_end in - [(other_obj, field_name)] - else - [] - | _ -> - [] - ) - (Datamodel_utils.fields_of_obj obj) + let obj = Dm_api.get_obj_by_name api ~objname:obj_name in + let symmetric = + (* Symmetric closure of the field relation set. *) + Dm_api.relations_of_api api + |> List.concat_map (fun (a, b) -> [(a, b); (b, a)]) + in + (* Find an object related to the input field using the datamodel. *) + let find_related_object = function + | DT.{field_name; ty= Ref _; _} -> + let this_end = (obj_name, field_name) in + List.assoc_opt this_end symmetric + |> Option.map (fun (other_object, _) -> (other_object, field_name)) + | _ -> + None + in + let fields = Datamodel_utils.fields_of_obj obj in + List.filter_map find_related_object fields -let obj_references_table : (string, (string * string) list) Hashtbl.t = - Hashtbl.create 30 +(* For each object, precompute a list of related objects [(object, + field); ...] and store it in a hash table. -(* populate obj references table *) -let _ = - List.iter - (fun obj -> - let obj_name = obj.Datamodel_types.name in - Hashtbl.replace obj_references_table obj_name - (compute_object_references_to_follow obj_name) - ) - (Dm_api.objects_of_api Datamodel.all_api) + If looking up an entry for some object, "foo", yields a list + containing ("bar", "baz"), then it must be the case that "foo"'s + field "baz" (which is necessarily of type Ref _) is related to some + field within "bar". In which case, the database will have already + updated the related field(s) and we must emit events for those fields. *) +let obj_references_table : (string, (string * string) list) Hashtbl.t = + let table = Hashtbl.create 64 in + let populate_follows (obj : Datamodel_types.obj) = + let follows = compute_object_references_to_follow obj.name in + Hashtbl.replace table obj.name follows + in + Dm_api.objects_of_api Datamodel.all_api |> List.iter populate_follows ; + table let follow_references (obj_name : string) = Hashtbl.find obj_references_table obj_name -(** Compute a set of modify events but skip any for objects which were missing - (must have been dangling references) *) -let events_of_other_tbl_refs other_tbl_refs = - List.concat_map - (fun (tbl, fld, x) -> - try [(tbl, fld, x ())] - with _ -> - (* Probably means the reference was dangling *) - warn "skipping event for dangling reference %s: %s" tbl fld ; - [] - ) - other_tbl_refs +(* Compute a modify event's snapshot by attemping to invoke its + getter. If the record cannot be found, the event is dropped (as the + reference is probably dangling). *) +let snapshots_of_other_tbl_refs other_tbl_refs = + let try_get_records (table, field, getter) = + try Some (table, field, getter ()) + with _ -> + (* Probably means the reference was dangling *) + warn "%s: skipping event for dangling reference %s: %s" __FUNCTION__ table + field ; + None + in + List.filter_map try_get_records other_tbl_refs open Xapi_database.Db_cache_types open Xapi_database.Db_action_helper -let database_callback_inner event db context = +let is_valid_ref db = function + | Schema.Value.String r -> ( + try + ignore (Database.table_of_ref r db) ; + true + with _ -> false + ) + | _ -> + false + +type event_kind = Modify | Delete | Add + +let strings_of_event_kind = function + | Modify -> + ("mod", "MOD") + | Delete -> + ("del", "DEL") + | Add -> + ("add", "ADD") + +let emit_events ~kind events = + let kind, upper = strings_of_event_kind kind in + let emit = function + | tbl, ref, None -> + error "%s: Failed to generate %s event on %s %s" __FUNCTION__ upper tbl + ref + | tbl, ref, Some snapshot -> + events_notify ~snapshot tbl kind ref + in + List.iter emit events + +let database_callback_inner event db ~__context = let other_tbl_refs tblname = follow_references tblname in let other_tbl_refs_for_this_field tblname fldname = List.filter (fun (_, fld) -> fld = fldname) (other_tbl_refs tblname) in - let is_valid_ref = function - | Schema.Value.String r -> ( - try - ignore (Database.table_of_ref r db) ; - true - with _ -> false - ) - | _ -> - false + let compute_other_table_events table kvs = + (* Given a table and a deleted/new row's key-values, compute event + snapshots for all objects potentially referenced by values in the + row. *) + let get_potential_event (other_tbl, field) = + (* If a deleted/new field could refer to a row within + other_tbl, collect a potential event for it. *) + let field_value = List.assoc field kvs in + if is_valid_ref db field_value then + let self = Schema.Value.Unsafe_cast.string field_value in + let getter = find_get_record other_tbl ~__context ~self in + Some (other_tbl, self, getter) + else + None + in + follow_references table + |> List.filter_map get_potential_event + |> snapshots_of_other_tbl_refs in match event with - | RefreshRow (tblname, objref) -> ( - (* Generate event *) - let snapshot = find_get_record tblname ~__context:context ~self:objref in - let record = snapshot () in - match record with - | None -> - error "Failed to send MOD event for %s %s" tblname objref ; - Printf.printf "Failed to send MOD event for %s %s\n%!" tblname objref - | Some record -> - events_notify ~snapshot:record tblname "mod" objref - ) + | RefreshRow (tblname, objref) -> + (* To refresh a row, emit a modify event with the current record's + snapshot. *) + let getter = find_get_record tblname ~__context ~self:objref in + let snapshot = getter () in + emit_events ~kind:Modify [(tblname, objref, snapshot)] | WriteField (tblname, objref, fldname, oldval, newval) -> - let events_old_val = - if is_valid_ref oldval then - let oldval = Schema.Value.Unsafe_cast.string oldval in - events_of_other_tbl_refs - (List.map - (fun (tbl, _) -> - ( tbl - , oldval - , find_get_record tbl ~__context:context ~self:oldval - ) - ) - (other_tbl_refs_for_this_field tblname fldname) - ) - else - [] - in - let events_new_val = - if is_valid_ref newval then - let newval = Schema.Value.Unsafe_cast.string newval in - events_of_other_tbl_refs - (List.map - (fun (tbl, _) -> - ( tbl - , newval - , find_get_record tbl ~__context:context ~self:newval - ) - ) - (other_tbl_refs_for_this_field tblname fldname) - ) + (* When a field is written, both the new and old values of the + field may be references to other objects, which have already + been rewritten by the database layer. To follow up, we must + emit events for the previously referenced object, the current + row, and the newly referenced object.*) + let other_tbl_refs = other_tbl_refs_for_this_field tblname fldname in + (* Compute list of potential events. Some snapshots may fail to + be reified because the reference is no longer valid (i.e. it's + dangling). *) + let get_other_ref_events maybe_ref = + if is_valid_ref db maybe_ref then + let self = Schema.Value.Unsafe_cast.string maybe_ref in + let go (other_tbl, _) = + let get_record = find_get_record other_tbl ~__context ~self in + (other_tbl, self, get_record) + in + List.map go other_tbl_refs |> snapshots_of_other_tbl_refs else [] in - (* Generate event *) - let snapshot = find_get_record tblname ~__context:context ~self:objref in - let record = snapshot () in - List.iter - (function - | tbl, ref, None -> - error "Failed to send MOD event for %s %s" tbl ref ; - Printf.printf "Failed to send MOD event for %s %s\n%!" tbl ref - | tbl, ref, Some s -> - events_notify ~snapshot:s tbl "mod" ref - ) - events_old_val ; - ( match record with - | None -> - error "Failed to send MOD event for %s %s" tblname objref ; - Printf.printf "Failed to send MOD event for %s %s\n%!" tblname objref - | Some record -> - events_notify ~snapshot:record tblname "mod" objref - ) ; - List.iter - (function - | tbl, ref, None -> - error "Failed to send MOD event for %s %s" tbl ref ; - Printf.printf "Failed to send MOD event for %s %s\n%!" tbl ref - | tbl, ref, Some s -> - events_notify ~snapshot:s tbl "mod" ref - ) - events_new_val - | PreDelete (tblname, objref) -> ( - match find_get_record tblname ~__context:context ~self:objref () with - | None -> - error "Failed to generate DEL event for %s %s" tblname objref - (* Printf.printf "Failed to generate DEL event for %s %s\n%!" tblname objref; *) - | Some snapshot -> - events_notify ~snapshot tblname "del" objref - ) - | Delete (tblname, _objref, kv) -> - let other_tbl_refs = follow_references tblname in - let other_tbl_refs = - List.fold_left - (fun accu (remote_tbl, fld) -> - let fld_value = List.assoc fld kv in - if is_valid_ref fld_value then - let fld_value = Schema.Value.Unsafe_cast.string fld_value in - ( remote_tbl - , fld_value - , find_get_record remote_tbl ~__context:context ~self:fld_value - ) - :: accu - else - accu - ) - [] other_tbl_refs - in - let other_tbl_ref_events = events_of_other_tbl_refs other_tbl_refs in - List.iter - (function - | tbl, ref, None -> - error "Failed to generate MOD event on %s %s" tbl ref - (* Printf.printf "Failed to generate MOD event on %s %s\n%!" tbl ref; *) - | tbl, ref, Some s -> - events_notify ~snapshot:s tbl "mod" ref - ) - other_tbl_ref_events - | Create (tblname, new_objref, kv) -> - let snapshot = - find_get_record tblname ~__context:context ~self:new_objref - in - let other_tbl_refs = follow_references tblname in - let other_tbl_refs = - List.fold_left - (fun accu (tbl, fld) -> - let fld_value = List.assoc fld kv in - if is_valid_ref fld_value then - let fld_value = Schema.Value.Unsafe_cast.string fld_value in - ( tbl - , fld_value - , find_get_record tbl ~__context:context ~self:fld_value - ) - :: accu - else - accu - ) - [] other_tbl_refs - in - let other_tbl_events = events_of_other_tbl_refs other_tbl_refs in - ( match snapshot () with - | None -> - error "Failed to generate ADD event for %s %s" tblname new_objref - (* Printf.printf "Failed to generate ADD event for %s %s\n%!" tblname new_objref; *) - | Some snapshot -> - events_notify ~snapshot tblname "add" new_objref - ) ; - List.iter - (function - | tbl, ref, None -> - error "Failed to generate MOD event for %s %s" tbl ref - (* Printf.printf "Failed to generate MOD event for %s %s\n%!" tbl ref;*) - | tbl, ref, Some s -> - events_notify ~snapshot:s tbl "mod" ref - ) - other_tbl_events + (* Compute modify events for the old and new field values, if + either value appears to be a reference. *) + let events_old_val = get_other_ref_events oldval in + let events_new_val = get_other_ref_events newval in + (* Emit modify events for records referenced by the old row's value. *) + emit_events ~kind:Modify events_old_val ; + (* Emit a modify event for the current row. *) + let getter = find_get_record tblname ~__context ~self:objref in + let snapshot = getter () in + emit_events ~kind:Modify [(tblname, objref, snapshot)] ; + (* Emit modify events for records referenced by the new row's value. *) + emit_events ~kind:Modify events_new_val + | PreDelete (tblname, objref) -> + (* Emit a deletion event for the deleted row. *) + let getter = find_get_record tblname ~__context ~self:objref in + let snapshot = getter () in + emit_events ~kind:Delete [(tblname, objref, snapshot)] + | Delete (tblname, _objref, kvs) -> + (* Deleting a row requires similar modify events as overwriting a + field's value does. If any of the deleted cells may be a + reference, we must emit modify events for each of the related + objects. *) + compute_other_table_events tblname kvs |> emit_events ~kind:Modify + | Create (tblname, new_objref, kvs) -> + (* Emit an add event for the new object. *) + let getter = find_get_record tblname ~__context ~self:new_objref in + let snapshot = getter () in + emit_events ~kind:Add [(tblname, new_objref, snapshot)] ; + (* Emit modification events for any newly-referenced objects. *) + compute_other_table_events tblname kvs |> emit_events ~kind:Modify let database_callback event db = - let context = Context.make "eventgen" in + let __context = Context.make __MODULE__ in Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> database_callback_inner event db context) - (fun () -> Context.complete_tracing context) + (fun () -> database_callback_inner event db ~__context) + (fun () -> Context.complete_tracing __context) From cc50840312f586de3c3e7158d325a10d60a56ba3 Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 16 Dec 2024 15:13:03 +0000 Subject: [PATCH 15/50] Add eventgen.mli Signed-off-by: Colin James --- ocaml/xapi/eventgen.mli | 25 +++++++++++++++++++++++++ quality-gate.sh | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 ocaml/xapi/eventgen.mli diff --git a/ocaml/xapi/eventgen.mli b/ocaml/xapi/eventgen.mli new file mode 100644 index 00000000000..279a43eded2 --- /dev/null +++ b/ocaml/xapi/eventgen.mli @@ -0,0 +1,25 @@ +(* + * Copyright (C) 2024 Cloud Software Group. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +type get_record = unit -> Rpc.t + +val get_record_table : + (string, __context:Context.t -> self:string -> get_record) Hashtbl.t + +open Xapi_database.Db_cache_types + +val database_callback : update -> Database.t -> unit + +val find_get_record : + string -> __context:Context.t -> self:string -> unit -> Rpc.t option diff --git a/quality-gate.sh b/quality-gate.sh index a7ffefea72b..cfef6614e00 100755 --- a/quality-gate.sh +++ b/quality-gate.sh @@ -25,7 +25,7 @@ verify-cert () { } mli-files () { - N=497 + N=496 # do not count ml files from the tests in ocaml/{tests/quicktest} MLIS=$(git ls-files -- '**/*.mli' | grep -vE "ocaml/tests|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) MLS=$(git ls-files -- '**/*.ml' | grep -vE "ocaml/tests|ocaml/quicktest|ocaml/message-switch/core_test" | xargs -I {} sh -c "echo {} | cut -f 1 -d '.'" \;) From 4f871952d69c486a6b8b2f39830bd9f82a80905d Mon Sep 17 00:00:00 2001 From: Colin James Date: Mon, 16 Dec 2024 15:27:28 +0000 Subject: [PATCH 16/50] Hide that get_records are stored in a Hashtbl Signed-off-by: Colin James --- ocaml/idl/ocaml_backend/gen_db_actions.ml | 3 +-- ocaml/xapi/eventgen.ml | 2 ++ ocaml/xapi/eventgen.mli | 12 ++++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ocaml/idl/ocaml_backend/gen_db_actions.ml b/ocaml/idl/ocaml_backend/gen_db_actions.ml index 06f54f228ba..cc9a23857ee 100644 --- a/ocaml/idl/ocaml_backend/gen_db_actions.ml +++ b/ocaml/idl/ocaml_backend/gen_db_actions.ml @@ -411,8 +411,7 @@ let db_action api : O.Module.t = O.Let.make ~name:"_" ~params:[] ~ty:"unit" ~body: [ - Printf.sprintf "Hashtbl.add Eventgen.get_record_table \"%s\"" - obj.DT.name + Printf.sprintf "Eventgen.set_get_record \"%s\"" obj.DT.name ; Printf.sprintf "(fun ~__context ~self -> (fun () -> API.rpc_of_%s_t \ (%s.get_record ~__context ~self:(Ref.of_%sstring self))))" diff --git a/ocaml/xapi/eventgen.ml b/ocaml/xapi/eventgen.ml index b19be7b33e1..dfc04d944b7 100644 --- a/ocaml/xapi/eventgen.ml +++ b/ocaml/xapi/eventgen.ml @@ -19,6 +19,8 @@ let get_record_table : (string, __context:Context.t -> self:string -> get_record) Hashtbl.t = Hashtbl.create 64 +let set_get_record = Hashtbl.replace get_record_table + let find_get_record obj_name ~__context ~self () : Rpc.t option = Option.map (fun f -> f ~__context ~self ()) diff --git a/ocaml/xapi/eventgen.mli b/ocaml/xapi/eventgen.mli index 279a43eded2..18167241ddb 100644 --- a/ocaml/xapi/eventgen.mli +++ b/ocaml/xapi/eventgen.mli @@ -12,14 +12,14 @@ * GNU Lesser General Public License for more details. *) -type get_record = unit -> Rpc.t - -val get_record_table : - (string, __context:Context.t -> self:string -> get_record) Hashtbl.t - open Xapi_database.Db_cache_types -val database_callback : update -> Database.t -> unit +type get_record = unit -> Rpc.t + +val set_get_record : + string -> (__context:Context.t -> self:string -> get_record) -> unit val find_get_record : string -> __context:Context.t -> self:string -> unit -> Rpc.t option + +val database_callback : update -> Database.t -> unit From ed2e5021e65bc6ddf6ab23f06db2e260b8c4c161 Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 17 Dec 2024 15:43:52 +0000 Subject: [PATCH 17/50] Document eventgen.mli Signed-off-by: Colin James --- ocaml/xapi/eventgen.mli | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/ocaml/xapi/eventgen.mli b/ocaml/xapi/eventgen.mli index 18167241ddb..adc0d03b9bb 100644 --- a/ocaml/xapi/eventgen.mli +++ b/ocaml/xapi/eventgen.mli @@ -12,14 +12,64 @@ * GNU Lesser General Public License for more details. *) +(** Eventgen is responsible for producing events for the event APIs + ([event.next] and [event.from]) to provide to clients. + + Each update may result in many events, each with a snapshot of the + related object(s), marshalled as [Rpc.t] dictionaries. *) + open Xapi_database.Db_cache_types type get_record = unit -> Rpc.t +(* Type of the thunks used to read a record, marshalled as an [Rpc.t] + dictionary, from the database. + + In practice, such functions arise from functions provided by the + generated [Db_actions] module - which contains the logic to + serialise internal and external views of database records. + + This type is emphasised because values of this type are used as + thunks to delay evaluation, such that the actual fetching of + object snapshots can be done in an orderly way by this module, + when producing events. The closures produced by [Db_actions] + capture the relevant context and reference information to produce + a snapshot on-demand, when invoked. *) val set_get_record : string -> (__context:Context.t -> self:string -> get_record) -> unit +(** [set_get_record table accessor] is used by [Db_actions] to + register a means by which this module can read records from + database, in order to produce snapshots used by the events + mechanism. + + Upon initialisation, [Db_actions] calls [set_get_record] to + register an accessor for each object type stored in the + database. These accessors consist of logic internal to + [Db_actions] which performs all the related reading and + marshalling of values from the database. + + This function should not be called by any module other than + [Db_actions]. *) val find_get_record : string -> __context:Context.t -> self:string -> unit -> Rpc.t option +(** [find_get_record table context reference] yields a partial + function which, when invoked, attempts to read a record snapshot from + the database. Any [table] used must have already been registered + by initialisation code within [Db_actions] (i.e. from a previous + call to [set_get_record]). + + The function returns an option type as a convenience, as the + inherent delaying of the evaluation of snapshots could mean that a + record referred to by [reference] is no longer present in the + database. *) val database_callback : update -> Database.t -> unit +(** [database_call update db] notifies [Xapi_event] (indirectly) of + transitive events arising from a single logical [update] within the + database. + + Many events may follow a single [update] as referential fields, + related by the datamodel schema, may produce changes in related + objects (so, previously and newly referenced objects' snapshots + must be emitted as modification events). *) From d622dd8dd629b77c978c9d24409d5a4e3a6113d6 Mon Sep 17 00:00:00 2001 From: Colin James Date: Tue, 17 Dec 2024 16:16:05 +0000 Subject: [PATCH 18/50] Precompute symmetric closure table To avoid recomputing the symmetric closure several times during module initialisation for Eventgen, we introduce a hashtable that stores the relation. Signed-off-by: Colin James --- ocaml/xapi/eventgen.ml | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/ocaml/xapi/eventgen.ml b/ocaml/xapi/eventgen.ml index dfc04d944b7..753bb8fdf7b 100644 --- a/ocaml/xapi/eventgen.ml +++ b/ocaml/xapi/eventgen.ml @@ -26,6 +26,24 @@ let find_get_record obj_name ~__context ~self () : Rpc.t option = (fun f -> f ~__context ~self ()) (Hashtbl.find_opt get_record_table obj_name) +(* Bidirectional lookup for relations encoded in all_relations. *) +let lookup_object_relation = + (* Precompute the symmetric closure of all_relations and store it as + a hash table. *) + let symmetric_table = + let table = Hashtbl.create 128 in + let relate = Hashtbl.replace table in + let api = Datamodel.all_api in + let close (p, p') = + (* R U= { (p, p'), (p', p) } where p, p' are of the form + (object, field) *) + relate p p' ; relate p' p + in + Dm_api.relations_of_api api |> List.iter close ; + table + in + Hashtbl.find_opt symmetric_table + (* If a record is modified, events must be emitted for related objects' records. We collect a list of related objects by querying the (Ref _)-typed fields of the input object against the relations encoded by the datamodel. @@ -37,16 +55,11 @@ let compute_object_references_to_follow (obj_name : string) = let module DT = Datamodel_types in let api = Datamodel.all_api in let obj = Dm_api.get_obj_by_name api ~objname:obj_name in - let symmetric = - (* Symmetric closure of the field relation set. *) - Dm_api.relations_of_api api - |> List.concat_map (fun (a, b) -> [(a, b); (b, a)]) - in (* Find an object related to the input field using the datamodel. *) let find_related_object = function | DT.{field_name; ty= Ref _; _} -> let this_end = (obj_name, field_name) in - List.assoc_opt this_end symmetric + lookup_object_relation this_end |> Option.map (fun (other_object, _) -> (other_object, field_name)) | _ -> None From d08c6cb8a2581726690b1eb6f78951f9a5abf1a4 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Mon, 9 Dec 2024 18:18:26 +0000 Subject: [PATCH 19/50] CA-402921: Relax VIF constraint for PVS proxy The current constraint is that the VIF used for PVS proxy must have device number 0. It turned out that this can be relaxed. It is sufficient to enforce that the VIF is the one with the lowest device number for the VM. Signed-off-by: Rob Hoes --- ocaml/idl/datamodel_errors.ml | 5 +++++ ocaml/xapi-consts/api_errors.ml | 2 ++ ocaml/xapi/xapi_pvs_proxy.ml | 15 ++++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 80b36218f25..81f5ca9fdf3 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -1818,6 +1818,11 @@ let _ = "The address specified is already in use by an existing PVS_server object" () ; + error Api_errors.pvs_vif_must_be_first_device [] + ~doc: + "The VIF used by PVS proxy must be the one with the lowest device number" + () ; + error Api_errors.usb_group_contains_vusb ["vusbs"] ~doc:"The USB group contains active VUSBs and cannot be deleted." () ; error Api_errors.usb_group_contains_pusb ["pusbs"] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 53e9e06176b..0acf87020e3 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1246,6 +1246,8 @@ let pvs_proxy_already_present = add_error "PVS_PROXY_ALREADY_PRESENT" let pvs_server_address_in_use = add_error "PVS_SERVER_ADDRESS_IN_USE" +let pvs_vif_must_be_first_device = add_error "PVS_VIF_MUST_BE_FIRST_DEVICE" + let extension_protocol_failure = add_error "EXTENSION_PROTOCOL_FAILURE" let usb_group_contains_vusb = add_error "USB_GROUP_CONTAINS_VUSB" diff --git a/ocaml/xapi/xapi_pvs_proxy.ml b/ocaml/xapi/xapi_pvs_proxy.ml index 3f81ffc783e..5c3545952a5 100644 --- a/ocaml/xapi/xapi_pvs_proxy.ml +++ b/ocaml/xapi/xapi_pvs_proxy.ml @@ -32,9 +32,18 @@ let create ~__context ~site ~vIF = ) ; Helpers.assert_is_valid_ref ~__context ~name:"site" ~ref:site ; Helpers.assert_is_valid_ref ~__context ~name:"VIF" ~ref:vIF ; - let device = Db.VIF.get_device ~__context ~self:vIF in - if device <> "0" then - raise Api_errors.(Server_error (invalid_device, [device])) ; + let device = Db.VIF.get_device ~__context ~self:vIF |> int_of_string in + let min_device = + let open Xapi_database.Db_filter_types in + let vm = Db.VIF.get_VM ~__context ~self:vIF in + Db.VIF.get_records_where ~__context + ~expr:(Eq (Field "VM", Literal (Ref.string_of vm))) + |> List.fold_left + (fun m (_, {API.vIF_device= d; _}) -> min m (int_of_string d)) + device + in + if device <> min_device then + raise Api_errors.(Server_error (pvs_vif_must_be_first_device, [])) ; let pvs_proxy = Ref.make () in let uuid = Uuidx.(to_string (make ())) in Db.PVS_proxy.create ~__context ~ref:pvs_proxy ~uuid ~site ~vIF From 2f2ee291641bb7e281533eeb0e6905c989bc7c00 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 11 Dec 2024 16:54:52 +0000 Subject: [PATCH 20/50] CA-402921: Update PVS-proxy tests Signed-off-by: Rob Hoes --- ocaml/tests/test_pvs_proxy.ml | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/ocaml/tests/test_pvs_proxy.ml b/ocaml/tests/test_pvs_proxy.ml index 2c102c0ca32..47e41ab3c24 100644 --- a/ocaml/tests/test_pvs_proxy.ml +++ b/ocaml/tests/test_pvs_proxy.ml @@ -34,12 +34,28 @@ let test_create_ok () = "test_create_ok testing get_VIF" vIF (Db.PVS_proxy.get_VIF ~__context ~self:pvs_proxy) -let test_create_invalid_device () = +let test_create_ok_lowest_device_number () = let __context = T.make_test_database () in let site = T.make_pvs_site ~__context () in let vIF = T.make_vif ~__context ~device:"1" () in - Alcotest.check_raises "test_create_invalid_device should raise invalid_device" - Api_errors.(Server_error (invalid_device, ["1"])) + let _vIF' = T.make_vif ~__context ~device:"2" () in + let pvs_proxy = Xapi_pvs_proxy.create ~__context ~site ~vIF in + Alcotest.(check (Alcotest_comparators.ref ())) + "test_create_ok testing get_site" site + (Db.PVS_proxy.get_site ~__context ~self:pvs_proxy) ; + Alcotest.(check (Alcotest_comparators.ref ())) + "test_create_ok testing get_VIF" vIF + (Db.PVS_proxy.get_VIF ~__context ~self:pvs_proxy) + +let test_create_not_lowest_device_number () = + let __context = T.make_test_database () in + let site = T.make_pvs_site ~__context () in + let _vIF' = T.make_vif ~__context ~device:"0" () in + let vIF = T.make_vif ~__context ~device:"1" () in + Alcotest.check_raises + "test_create_not_lowest_device_number should raise \ + pvs_vif_must_be_first_device" + Api_errors.(Server_error (pvs_vif_must_be_first_device, [])) (fun () -> ignore (Xapi_pvs_proxy.create ~__context ~site ~vIF)) let test_create_invalid_site () = @@ -103,7 +119,14 @@ let test = [ ("test_unlicensed", `Quick, test_unlicensed) ; ("test_create_ok", `Quick, test_create_ok) - ; ("test_create_invalid_device", `Quick, test_create_invalid_device) + ; ( "test_create_ok_lowest_device_number" + , `Quick + , test_create_ok_lowest_device_number + ) + ; ( "test_create_not_lowest_device_number" + , `Quick + , test_create_not_lowest_device_number + ) ; ("test_create_invalid_site", `Quick, test_create_invalid_site) ; ("test_create_invalid_vif", `Quick, test_create_invalid_vif) ; ("test_destroy", `Quick, test_destroy) From 14406ba90312c595c2a75e7f07eb25b8dba66892 Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 18 Dec 2024 13:15:06 +0000 Subject: [PATCH 21/50] CA-402921: Restrict VIF.create When creating a new VIF and there is already a VIF with PVS_proxy, check that the new VIF does not have a lower device number than the PVS_proxy VIF. Signed-off-by: Rob Hoes --- ocaml/idl/datamodel_errors.ml | 7 +++++++ ocaml/xapi-consts/api_errors.ml | 3 +++ ocaml/xapi/xapi_vif_helpers.ml | 37 +++++++++++++++++++++++++++++---- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/ocaml/idl/datamodel_errors.ml b/ocaml/idl/datamodel_errors.ml index 81f5ca9fdf3..fed2f830db1 100644 --- a/ocaml/idl/datamodel_errors.ml +++ b/ocaml/idl/datamodel_errors.ml @@ -1823,6 +1823,13 @@ let _ = "The VIF used by PVS proxy must be the one with the lowest device number" () ; + error Api_errors.pvs_proxy_present_on_higher_vif_device ["device"] + ~doc: + "The VM has a VIF, with a higher device number than the new VIF, that \ + uses a PVS proxy. The VIF used by PVS proxy must be the one with the \ + lowest device number." + () ; + error Api_errors.usb_group_contains_vusb ["vusbs"] ~doc:"The USB group contains active VUSBs and cannot be deleted." () ; error Api_errors.usb_group_contains_pusb ["pusbs"] diff --git a/ocaml/xapi-consts/api_errors.ml b/ocaml/xapi-consts/api_errors.ml index 0acf87020e3..54bdd6f6660 100644 --- a/ocaml/xapi-consts/api_errors.ml +++ b/ocaml/xapi-consts/api_errors.ml @@ -1248,6 +1248,9 @@ let pvs_server_address_in_use = add_error "PVS_SERVER_ADDRESS_IN_USE" let pvs_vif_must_be_first_device = add_error "PVS_VIF_MUST_BE_FIRST_DEVICE" +let pvs_proxy_present_on_higher_vif_device = + add_error "PVS_PROXY_PRESENT_ON_HIGHER_VIF_DEVICE" + let extension_protocol_failure = add_error "EXTENSION_PROTOCOL_FAILURE" let usb_group_contains_vusb = add_error "USB_GROUP_CONTAINS_VUSB" diff --git a/ocaml/xapi/xapi_vif_helpers.ml b/ocaml/xapi/xapi_vif_helpers.ml index 4a469b84368..da6ede482fa 100644 --- a/ocaml/xapi/xapi_vif_helpers.ml +++ b/ocaml/xapi/xapi_vif_helpers.ml @@ -287,13 +287,42 @@ let create ~__context ~device ~network ~vM ~mAC ~mTU ~other_config ) ; (* Check to make sure the device is unique *) Xapi_stdext_threads.Threadext.Mutex.execute m (fun () -> - let all = Db.VM.get_VIFs ~__context ~self:vM in - let all_devices = - List.map (fun self -> Db.VIF.get_device ~__context ~self) all + let all_vifs_with_devices = + Db.VM.get_VIFs ~__context ~self:vM + |> List.map (fun self -> + (self, int_of_string (Db.VIF.get_device ~__context ~self)) + ) in - if List.mem device all_devices then + let new_device = int_of_string device in + if List.exists (fun (_, d) -> d = new_device) all_vifs_with_devices then raise (Api_errors.Server_error (Api_errors.device_already_exists, [device])) ; + + (* If the VM uses a PVS_proxy, then the proxy _must_ be associated with + the VIF that has the lowest device number. Check that the new VIF + respects this. *) + ( match all_vifs_with_devices with + | [] -> + () + | hd :: tl -> + let min_vif, min_device = + List.fold_left + (fun ((_, d) as v) ((_, d') as v') -> if d' < d then v' else v) + hd tl + in + let vm_has_pvs_proxy = + Pvs_proxy_control.find_proxy_for_vif ~__context ~vif:min_vif <> None + in + if vm_has_pvs_proxy && new_device < min_device then + raise + Api_errors.( + Server_error + ( pvs_proxy_present_on_higher_vif_device + , [Printf.sprintf "%d" min_device] + ) + ) + ) ; + let metrics = Ref.make () and metrics_uuid = Uuidx.to_string (Uuidx.make ()) in Db.VIF_metrics.create ~__context ~ref:metrics ~uuid:metrics_uuid From 819279248403e034f8e9a9e5778e852d9c5bb58b Mon Sep 17 00:00:00 2001 From: Rob Hoes Date: Wed, 18 Dec 2024 13:16:07 +0000 Subject: [PATCH 22/50] CA-402921: Add some unit tests for Xapi_vif_helpers Signed-off-by: Rob Hoes --- ocaml/tests/suite_alcotest.ml | 1 + ocaml/tests/test_vif_helpers.ml | 90 +++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 ocaml/tests/test_vif_helpers.ml diff --git a/ocaml/tests/suite_alcotest.ml b/ocaml/tests/suite_alcotest.ml index c2e422c2379..7c425e39639 100644 --- a/ocaml/tests/suite_alcotest.ml +++ b/ocaml/tests/suite_alcotest.ml @@ -37,6 +37,7 @@ let () = ; ("Test_pvs_site", Test_pvs_site.test) ; ("Test_pvs_proxy", Test_pvs_proxy.test) ; ("Test_pvs_server", Test_pvs_server.test) + ; ("Test_vif_helpers", Test_vif_helpers.test) ; ("Test_vm_memory_constraints", Test_vm_memory_constraints.test) ; ("Test_xapi_xenops", Test_xapi_xenops.test) ; ("Test_network_event_loop", Test_network_event_loop.test) diff --git a/ocaml/tests/test_vif_helpers.ml b/ocaml/tests/test_vif_helpers.ml new file mode 100644 index 00000000000..2e92a3d7f01 --- /dev/null +++ b/ocaml/tests/test_vif_helpers.ml @@ -0,0 +1,90 @@ +(* + * Copyright (C) Citrix Systems Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published + * by the Free Software Foundation; version 2.1 only. with the special + * exception on linking described in file LICENSE. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + *) + +module T = Test_common + +(* Function under test *) +let create ~__context ~device ~network ~vM ?(mAC = "00:00:00:00:00:00") + ?(mTU = 1500L) ?(qos_algorithm_type = "") ?(qos_algorithm_params = []) + ?(currently_attached = false) ?(other_config = []) + ?(locking_mode = `unlocked) ?(ipv4_allowed = []) ?(ipv6_allowed = []) + ?(ipv4_configuration_mode = `None) ?(ipv4_addresses = []) + ?(ipv4_gateway = "") ?(ipv6_configuration_mode = `None) + ?(ipv6_addresses = []) ?(ipv6_gateway = "") () = + Xapi_vif_helpers.create ~__context ~device ~network ~vM ~mAC ~mTU + ~other_config ~qos_algorithm_type ~qos_algorithm_params ~currently_attached + ~locking_mode ~ipv4_allowed ~ipv6_allowed ~ipv4_configuration_mode + ~ipv4_addresses ~ipv4_gateway ~ipv6_configuration_mode ~ipv6_addresses + ~ipv6_gateway + +let test_create_ok () = + let __context = T.make_test_database () in + let vM = T.make_vm ~__context () in + let network = T.make_network ~__context () in + let vif = create ~__context ~device:"0" ~network ~vM () in + Alcotest.(check (Alcotest_comparators.ref ())) + "test_create_ok testing get_VM" vM + (Db.VIF.get_VM ~__context ~self:vif) + +let test_create_duplicate_device () = + let __context = T.make_test_database () in + let vM = T.make_vm ~__context () in + let network = T.make_network ~__context () in + let _vif0 = T.make_vif ~__context ~device:"0" ~vM ~network () in + Alcotest.check_raises + "test_create_duplicate_device should raise device_already_exists" + Api_errors.(Server_error (device_already_exists, ["0"])) + @@ fun () -> + let _ = create ~__context ~device:"0" ~network ~vM () in + () + +let test_create_with_pvs_proxy_ok () = + let __context = T.make_test_database () in + let vM = T.make_vm ~__context () in + let network = T.make_network ~__context () in + let vIF = T.make_vif ~__context ~device:"0" ~vM ~network () in + let _vIF2 = T.make_vif ~__context ~device:"2" ~vM ~network () in + let site = T.make_pvs_site ~__context () in + let _pvs_proxy = T.make_pvs_proxy ~__context ~site ~vIF () in + let vif1 = create ~__context ~device:"1" ~network ~vM () in + Alcotest.(check (Alcotest_comparators.ref ())) + "test_create_with_pvs_proxy_ok testing get_VM" vM + (Db.VIF.get_VM ~__context ~self:vif1) + +let test_create_with_pvs_proxy_not_ok () = + let __context = T.make_test_database () in + let vM = T.make_vm ~__context () in + let network = T.make_network ~__context () in + let vIF = T.make_vif ~__context ~device:"1" ~vM ~network () in + let _vIF2 = T.make_vif ~__context ~device:"2" ~vM ~network () in + let site = T.make_pvs_site ~__context () in + let _pvs_proxy = T.make_pvs_proxy ~__context ~site ~vIF () in + Alcotest.check_raises + "test_create_with_pvs_proxy_not_ok should raise \ + pvs_proxy_present_on_higher_vif_device" + Api_errors.(Server_error (pvs_proxy_present_on_higher_vif_device, ["1"])) + @@ fun () -> + let _ = create ~__context ~device:"0" ~network ~vM () in + () + +let test = + [ + ("test_create_ok", `Quick, test_create_ok) + ; ("test_create_duplicate_device", `Quick, test_create_duplicate_device) + ; ("test_create_with_pvs_proxy_ok", `Quick, test_create_with_pvs_proxy_ok) + ; ( "test_create_with_pvs_proxy_not_ok" + , `Quick + , test_create_with_pvs_proxy_not_ok + ) + ] From e8bde26fe9f9fd35a110723b47c425bae28bca20 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 18 Dec 2024 11:21:56 +0000 Subject: [PATCH 23/50] CA-403422: lengthen the timeout for xenopsd's serialized tasks Historically parallel operations were run in tasks as part of serial operations, and serial tasks were not run as part of parallel ones. This changed recently, causing some timeouts that did not happen before. To mitigate this issue, now the timeouts for tasks are 20 minutes per single serialized operation, instead of 20 minutes per task. Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenops_server.ml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index f4c784faa11..e3f5c48f890 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -278,6 +278,15 @@ let rec name_of_atomic = function | Best_effort atomic -> Printf.sprintf "Best_effort (%s)" (name_of_atomic atomic) +let rec atomic_expires_after = function + | Serial (_, _, ops) -> + List.map atomic_expires_after ops |> List.fold_left ( +. ) 0. + | Parallel (_, _, ops) -> + List.map atomic_expires_after ops |> List.fold_left Float.max 0. + | _ -> + (* 20 minutes, in seconds *) + 1200. + type vm_migrate_op = { vmm_id: Vm.id ; vmm_vdi_map: (string * string) list @@ -2341,16 +2350,17 @@ and queue_atomics_and_wait ~progress_callback ~max_parallel_atoms dbg id ops = let atom_id = Printf.sprintf "%s.chunk=%d.atom=%d" id chunk_idx atom_idx in - queue_atomic_int ~progress_callback dbg atom_id op + (queue_atomic_int ~progress_callback dbg atom_id op, op) ) ops in let timeout_start = Unix.gettimeofday () in List.map - (fun task -> + (fun (task, op) -> let task_id = Xenops_task.id_of_handle task in + let expiration = atomic_expires_after op in let completion = - event_wait updates task ~from ~timeout_start 1200.0 + event_wait updates task ~from ~timeout_start expiration (is_task task_id) task_ended in (task_id, task, completion) From a899232032e5b181582bac1293681aa003923525 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 19 Dec 2024 10:49:35 +0000 Subject: [PATCH 24/50] xenopsd: remove unused subtask parameter Signed-off-by: Pau Ruiz Safont --- ocaml/xenopsd/lib/xenops_server.ml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/ocaml/xenopsd/lib/xenops_server.ml b/ocaml/xenopsd/lib/xenops_server.ml index e3f5c48f890..e3f0a77f5e8 100644 --- a/ocaml/xenopsd/lib/xenops_server.ml +++ b/ocaml/xenopsd/lib/xenops_server.ml @@ -1857,7 +1857,7 @@ let with_tracing ~name ~task f = warn "Failed to start tracing: %s" (Printexc.to_string e) ; f () -let rec perform_atomic ~progress_callback ?subtask:_ ?result (op : atomic) +let rec perform_atomic ~progress_callback ?result (op : atomic) (t : Xenops_task.task_handle) : unit = let module B = (val get_backend () : S) in with_tracing ~name:(name_of_atomic op) ~task:t @@ fun () -> @@ -2396,7 +2396,7 @@ let perform_atomics atomics t = progress_callback progress (weight /. total_weight) t in debug "Performing: %s" (string_of_atomic x) ; - perform_atomic ~subtask:(string_of_atomic x) ~progress_callback x t ; + perform_atomic ~progress_callback x t ; progress_callback 1. ; progress +. (weight /. total_weight) ) @@ -2530,8 +2530,7 @@ and trigger_cleanup_after_failure_atom op t = | VM_import_metadata _ -> () -and perform_exn ?subtask ?result (op : operation) (t : Xenops_task.task_handle) - : unit = +and perform_exn ?result (op : operation) (t : Xenops_task.task_handle) : unit = let module B = (val get_backend () : S) in with_tracing ~name:(name_of_operation op) ~task:t @@ fun () -> match op with @@ -2658,9 +2657,7 @@ and perform_exn ?subtask ?result (op : operation) (t : Xenops_task.task_handle) (id, vm.Vm.memory_dynamic_min, vm.Vm.memory_dynamic_min) in let (_ : unit) = - perform_atomic ~subtask:(string_of_atomic atomic) - ~progress_callback:(fun _ -> ()) - atomic t + perform_atomic ~progress_callback:(fun _ -> ()) atomic t in (* Waiting here is not essential but adds a degree of safety and reducess unnecessary memory copying. *) @@ -3172,7 +3169,7 @@ and perform_exn ?subtask ?result (op : operation) (t : Xenops_task.task_handle) VUSB_DB.signal id | Atomic op -> let progress_callback = progress_callback 0. 1. t in - perform_atomic ~progress_callback ?subtask ?result op t + perform_atomic ~progress_callback ?result op t and verify_power_state op = let module B = (val get_backend () : S) in @@ -3201,7 +3198,7 @@ and perform ?subtask ?result (op : operation) (t : Xenops_task.task_handle) : unit = let one op = verify_power_state op ; - try perform_exn ?subtask ?result op t + try perform_exn ?result op t with e -> Backtrace.is_important e ; info "Caught %s executing %s: triggering cleanup actions" From ac2255bdc44157af456a4f944e2c653cf715160c Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Thu, 19 Dec 2024 14:55:06 +0000 Subject: [PATCH 25/50] XSI-1773 improve logging if service file unexpectedly exists We have seen failures where a service file unexpectedly exists. It could have been left behind but a failed stop but we don't have evidence for that. To help with this, provide more details of the file found. Signed-off-by: Christian Lindig --- ocaml/forkexecd/lib/dune | 1 + ocaml/forkexecd/lib/fe_systemctl.ml | 24 +++++++++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/ocaml/forkexecd/lib/dune b/ocaml/forkexecd/lib/dune index 749f173b977..e8dd8c8312e 100644 --- a/ocaml/forkexecd/lib/dune +++ b/ocaml/forkexecd/lib/dune @@ -13,6 +13,7 @@ xapi-log xapi-stdext-pervasives xapi-stdext-unix + xapi-stdext-date xapi-tracing ) (preprocess (per_module ((pps ppx_deriving_rpc) Fe)))) diff --git a/ocaml/forkexecd/lib/fe_systemctl.ml b/ocaml/forkexecd/lib/fe_systemctl.ml index b36ee6674ae..046396002ca 100644 --- a/ocaml/forkexecd/lib/fe_systemctl.ml +++ b/ocaml/forkexecd/lib/fe_systemctl.ml @@ -130,14 +130,28 @@ let is_active ~service = in Unix.WEXITED 0 = status -let exists ~service = - Sys.file_exists (Filename.concat run_path (service ^ ".service")) +(** path to service file *) +let path service = Filename.concat run_path (service ^ ".service") + +(** does [service] file exist *) +let exists ~service = Sys.file_exists (path service) + +(** creation time of [path] as a string *) +let ctime path = + let ctime = Unix.((stat path).st_ctime) in + Xapi_stdext_date.Date.(of_unix_time ctime |> to_rfc3339) let start_transient ?env ?properties ?(exec_ty = Type.Simple) ~service cmd args = - if exists ~service then - (* this can only happen if there is a bug in the caller *) - invalid_arg (Printf.sprintf "Tried to start %s twice" service) ; + ( match exists ~service with + | true -> + (* this can only happen if there is a bug in the caller *) + let path = path service in + let invalid fmt = Printf.ksprintf invalid_arg fmt in + invalid "Tried to start %s twice: %s exists (%s)" service path (ctime path) + | false -> + () + ) ; try start_transient ?env ?properties ~exec_ty ~service cmd args with e -> Backtrace.is_important e ; From 3d394e0e4cbf317cbe126c25aed19732008fbf8b Mon Sep 17 00:00:00 2001 From: Christian Lindig Date: Fri, 20 Dec 2024 11:31:11 +0000 Subject: [PATCH 26/50] XSI-1773 clean up swtpm service files We have seen swtpm systemd service files not being removed. We now call Fe_systemctl.stop even when the servive is potentially not running to ensure clean up is happening regardless. Signed-off-by: Christian Lindig --- ocaml/xenopsd/xc/service.ml | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/ocaml/xenopsd/xc/service.ml b/ocaml/xenopsd/xc/service.ml index 0fabf791888..98c942d13a9 100644 --- a/ocaml/xenopsd/xc/service.ml +++ b/ocaml/xenopsd/xc/service.ml @@ -608,26 +608,17 @@ module SystemdDaemonMgmt (D : DAEMONPIDPATH) = struct else None - let is_running ~xs domid = - match of_domid domid with - | None -> - Compat.is_running ~xs domid - | Some key -> - Fe_systemctl.is_active ~service:key - let stop ~xs domid = - match (of_domid domid, is_running ~xs domid) with - | None, true -> + match of_domid domid with + | None when Compat.is_running ~xs domid -> Compat.stop ~xs domid - | Some service, true -> - (* xenstore cleanup is done by systemd unit file *) - let (_ : Fe_systemctl.status) = Fe_systemctl.stop ~service in - () - | Some service, false -> - info "Not trying to stop %s since it's not running" service - | None, false -> + | None -> info "Not trying to stop %s for domid %i since it's not running" D.name domid + | Some service -> + (* call even when not running for clean up *) + let (_ : Fe_systemctl.status) = Fe_systemctl.stop ~service in + () let start_daemon ~path ~args ~domid () = debug "Starting daemon: %s with args [%s]" path (String.concat "; " args) ; From 4394f84bc43484b79c97b9d3ff38a81b506504f6 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 23 Dec 2024 10:50:31 +0000 Subject: [PATCH 27/50] CA-404020: Do not fail when removing a non-existing datasource The latest changes in the metrics daemon changes how archived and live metrics are synchronised, and archived sources are created less often. This meant that on some cases, like SR.forget, the operations failed because they could query for existing sources, but the call to remove them failed, because the metrics only exist in live form, not archived one. (what happens with the live one, do they disappear when the SR is forgotten?) Signed-off-by: Pau Ruiz Safont --- ocaml/libs/xapi-rrd/lib/rrd.ml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index c9d646345cd..744544693a8 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -632,13 +632,14 @@ let rrd_add_ds rrd timestamp newds = else rrd_add_ds_unsafe rrd timestamp newds -(** Remove the named DS from an RRD. Removes all of the data associated with it, too *) +(** Remove the named DS from an RRD. Removes all of the data associated with + it, too. THe function is idempotent. *) let rrd_remove_ds rrd ds_name = let n = Utils.array_index ds_name (Array.map (fun ds -> ds.ds_name) rrd.rrd_dss) in if n = -1 then - raise (Invalid_data_source ds_name) + rrd else { rrd with From 74fe42714afdad7a1c7e511edddd948ca763cb78 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 23 Dec 2024 11:34:01 +0000 Subject: [PATCH 28/50] rrd/lib: remove outdated functions from utils Also removes some traversals when reading from XML data Signed-off-by: Pau Ruiz Safont --- ocaml/libs/xapi-rrd/lib/rrd.ml | 105 +++++++++++++-------------- ocaml/libs/xapi-rrd/lib/rrd_utils.ml | 25 +------ 2 files changed, 53 insertions(+), 77 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index 744544693a8..d68de77909a 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -635,26 +635,24 @@ let rrd_add_ds rrd timestamp newds = (** Remove the named DS from an RRD. Removes all of the data associated with it, too. THe function is idempotent. *) let rrd_remove_ds rrd ds_name = - let n = - Utils.array_index ds_name (Array.map (fun ds -> ds.ds_name) rrd.rrd_dss) - in - if n = -1 then - rrd - else - { - rrd with - rrd_dss= Utils.array_remove n rrd.rrd_dss - ; rrd_rras= - Array.map - (fun rra -> - { - rra with - rra_data= Utils.array_remove n rra.rra_data - ; rra_cdps= Utils.array_remove n rra.rra_cdps - } - ) - rrd.rrd_rras - } + match Utils.find_index (fun ds -> ds.ds_name = ds_name) rrd.rrd_dss with + | None -> + rrd + | Some n -> + { + rrd with + rrd_dss= Utils.array_remove n rrd.rrd_dss + ; rrd_rras= + Array.map + (fun rra -> + { + rra with + rra_data= Utils.array_remove n rra.rra_data + ; rra_cdps= Utils.array_remove n rra.rra_cdps + } + ) + rrd.rrd_rras + } (** Find the RRA with a particular CF that contains a particular start time, and also has a minimum pdp_cnt. If it can't find an @@ -699,18 +697,17 @@ let find_best_rras rrd pdp_interval cf start = List.filter (contains_time newstarttime) rras let query_named_ds rrd as_of_time ds_name cf = - let n = - Utils.array_index ds_name (Array.map (fun ds -> ds.ds_name) rrd.rrd_dss) - in - if n = -1 then - raise (Invalid_data_source ds_name) - else - let rras = find_best_rras rrd 0 (Some cf) (Int64.of_float as_of_time) in - match rras with - | [] -> - raise No_RRA_Available - | rra :: _ -> - Fring.peek rra.rra_data.(n) 0 + match Utils.find_index (fun ds -> ds.ds_name = ds_name) rrd.rrd_dss with + | None -> + raise (Invalid_data_source ds_name) + | Some n -> ( + let rras = find_best_rras rrd 0 (Some cf) (Int64.of_float as_of_time) in + match rras with + | [] -> + raise No_RRA_Available + | rra :: _ -> + Fring.peek rra.rra_data.(n) 0 + ) (******************************************************************************) (* Marshalling/Unmarshalling functions *) @@ -877,30 +874,26 @@ let from_xml input = (* Purge any repeated data sources from the RRD *) let ds_names = ds_names rrd in - let ds_names_set = Utils.setify ds_names in - let ds_name_counts = - List.map - (fun name -> - let x, _ = List.partition (( = ) name) ds_names in - (name, List.length x) - ) - ds_names_set - in - let removals_required = - List.filter (fun (_, x) -> x > 1) ds_name_counts - in - List.fold_left - (fun rrd (name, n) -> - (* Remove n-1 lots of this data source *) - let rec inner rrd n = - if n = 1 then - rrd - else - inner (rrd_remove_ds rrd name) (n - 1) - in - inner rrd n - ) - rrd removals_required + List.sort_uniq String.compare ds_names + |> List.filter_map (fun name -> + match List.filter (String.equal name) ds_names with + | [] | [_] -> + None + | x -> + Some (name, List.length x) + ) + |> List.fold_left + (fun rrd (name, n) -> + (* Remove n-1 lots of this data source *) + let rec inner rrd n = + if n = 1 then + rrd + else + inner (rrd_remove_ds rrd name) (n - 1) + in + inner rrd n + ) + rrd ) input diff --git a/ocaml/libs/xapi-rrd/lib/rrd_utils.ml b/ocaml/libs/xapi-rrd/lib/rrd_utils.ml index c0863d0175f..aa959a042dd 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd_utils.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd_utils.ml @@ -47,13 +47,13 @@ end let isnan x = match classify_float x with FP_nan -> true | _ -> false -let array_index e a = +let find_index f a = let len = Array.length a in let rec check i = if len <= i then - -1 - else if a.(i) = e then - i + None + else if f a.(i) then + Some i else check (i + 1) in @@ -62,23 +62,6 @@ let array_index e a = let array_remove n a = Array.append (Array.sub a 0 n) (Array.sub a (n + 1) (Array.length a - n - 1)) -let filter_map f list = - let rec inner acc l = - match l with - | [] -> - List.rev acc - | x :: xs -> - let acc = match f x with Some res -> res :: acc | None -> acc in - inner acc xs - in - inner [] list - -let rec setify = function - | [] -> - [] - | x :: xs -> - if List.mem x xs then setify xs else x :: setify xs - (** C# and JS representation of special floats are 'NaN' and 'Infinity' which are different from ocaml's native representation. Caml is fortunately more forgiving when doing a float_of_string, and can cope with these forms, so From e4e20ad93fcebfcde5f6fb32a471a87661ebb4e1 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Mon, 23 Dec 2024 15:50:23 +0000 Subject: [PATCH 29/50] rrdd: add more comments about its datastructures Signed-off-by: Pau Ruiz Safont --- ocaml/libs/xapi-rrd/lib/rrd.ml | 6 +++--- ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml | 10 +++++----- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 2 +- ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml | 9 ++++++--- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/ocaml/libs/xapi-rrd/lib/rrd.ml b/ocaml/libs/xapi-rrd/lib/rrd.ml index d68de77909a..75610964fc1 100644 --- a/ocaml/libs/xapi-rrd/lib/rrd.ml +++ b/ocaml/libs/xapi-rrd/lib/rrd.ml @@ -621,9 +621,9 @@ let rrd_add_ds_unsafe rrd timestamp newds = rrd.rrd_rras } -(** Add in a new DS into a pre-existing RRD. Preserves data of all the other archives - and fills the new one full of NaNs. Note that this doesn't fill in the CDP values - correctly at the moment! +(** Add in a new DS into a pre-existing RRD. Preserves data of all the other + archives and fills the new one full of NaNs. Note that this doesn't fill + in the CDP values correctly at the moment! *) let rrd_add_ds rrd timestamp newds = diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml index c46a33d6f96..3decd260673 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml @@ -51,10 +51,10 @@ let merge_new_dss rrdi dss = !Rrdd_shared.enable_all_dss || ds.ds_default in let default_dss = StringMap.filter should_enable_ds dss in - (* NOTE: It's enough to check if all the default datasources have been added - to the RRD_INFO, because if a non-default one has been enabled at runtime, - it's added to the RRD immediately and we don't need to bother *) - let new_dss = + (* NOTE: Only add enabled dss to the live rrd, ignoring non-default ones. + This is because non-default ones are added to the RRD when they are + enabled. *) + let new_enabled_dss = StringMap.filter (fun ds_name _ -> not (StringMap.mem ds_name rrdi.dss)) default_dss @@ -73,7 +73,7 @@ let merge_new_dss rrdi dss = rrd_add_ds_unsafe rrd timestamp (Rrd.ds_create ds.ds_name ds.Ds.ds_type ~mrhb:300.0 Rrd.VT_Unknown) ) - new_dss rrdi.rrd + new_enabled_dss rrdi.rrd ) module OwnerMap = Map.Make (struct diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index f8f3c99bf8b..d9d41114e00 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -347,7 +347,7 @@ let send_host_rrd_to_master master_address = let fail_missing name = raise (Rrdd_error (Datasource_missing name)) (** {add_ds rrdi ds_name} creates a new time series (rrd) in {rrdi} with the - name {ds_name}. The operation fails if rrdi does not contain any live + name {ds_name}. The operation fails if rrdi does not contain any datasource with the name {ds_name} *) let add_ds ~rrdi ~ds_name = match Rrd.StringMap.find_opt ds_name rrdi.dss with diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml index 08807e39b74..883f9844cb5 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml @@ -77,10 +77,13 @@ let mutex = Mutex.create () type rrd_info = { rrd: Rrd.rrd + (** Contains the live metrics, i.e. The datasources that are enabled + and being collected .*) ; mutable dss: (float * Ds.ds) Rrd.StringMap.t - (* Important: this must contain the entire list of datasources associated - with the RRD, even the ones disabled by default, as rrd_add_ds calls - can enable DSs at runtime *) + (** Important: this must contain the entire list of datasources + associated with the RRD, even the ones disabled by default, because + functions like rrd_add_ds or rrd_remove_ds expect disabled + datasources to be present. e.g. to enable DSs at runtime *) ; mutable domid: int } From 0e8cc2074f1fa0f39bc289cd0a3f54e0532121b8 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Fri, 27 Dec 2024 14:24:51 +0800 Subject: [PATCH 30/50] CA-404062: Wrongly restart xapi when receiving HTTP errors The xapi on a supporter host would restart when it received HTTP error from the xapi on the coordinator host. This breaks the pool.designate_new_master use case for a big pool, e.g. 64-host pool. In this case, some supporters may restart unexpectedly within the phase of committing new coordinator due to the logic above. Additionally, the purpose of this logic, explained by the error message, is not correct also. Not all HTTP errors are caused by "our master address is wrong". On the other hand, if a use case requires to restart the xapi, an more explicit logic should ensure that, instead of leveraging an implicit HTTP error code. Furhtermore, if a supporter indeed is connecting to a wrong coordinator, this should be a bug and can be recovered manually. Based on above arguments, the restarting xapi after receiving HTTP error is removed. This follows the TODO concluded in CA-36936 as well. Signed-off-by: Ming Lu --- ocaml/database/master_connection.ml | 115 ++++++++++++++-------------- 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/ocaml/database/master_connection.ml b/ocaml/database/master_connection.ml index e10658d48c0..89247488820 100644 --- a/ocaml/database/master_connection.ml +++ b/ocaml/database/master_connection.ml @@ -204,6 +204,7 @@ let connection_timeout = ref !Db_globs.master_connection_default_timeout are exceeded *) let restart_on_connection_timeout = ref true + exception Content_length_required let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) : @@ -221,6 +222,59 @@ let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) : else if !backoff_delay > 256.0 then backoff_delay := 256.0 in + let reconnect () = + (* RPC failed - there's no way we can recover from this so try reopening connection every 2s + backoff delay *) + ( match !my_connection with + | None -> + () + | Some st_proc -> ( + my_connection := None ; + (* don't want to try closing multiple times *) + try Stunnel.disconnect st_proc with _ -> () + ) + ) ; + let time_sofar = Unix.gettimeofday () -. time_call_started in + if !connection_timeout < 0. then ( + if not !surpress_no_timeout_logs then ( + debug + "Connection to master died. I will continue to retry \ + indefinitely (supressing future logging of this message)." ; + error + "Connection to master died. I will continue to retry \ + indefinitely (supressing future logging of this message)." + ) ; + surpress_no_timeout_logs := true + ) else + debug + "Connection to master died: time taken so far in this call '%f'; \ + will %s" + time_sofar + ( if !connection_timeout < 0. then + "never timeout" + else + Printf.sprintf "timeout after '%f'" !connection_timeout + ) ; + if time_sofar > !connection_timeout && !connection_timeout >= 0. then + if !restart_on_connection_timeout then ( + debug + "Exceeded timeout for retrying master connection: restarting xapi" ; + !Db_globs.restart_fn () + ) else ( + debug + "Exceeded timeout for retrying master connection: raising \ + Cannot_connect_to_master" ; + raise Cannot_connect_to_master + ) ; + debug "Sleeping %f seconds before retrying master connection..." + !backoff_delay ; + let timed_out = Scheduler.PipeDelay.wait delay !backoff_delay in + if not timed_out then + debug "%s: Sleep interrupted, retrying master connection now" + __FUNCTION__ ; + update_backoff_delay () ; + D.log_and_ignore_exn open_secure_connection + in + while not !write_ok do try let req_string = req in @@ -266,67 +320,12 @@ let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) : Db_globs.http_limit_max_rpc_size ; debug "Re-raising exception to caller." ; raise Http.Client_requested_size_over_limit - (* TODO: This http exception handler caused CA-36936 and can probably be removed now that there's backoff delay in the generic handler _ below *) | Http_client.Http_error (http_code, err_msg) -> - error - "Received HTTP error %s (%s) from master. This suggests our master \ - address is wrong. Sleeping for %.0fs and then executing restart_fn." - http_code err_msg - !Db_globs.permanent_master_failure_retry_interval ; - Thread.delay !Db_globs.permanent_master_failure_retry_interval ; - !Db_globs.restart_fn () + error "Received HTTP error %s (%s) from the coordinator" http_code err_msg ; + reconnect () | e -> error "Caught %s" (Printexc.to_string e) ; - (* RPC failed - there's no way we can recover from this so try reopening connection every 2s + backoff delay *) - ( match !my_connection with - | None -> - () - | Some st_proc -> ( - my_connection := None ; - (* don't want to try closing multiple times *) - try Stunnel.disconnect st_proc with _ -> () - ) - ) ; - let time_sofar = Unix.gettimeofday () -. time_call_started in - if !connection_timeout < 0. then ( - if not !surpress_no_timeout_logs then ( - debug - "Connection to master died. I will continue to retry \ - indefinitely (supressing future logging of this message)." ; - error - "Connection to master died. I will continue to retry \ - indefinitely (supressing future logging of this message)." - ) ; - surpress_no_timeout_logs := true - ) else - debug - "Connection to master died: time taken so far in this call '%f'; \ - will %s" - time_sofar - ( if !connection_timeout < 0. then - "never timeout" - else - Printf.sprintf "timeout after '%f'" !connection_timeout - ) ; - if time_sofar > !connection_timeout && !connection_timeout >= 0. then - if !restart_on_connection_timeout then ( - debug - "Exceeded timeout for retrying master connection: restarting xapi" ; - !Db_globs.restart_fn () - ) else ( - debug - "Exceeded timeout for retrying master connection: raising \ - Cannot_connect_to_master" ; - raise Cannot_connect_to_master - ) ; - debug "Sleeping %f seconds before retrying master connection..." - !backoff_delay ; - let timed_out = Scheduler.PipeDelay.wait delay !backoff_delay in - if not timed_out then - debug "%s: Sleep interrupted, retrying master connection now" - __FUNCTION__ ; - update_backoff_delay () ; - D.log_and_ignore_exn open_secure_connection + reconnect () done ; !result From 9a44916385b7e787d553210f5b90e4d684b860fc Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Thu, 2 Jan 2025 05:32:59 +0000 Subject: [PATCH 31/50] CA-404062: Reformat Signed-off-by: Ming Lu --- ocaml/database/master_connection.ml | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/ocaml/database/master_connection.ml b/ocaml/database/master_connection.ml index 89247488820..ed9bfbd2826 100644 --- a/ocaml/database/master_connection.ml +++ b/ocaml/database/master_connection.ml @@ -204,7 +204,6 @@ let connection_timeout = ref !Db_globs.master_connection_default_timeout are exceeded *) let restart_on_connection_timeout = ref true - exception Content_length_required let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) : @@ -237,17 +236,17 @@ let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) : if !connection_timeout < 0. then ( if not !surpress_no_timeout_logs then ( debug - "Connection to master died. I will continue to retry \ - indefinitely (supressing future logging of this message)." ; + "Connection to master died. I will continue to retry indefinitely \ + (supressing future logging of this message)." ; error - "Connection to master died. I will continue to retry \ - indefinitely (supressing future logging of this message)." + "Connection to master died. I will continue to retry indefinitely \ + (supressing future logging of this message)." ) ; surpress_no_timeout_logs := true ) else debug - "Connection to master died: time taken so far in this call '%f'; \ - will %s" + "Connection to master died: time taken so far in this call '%f'; will \ + %s" time_sofar ( if !connection_timeout < 0. then "never timeout" @@ -256,8 +255,7 @@ let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) : ) ; if time_sofar > !connection_timeout && !connection_timeout >= 0. then if !restart_on_connection_timeout then ( - debug - "Exceeded timeout for retrying master connection: restarting xapi" ; + debug "Exceeded timeout for retrying master connection: restarting xapi" ; !Db_globs.restart_fn () ) else ( debug @@ -269,8 +267,7 @@ let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) : !backoff_delay ; let timed_out = Scheduler.PipeDelay.wait delay !backoff_delay in if not timed_out then - debug "%s: Sleep interrupted, retrying master connection now" - __FUNCTION__ ; + debug "%s: Sleep interrupted, retrying master connection now" __FUNCTION__ ; update_backoff_delay () ; D.log_and_ignore_exn open_secure_connection in @@ -321,7 +318,8 @@ let do_db_xml_rpc_persistent_with_reopen ~host:_ ~path (req : string) : debug "Re-raising exception to caller." ; raise Http.Client_requested_size_over_limit | Http_client.Http_error (http_code, err_msg) -> - error "Received HTTP error %s (%s) from the coordinator" http_code err_msg ; + error "Received HTTP error %s (%s) from the coordinator" http_code + err_msg ; reconnect () | e -> error "Caught %s" (Printexc.to_string e) ; From 2fafeb7724fdf88705d22093deab50ad8f86f15c Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Tue, 31 Dec 2024 08:39:41 +0000 Subject: [PATCH 32/50] CP-51895: Drop FCoE support when fcoe_driver does not exists FCoE support will be removed from XS9 and dom0 will no longer provide fcoe_driver. However, XS8 still actively support it. Xapi checks the existence of fcoe_driver, and thinks all PIFs no longer support FCoE if fcoe_driver not found Signed-off-by: Lin Liu --- ocaml/networkd/lib/network_utils.ml | 17 +++++--- ocaml/xapi/xapi_globs.ml | 11 +++--- ocaml/xapi/xapi_pif.ml | 60 ++++++++++++++++------------- 3 files changed, 51 insertions(+), 37 deletions(-) diff --git a/ocaml/networkd/lib/network_utils.ml b/ocaml/networkd/lib/network_utils.ml index 4a473b29579..0cd8d4769bb 100644 --- a/ocaml/networkd/lib/network_utils.ml +++ b/ocaml/networkd/lib/network_utils.ml @@ -1067,12 +1067,17 @@ module Fcoe = struct let call ?log args = call_script ?log ~timeout:(Some 10.0) !fcoedriver args let get_capabilities name = - try - let output = call ~log:false ["--xapi"; name; "capable"] in - if Astring.String.is_infix ~affix:"True" output then ["fcoe"] else [] - with _ -> - debug "Failed to get fcoe support status on device %s" name ; - [] + match Sys.file_exists !fcoedriver with + | false -> + [] (* Does not support FCoE *) + | true -> ( + try + let output = call ~log:false ["--xapi"; name; "capable"] in + if Astring.String.is_infix ~affix:"True" output then ["fcoe"] else [] + with _ -> + debug "Failed to get fcoe support status on device %s" name ; + [] + ) end module Sysctl = struct diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index efdcabfbdb6..23cc4e95c39 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1689,11 +1689,6 @@ module Resources = struct ; ("xsh", xsh, "Path to xsh binary") ; ("static-vdis", static_vdis, "Path to static-vdis script") ; ("xen-cmdline-script", xen_cmdline_script, "Path to xen-cmdline script") - ; ( "fcoe-driver" - , fcoe_driver - , "Execute during PIF unplug to get the lun devices related with the \ - ether interface of the PIF" - ) ; ("list_domains", list_domains, "Path to the list_domains command") ; ("systemctl", systemctl, "Control the systemd system and service manager") ; ( "alert-certificate-check" @@ -1797,6 +1792,12 @@ module Resources = struct , "Path to yum-config-manager command" ) ; ("c_rehash", c_rehash, "Path to regenerate CA store") + (* Dropped since XS9, list here as XS8 still requires it *) + ; ( "fcoe-driver" + , fcoe_driver + , "Execute during PIF unplug to get the lun devices related with the \ + ether interface of the PIF" + ) ] let essential_files = diff --git a/ocaml/xapi/xapi_pif.ml b/ocaml/xapi/xapi_pif.ml index a2383ed9d9b..f52def6d8ee 100644 --- a/ocaml/xapi/xapi_pif.ml +++ b/ocaml/xapi/xapi_pif.ml @@ -319,33 +319,41 @@ let assert_no_other_local_pifs ~__context ~host ~network = ) let assert_fcoe_not_in_use ~__context ~self = - let interface = Db.PIF.get_device ~__context ~self in - let output, _ = - Forkhelpers.execute_command_get_output !Xapi_globs.fcoe_driver - ["-t"; interface] - in - let output = String.trim output in - debug "Scsi ids on %s are: %s" interface output ; - let fcoe_scsids = Str.split (Str.regexp " ") output in - Helpers.get_my_pbds __context - |> List.iter (fun (_, pbd_rec) -> - let sr = pbd_rec.API.pBD_SR in - match Db.SR.get_type ~__context ~self:sr with - | "lvmofcoe" -> ( - try - let scsid = List.assoc "SCSIid" pbd_rec.API.pBD_device_config in - if List.mem scsid fcoe_scsids then - raise - (Api_errors.Server_error - ( Api_errors.pif_has_fcoe_sr_in_use - , [Ref.string_of self; Ref.string_of sr] - ) - ) - with Not_found -> () + match Sys.file_exists !Xapi_globs.fcoe_driver with + | false -> + (* Does not support FCoE from XS9, presuming not in use + * Upgrade plugin will block upgrade with FCoE in use *) + () + | true -> + let interface = Db.PIF.get_device ~__context ~self in + let output, _ = + Forkhelpers.execute_command_get_output !Xapi_globs.fcoe_driver + ["-t"; interface] + in + let output = String.trim output in + debug "Scsi ids on %s are: %s" interface output ; + let fcoe_scsids = Str.split (Str.regexp " ") output in + Helpers.get_my_pbds __context + |> List.iter (fun (_, pbd_rec) -> + let sr = pbd_rec.API.pBD_SR in + match Db.SR.get_type ~__context ~self:sr with + | "lvmofcoe" -> ( + try + let scsid = + List.assoc "SCSIid" pbd_rec.API.pBD_device_config + in + if List.mem scsid fcoe_scsids then + raise + (Api_errors.Server_error + ( Api_errors.pif_has_fcoe_sr_in_use + , [Ref.string_of self; Ref.string_of sr] + ) + ) + with Not_found -> () + ) + | _ -> + () ) - | _ -> - () - ) let find_or_create_network (bridge : string) (device : string) ~managed ~__context = From e3f11da6af88b815cfcbf3cc750dbe71a20d7603 Mon Sep 17 00:00:00 2001 From: Guillaume Date: Thu, 2 Jan 2025 10:07:56 +0100 Subject: [PATCH 33/50] Report memory available as Kib This fixes #6157 Signed-off-by: Guillaume --- ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml index 455723633bb..bb0285b4b18 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml @@ -292,7 +292,7 @@ let dss_mem_vms doms = | Ok mem -> Some ( Rrd.VM uuid - , Ds.ds_make ~name:"memory_internal_free" ~units:"B" + , Ds.ds_make ~name:"memory_internal_free" ~units:"KiB" ~description:"Dom0 current free memory" ~value:(Rrd.VT_Int64 mem) ~ty:Rrd.Gauge ~min:0.0 ~default:true () From 75f0b41f93996d9d52fe7911844edf6f939ef161 Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Thu, 2 Jan 2025 13:46:39 +0000 Subject: [PATCH 34/50] xenopsd: Avoid calling to_string every time Minor style. "uuid" is always converted to string, avoid doing it every time it's used. Signed-off-by: Frediano Ziglio --- ocaml/xenopsd/xc/domain.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/ocaml/xenopsd/xc/domain.ml b/ocaml/xenopsd/xc/domain.ml index d33fc482e5f..fce32abf19b 100644 --- a/ocaml/xenopsd/xc/domain.ml +++ b/ocaml/xenopsd/xc/domain.ml @@ -625,6 +625,7 @@ let shutdown_wait_for_ack (t : Xenops_task.task_handle) ~timeout ~xc ~xs domid (domain_type : [`pv | `pvh | `hvm]) req = let di = Xenctrl.domain_getinfo xc domid in let uuid = get_uuid ~xc domid in + let uuid = Uuidx.to_string uuid in let expecting_ack = match (di.Xenctrl.hvm_guest, domain_type) with | false, _ -> @@ -640,12 +641,12 @@ let shutdown_wait_for_ack (t : Xenops_task.task_handle) ~timeout ~xc ~xs domid debug "VM = %s; domid = %d; HVM guest without PV drivers: not expecting any \ acknowledgement" - (Uuidx.to_string uuid) domid ; + uuid domid ; Xenctrl.domain_shutdown xc domid (shutdown_to_xc_shutdown req) ) else ( debug "VM = %s; domid = %d; Waiting for domain to acknowledge shutdown request" - (Uuidx.to_string uuid) domid ; + uuid domid ; let path = control_shutdown ~xs domid in let cancel = Domain domid in if @@ -654,11 +655,10 @@ let shutdown_wait_for_ack (t : Xenops_task.task_handle) ~timeout ~xc ~xs domid [Watch.key_to_disappear path] t ~xs ~timeout () then - info "VM = %s; domid = %d; Domain acknowledged shutdown request" - (Uuidx.to_string uuid) domid - else - debug "VM = %s; domid = %d; Domain disappeared" (Uuidx.to_string uuid) + info "VM = %s; domid = %d; Domain acknowledged shutdown request" uuid domid + else + debug "VM = %s; domid = %d; Domain disappeared" uuid domid ) let sysrq ~xs domid key = From a2e04f9811d39f65b1d1294823a7f56455a49769 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Tue, 17 Dec 2024 18:00:44 +0800 Subject: [PATCH 35/50] CA-403620: Drop the usage of fuser in stunnel client proxy The drawback of fuser is that it gets too many things involved. E.g. it is observed that it got stuck on cifs kernel module. This change uses a cleaner way to remember the stunnel client proxy. Even when the xapi restarted unexpectedly, it can stop the remnant stunnel proxy and start a new one. Signed-off-by: Ming Lu --- ocaml/libs/stunnel/stunnel.ml | 58 ++++++++++++-------------------- ocaml/libs/stunnel/stunnel.mli | 3 +- ocaml/xapi/repository_helpers.ml | 7 ++-- 3 files changed, 28 insertions(+), 40 deletions(-) diff --git a/ocaml/libs/stunnel/stunnel.ml b/ocaml/libs/stunnel/stunnel.ml index 6b7d42608e7..0f5c74564c8 100644 --- a/ocaml/libs/stunnel/stunnel.ml +++ b/ocaml/libs/stunnel/stunnel.ml @@ -448,44 +448,30 @@ let with_connect ?unique_id ?use_fork_exec_helper ?write_to_log ~verify_cert ) 5 -let with_client_proxy ~verify_cert ~remote_host ~remote_port ~local_host - ~local_port f = - ( try - D.debug "Clean up running stunnel client proxy if there is any ..." ; - let out, _ = - Forkhelpers.execute_command_get_output "/usr/sbin/fuser" - ["-4k"; string_of_int local_port ^ "/tcp"] - in - D.debug "Killed running stunnel client proxy:%s" out - with - | Forkhelpers.Spawn_internal_error (stderr, stdout, process_status) -> ( - match process_status with - | Unix.WEXITED 1 -> - D.debug "No running stunnel client proxy" - | _ -> - D.warn - "Cleaning up running stunnel client proxy returned unexpectedly: \ - stdout=(%s); stderr=(%s)" - stdout stderr - ) - ) ; - - retry +let with_client_proxy_systemd_service ~verify_cert ~remote_host ~remote_port + ~local_host ~local_port ~service f = + let cmd_path = stunnel_path () in + let config = + config_file + ~accept:(Some (local_host, local_port)) + verify_cert remote_host remote_port + in + let stop () = ignore (Fe_systemctl.stop ~service) in + (* Try stopping anyway before starting it. *) + ignore_exn stop () ; + let conf_path, out = Filename.open_temp_file service ".conf" in + let finally = Xapi_stdext_pervasives.Pervasiveext.finally in + finally (fun () -> - let pid, _ = - attempt_one_connect - (`Local_host_port (local_host, local_port)) - verify_cert remote_host remote_port - in - D.debug "Started a client proxy (pid:%s): %s:%s -> %s:%s" - (string_of_int (getpid pid)) - local_host (string_of_int local_port) remote_host - (string_of_int remote_port) ; - Xapi_stdext_pervasives.Pervasiveext.finally - (fun () -> f ()) - (fun () -> disconnect_with_pid ~wait:false ~force:true pid) + finally (fun () -> output_string out config) (fun () -> close_out out) ; + finally + (fun () -> + Fe_systemctl.start_transient ~service cmd_path [conf_path] ; + f () + ) + (fun () -> ignore_exn stop ()) ) - 5 + (fun () -> Unixext.unlink_safe conf_path) let check_verify_error line = let sub_after i s = diff --git a/ocaml/libs/stunnel/stunnel.mli b/ocaml/libs/stunnel/stunnel.mli index eba084a9ef2..99fcba608ce 100644 --- a/ocaml/libs/stunnel/stunnel.mli +++ b/ocaml/libs/stunnel/stunnel.mli @@ -88,11 +88,12 @@ val with_moved_exn : t -> (t -> 'd) -> 'd val safe_release : t -> unit -val with_client_proxy : +val with_client_proxy_systemd_service : verify_cert:verification_config option -> remote_host:string -> remote_port:int -> local_host:string -> local_port:int + -> service:string -> (unit -> 'a) -> 'a diff --git a/ocaml/xapi/repository_helpers.ml b/ocaml/xapi/repository_helpers.ml index 51699612739..38a46edc3bb 100644 --- a/ocaml/xapi/repository_helpers.ml +++ b/ocaml/xapi/repository_helpers.ml @@ -398,10 +398,11 @@ let with_local_repositories ~__context f = with Pool_role.This_host_is_a_master -> Option.get (Helpers.get_management_ip_addr ~__context) in - Stunnel.with_client_proxy ~verify_cert:(Stunnel_client.pool ()) - ~remote_host:master_addr ~remote_port:Constants.default_ssl_port - ~local_host:"127.0.0.1" + Stunnel.with_client_proxy_systemd_service + ~verify_cert:(Stunnel_client.pool ()) ~remote_host:master_addr + ~remote_port:Constants.default_ssl_port ~local_host:"127.0.0.1" ~local_port:!Xapi_globs.local_yum_repo_port + ~service:"stunnel_proxy_for_update_client" @@ fun () -> let enabled = get_enabled_repositories ~__context in Xapi_stdext_pervasives.Pervasiveext.finally From e7f2b70dfa210418c08b2bb16432f6943c370c16 Mon Sep 17 00:00:00 2001 From: Ming Lu Date: Thu, 26 Dec 2024 14:41:31 +0800 Subject: [PATCH 36/50] CA-403620: Make the stunnel proxy local port configurable Making it configurable can avoid the situation when the port conflicts with others, e.g. an external program from users. Signed-off-by: Ming Lu --- ocaml/xapi/xapi_globs.ml | 1 + 1 file changed, 1 insertion(+) diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml index efdcabfbdb6..a62ba9cdb43 100644 --- a/ocaml/xapi/xapi_globs.ml +++ b/ocaml/xapi/xapi_globs.ml @@ -1143,6 +1143,7 @@ let xapi_globs_spec = ; ("max_traces", Int max_traces) ; ("max_observer_file_size", Int max_observer_file_size) ; ("test-open", Int test_open) (* for consistency with xenopsd *) + ; ("local_yum_repo_port", Int local_yum_repo_port) ] let xapi_globs_spec_with_descriptions = [] From b245449bc885be9a2dc226fc231ab68598588d8e Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 2 Jan 2025 15:45:07 +0000 Subject: [PATCH 37/50] gencert: name the pem parsers Helpful for debugging pem-parsing issues Signed-off-by: Pau Ruiz Safont --- ocaml/gencert/pem.ml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/ocaml/gencert/pem.ml b/ocaml/gencert/pem.ml index 436fa73e4fd..54a98875dbb 100644 --- a/ocaml/gencert/pem.ml +++ b/ocaml/gencert/pem.ml @@ -43,29 +43,31 @@ let footer = function | OTHER -> "-----END PRIVATE KEY-----" -let key_header = string "-----BEGIN" *> kind <* string "PRIVATE KEY-----" +let key_header = + string "-----BEGIN" *> kind <* string "PRIVATE KEY-----" "key_header" -let key_footer k = string (footer k) +let key_footer k = string (footer k) "key_footer" -let cert_header = string "-----BEGIN CERTIFICATE-----" +let cert_header = string "-----BEGIN CERTIFICATE-----" "cert_header" -let cert_footer = string "-----END CERTIFICATE-----" +let cert_footer = string "-----END CERTIFICATE-----" "cert_footer" let key = key_header >>= fun kind -> data >>= fun body -> key_footer kind *> return (String.concat "" [header kind; body; footer kind]) + "key" let cert = cert_header >>= fun hd -> data >>= fun body -> - cert_footer >>= fun tl -> return (String.concat "" [hd; body; tl]) + cert_footer >>= fun tl -> return (String.concat "" [hd; body; tl]) "cert" let pem = many end_of_line *> key >>= fun private_key -> many end_of_line *> cert >>= fun host_cert -> many end_of_line *> many cert >>= fun other_certs -> - many end_of_line *> return {private_key; host_cert; other_certs} + many end_of_line *> return {private_key; host_cert; other_certs} "pem" let defer f = Fun.protect ~finally:f From 2d84622f08f1c20c487e2a3132cc42d5e97fe37b Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Thu, 2 Jan 2025 15:50:44 +0000 Subject: [PATCH 38/50] CA-404236, gencert: when parsing pems, ignore data between key and certificates This is needed in order to be compliant with RFC 7468: https://datatracker.ietf.org/doc/html/rfc7468#section-2 Data before the encapsulation boundaries are permitted, and parsers MUST NOT malfunction when processing such data. Signed-off-by: Pau Ruiz Safont --- ocaml/gencert/pem.ml | 16 +++- .../pems/pass-extra-lines-multiple-certs.pem | 87 +++++++++++++++++++ .../pems/pass-extra-lines-one-cert.pem | 49 +++++++++++ 3 files changed, 149 insertions(+), 3 deletions(-) create mode 100644 ocaml/gencert/test_data/pems/pass-extra-lines-multiple-certs.pem create mode 100644 ocaml/gencert/test_data/pems/pass-extra-lines-one-cert.pem diff --git a/ocaml/gencert/pem.ml b/ocaml/gencert/pem.ml index 54a98875dbb..86182c2dfd6 100644 --- a/ocaml/gencert/pem.ml +++ b/ocaml/gencert/pem.ml @@ -18,6 +18,8 @@ type t = {private_key: string; host_cert: string; other_certs: string list} let is_data = function '-' -> false | _ -> true +let is_eol = function '\n' | '\r' -> true | _ -> false + let data = take_while1 is_data type kind = RSA | EC | OTHER @@ -58,15 +60,23 @@ let key = key_footer kind *> return (String.concat "" [header kind; body; footer kind]) "key" +let line = take_till is_eol *> end_of_line + +(* try to read a key, or skip a line and try again *) +let until_key = fix (fun m -> key <|> line *> m) "until_key" + let cert = cert_header >>= fun hd -> data >>= fun body -> cert_footer >>= fun tl -> return (String.concat "" [hd; body; tl]) "cert" +(* try to read a cert, or skip a line and try again *) +let until_cert = fix (fun m -> cert <|> line *> m) "until_cert" + let pem = - many end_of_line *> key >>= fun private_key -> - many end_of_line *> cert >>= fun host_cert -> - many end_of_line *> many cert >>= fun other_certs -> + until_key >>= fun private_key -> + until_cert >>= fun host_cert -> + many until_cert >>= fun other_certs -> many end_of_line *> return {private_key; host_cert; other_certs} "pem" let defer f = Fun.protect ~finally:f diff --git a/ocaml/gencert/test_data/pems/pass-extra-lines-multiple-certs.pem b/ocaml/gencert/test_data/pems/pass-extra-lines-multiple-certs.pem new file mode 100644 index 00000000000..eeb088698a2 --- /dev/null +++ b/ocaml/gencert/test_data/pems/pass-extra-lines-multiple-certs.pem @@ -0,0 +1,87 @@ +extra line +-----BEGIN RSA PRIVATE KEY----- +MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQD7lI5jUArCqncH +tzcSFK+OJgXMv16Ai0sjsLKmxUVCnlElen/0VEIyTWwsHeB+oxxDluGE8zhB9bSI +r3C1xHEUgtgj+gf6/A0Qnl/fIGkhuOFsPoholxXXGk2QkPQlGrE9FRxFNGQpIur0 +eniepJw4K+SMqAaGUvdwWfe44pYjOsmAS3IWLERdpjxcupgvHLwEnk+4zheypp93 +iVvmqwrX8okLiJJZap1ew6EgKzeK2mw9HUed4d0AGKCkAhghzGTl/IsLz4QOOFes +rY4awQwzK1SwBvF49xAuOiRbURdzt+K7GsoN0lm5P8CxZmrDSGpGR1BICrMSatSG +N0NuOCJ9AgMBAAECggEATgm51VKZ0+Kew5Twjzo9bqGawPVHsiYDK9H+yL5+inij +gTWrhTWxxvq/KDwoS//6n3ipAd2UQNmfo5qQIsIJtawUsaw4V4Fh6BrIcGUUV3KK +8lG/bHoZOz0cfFCKewv5mJH4z/q9awk6ypVG3yb+kmoDHiJsy7Pmr0IpFn+qxMg1 +EYZU91G10DguXekciRtNcZJRL0wCQR3s2OwDdQUC+XIotvAsKiuhWl++MLwn42ad +EwhzLuLd312qWg58ByCcNq8/XJkHJUbKDTWmBRGopWRliduP+Kb6vJZ16KL0G2B+ +OKuTQxMOzVVmumXdEVj3kH54cjpn7kCq9jwhhSJiQQKBgQD94ZFOzsUzZfmNlDZ3 +hFmkFuFpQCacH58FQX/vD6JQ84HwEHJx69aHYI6olCNaKcNwMhsOw+0KqBRWZnCf +A6oMWUf3pkogV5JZJy7DyHNOmkfI/w8NcWtqJ03pCoA237f5RH0sul2ady9BVzsJ +/8rb3B5uDw8+XesnG8Ryj6BCsQKBgQD9rhKfHxJgsZUzyassIumYcLTefgdoeCq5 +awd+YaM9jrGGN1ty8dTEzo3rbovnz8y+ZJMzaDRbCUeNTQjKDox8mWffRTpjxcks +rJzImY7coBdnZT8K4C5OMoeCAr30FI1veXBk/XFfr56h1X8QbmM2kuJwpsf5bOaf +CTfL2q2XjQKBgHem4pvYuXoC2n1OV+k2GCVMn0nCcS/tez234/qgTKiISzoAFl/4 +fW/qIvHyd0LcIf7zrmrkDgiStJsPxo465N7TCSb/WToq649W9yRQiX+HGMPy6X41 +cSFjisWFLG4wO/2fuLrmzoypFT1fRjTtOAcsk67dLBsBmn0hChHP/QDRAoGASXS7 +XaogpzEk1A8kaq5dV8/i/74cpQqOzIwKanUZULzd+NBUwa72/loVTEQBbQmF7ueu +nCcjae0A9BCHaALYeUfuhP9Fzhg6jZ4Z9BhK/uW4gS8XFy4dGnWVOXdTy7ab0din +TAb7akqvM4tftMFSJz5XJWmV5Eq9aPXBW10iAQ0CgYBl6PsdqWBjvPnqX3NCyAGH +ZO4iUcrqODdeTcKpILgqBmh9/IepClgCtwW1Iluna7QTDtVqotKcft1BtHJzeKWT +6TvCgje2k0RWo6TkzroaF74lyAojzWOrmuq+skVbWTiebc4bCA1KtLMLaQHIEtdo +FIPEq03cDKVNDCgABw4mkw== +-----END RSA PRIVATE KEY----- +extra line +-----BEGIN CERTIFICATE----- +MIIC5jCCAc6gAwIBAgIIaYRSm3Q7zc8wDQYJKoZIhvcNAQELBQAwIDEeMBwGA1UE +AwwVbGN5Mi1kdDExMC54ZW5ydGNsb3VkMB4XDTIwMTAxNDE1NTc1MloXDTMwMTAx +MjE1NTc1MlowIDEeMBwGA1UEAwwVbGN5Mi1kdDExMC54ZW5ydGNsb3VkMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA+5SOY1AKwqp3B7c3EhSvjiYFzL9e +gItLI7CypsVFQp5RJXp/9FRCMk1sLB3gfqMcQ5bhhPM4QfW0iK9wtcRxFILYI/oH ++vwNEJ5f3yBpIbjhbD6IaJcV1xpNkJD0JRqxPRUcRTRkKSLq9Hp4nqScOCvkjKgG +hlL3cFn3uOKWIzrJgEtyFixEXaY8XLqYLxy8BJ5PuM4Xsqafd4lb5qsK1/KJC4iS +WWqdXsOhICs3itpsPR1HneHdABigpAIYIcxk5fyLC8+EDjhXrK2OGsEMMytUsAbx +ePcQLjokW1EXc7fiuxrKDdJZuT/AsWZqw0hqRkdQSAqzEmrUhjdDbjgifQIDAQAB +oyQwIjAgBgNVHREEGTAXghVsY3kyLWR0MTEwLnhlbnJ0Y2xvdWQwDQYJKoZIhvcN +AQELBQADggEBAHEkeEjHilXdVgQhD/z46prXObB26uO97yFUcUIalzhb/P3zmfjb +LFatTFn5jgienMmdP90uj7Ly1R6VOa+tX/o+XtSJaZwuNMtixv9qwo3nrFZdw8yF +GgsmbAR+1hu0TG3RNpDIiES4D3JmVP8MgmwLw1kN3cBVptx73lE3uc8vZnNtIDOl +erJb9fD3IOv/RZ78mxMnajZTHY5kg2e96d/a6HgY39vXMwycjp8wIE/+4g94fIc/ +/a2+BGYjCWJZyoLgmHcXEU8fOxe9yUWbFQf0wnqsLJIqzaQU1w2w6mkh4+xsI/nA +JwFfXQKd3fzsvgmufpAbXt/AHljFvC/qjTI= +-----END CERTIFICATE----- +extra line +-----BEGIN CERTIFICATE----- +MIIC5jCCAc6gAwIBAgIIaYRSm3Q7zc8wDQYJKoZIhvcNAQELBQAwIDEeMBwGA1UE +AwwVbGN5Mi1kdDExMC54ZW5ydGNsb3VkMB4XDTIwMTAxNDE1NTc1MloXDTMwMTAx +MjE1NTc1MlowIDEeMBwGA1UEAwwVbGN5Mi1kdDExMC54ZW5ydGNsb3VkMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA+5SOY1AKwqp3B7c3EhSvjiYFzL9e +gItLI7CypsVFQp5RJXp/9FRCMk1sLB3gfqMcQ5bhhPM4QfW0iK9wtcRxFILYI/oH ++vwNEJ5f3yBpIbjhbD6IaJcV1xpNkJD0JRqxPRUcRTRkKSLq9Hp4nqScOCvkjKgG +hlL3cFn3uOKWIzrJgEtyFixEXaY8XLqYLxy8BJ5PuM4Xsqafd4lb5qsK1/KJC4iS +WWqdXsOhICs3itpsPR1HneHdABigpAIYIcxk5fyLC8+EDjhXrK2OGsEMMytUsAbx +ePcQLjokW1EXc7fiuxrKDdJZuT/AsWZqw0hqRkdQSAqzEmrUhjdDbjgifQIDAQAB +oyQwIjAgBgNVHREEGTAXghVsY3kyLWR0MTEwLnhlbnJ0Y2xvdWQwDQYJKoZIhvcN +AQELBQADggEBAHEkeEjHilXdVgQhD/z46prXObB26uO97yFUcUIalzhb/P3zmfjb +LFatTFn5jgienMmdP90uj7Ly1R6VOa+tX/o+XtSJaZwuNMtixv9qwo3nrFZdw8yF +GgsmbAR+1hu0TG3RNpDIiES4D3JmVP8MgmwLw1kN3cBVptx73lE3uc8vZnNtIDOl +erJb9fD3IOv/RZ78mxMnajZTHY5kg2e96d/a6HgY39vXMwycjp8wIE/+4g94fIc/ +/a2+BGYjCWJZyoLgmHcXEU8fOxe9yUWbFQf0wnqsLJIqzaQU1w2w6mkh4+xsI/nA +JwFfXQKd3fzsvgmufpAbXt/AHljFvC/qjTI= +-----END CERTIFICATE----- +extra line +-----BEGIN CERTIFICATE----- +MIIC5jCCAc6gAwIBAgIIaYRSm3Q7zc8wDQYJKoZIhvcNAQELBQAwIDEeMBwGA1UE +AwwVbGN5Mi1kdDExMC54ZW5ydGNsb3VkMB4XDTIwMTAxNDE1NTc1MloXDTMwMTAx +MjE1NTc1MlowIDEeMBwGA1UEAwwVbGN5Mi1kdDExMC54ZW5ydGNsb3VkMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA+5SOY1AKwqp3B7c3EhSvjiYFzL9e +gItLI7CypsVFQp5RJXp/9FRCMk1sLB3gfqMcQ5bhhPM4QfW0iK9wtcRxFILYI/oH ++vwNEJ5f3yBpIbjhbD6IaJcV1xpNkJD0JRqxPRUcRTRkKSLq9Hp4nqScOCvkjKgG +hlL3cFn3uOKWIzrJgEtyFixEXaY8XLqYLxy8BJ5PuM4Xsqafd4lb5qsK1/KJC4iS +WWqdXsOhICs3itpsPR1HneHdABigpAIYIcxk5fyLC8+EDjhXrK2OGsEMMytUsAbx +ePcQLjokW1EXc7fiuxrKDdJZuT/AsWZqw0hqRkdQSAqzEmrUhjdDbjgifQIDAQAB +oyQwIjAgBgNVHREEGTAXghVsY3kyLWR0MTEwLnhlbnJ0Y2xvdWQwDQYJKoZIhvcN +AQELBQADggEBAHEkeEjHilXdVgQhD/z46prXObB26uO97yFUcUIalzhb/P3zmfjb +LFatTFn5jgienMmdP90uj7Ly1R6VOa+tX/o+XtSJaZwuNMtixv9qwo3nrFZdw8yF +GgsmbAR+1hu0TG3RNpDIiES4D3JmVP8MgmwLw1kN3cBVptx73lE3uc8vZnNtIDOl +erJb9fD3IOv/RZ78mxMnajZTHY5kg2e96d/a6HgY39vXMwycjp8wIE/+4g94fIc/ +/a2+BGYjCWJZyoLgmHcXEU8fOxe9yUWbFQf0wnqsLJIqzaQU1w2w6mkh4+xsI/nA +JwFfXQKd3fzsvgmufpAbXt/AHljFvC/qjTI= +-----END CERTIFICATE----- +extra line diff --git a/ocaml/gencert/test_data/pems/pass-extra-lines-one-cert.pem b/ocaml/gencert/test_data/pems/pass-extra-lines-one-cert.pem new file mode 100644 index 00000000000..bc43f444a28 --- /dev/null +++ b/ocaml/gencert/test_data/pems/pass-extra-lines-one-cert.pem @@ -0,0 +1,49 @@ +extra line +-----BEGIN PRIVATE KEY----- +MIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQD7lI5jUArCqncH +tzcSFK+OJgXMv16Ai0sjsLKmxUVCnlElen/0VEIyTWwsHeB+oxxDluGE8zhB9bSI +r3C1xHEUgtgj+gf6/A0Qnl/fIGkhuOFsPoholxXXGk2QkPQlGrE9FRxFNGQpIur0 +eniepJw4K+SMqAaGUvdwWfe44pYjOsmAS3IWLERdpjxcupgvHLwEnk+4zheypp93 +iVvmqwrX8okLiJJZap1ew6EgKzeK2mw9HUed4d0AGKCkAhghzGTl/IsLz4QOOFes +rY4awQwzK1SwBvF49xAuOiRbURdzt+K7GsoN0lm5P8CxZmrDSGpGR1BICrMSatSG +N0NuOCJ9AgMBAAECggEATgm51VKZ0+Kew5Twjzo9bqGawPVHsiYDK9H+yL5+inij +gTWrhTWxxvq/KDwoS//6n3ipAd2UQNmfo5qQIsIJtawUsaw4V4Fh6BrIcGUUV3KK +8lG/bHoZOz0cfFCKewv5mJH4z/q9awk6ypVG3yb+kmoDHiJsy7Pmr0IpFn+qxMg1 +EYZU91G10DguXekciRtNcZJRL0wCQR3s2OwDdQUC+XIotvAsKiuhWl++MLwn42ad +EwhzLuLd312qWg58ByCcNq8/XJkHJUbKDTWmBRGopWRliduP+Kb6vJZ16KL0G2B+ +OKuTQxMOzVVmumXdEVj3kH54cjpn7kCq9jwhhSJiQQKBgQD94ZFOzsUzZfmNlDZ3 +hFmkFuFpQCacH58FQX/vD6JQ84HwEHJx69aHYI6olCNaKcNwMhsOw+0KqBRWZnCf +A6oMWUf3pkogV5JZJy7DyHNOmkfI/w8NcWtqJ03pCoA237f5RH0sul2ady9BVzsJ +/8rb3B5uDw8+XesnG8Ryj6BCsQKBgQD9rhKfHxJgsZUzyassIumYcLTefgdoeCq5 +awd+YaM9jrGGN1ty8dTEzo3rbovnz8y+ZJMzaDRbCUeNTQjKDox8mWffRTpjxcks +rJzImY7coBdnZT8K4C5OMoeCAr30FI1veXBk/XFfr56h1X8QbmM2kuJwpsf5bOaf +CTfL2q2XjQKBgHem4pvYuXoC2n1OV+k2GCVMn0nCcS/tez234/qgTKiISzoAFl/4 +fW/qIvHyd0LcIf7zrmrkDgiStJsPxo465N7TCSb/WToq649W9yRQiX+HGMPy6X41 +cSFjisWFLG4wO/2fuLrmzoypFT1fRjTtOAcsk67dLBsBmn0hChHP/QDRAoGASXS7 +XaogpzEk1A8kaq5dV8/i/74cpQqOzIwKanUZULzd+NBUwa72/loVTEQBbQmF7ueu +nCcjae0A9BCHaALYeUfuhP9Fzhg6jZ4Z9BhK/uW4gS8XFy4dGnWVOXdTy7ab0din +TAb7akqvM4tftMFSJz5XJWmV5Eq9aPXBW10iAQ0CgYBl6PsdqWBjvPnqX3NCyAGH +ZO4iUcrqODdeTcKpILgqBmh9/IepClgCtwW1Iluna7QTDtVqotKcft1BtHJzeKWT +6TvCgje2k0RWo6TkzroaF74lyAojzWOrmuq+skVbWTiebc4bCA1KtLMLaQHIEtdo +FIPEq03cDKVNDCgABw4mkw== +-----END PRIVATE KEY----- +extra line +-----BEGIN CERTIFICATE----- +MIIC5jCCAc6gAwIBAgIIaYRSm3Q7zc8wDQYJKoZIhvcNAQELBQAwIDEeMBwGA1UE +AwwVbGN5Mi1kdDExMC54ZW5ydGNsb3VkMB4XDTIwMTAxNDE1NTc1MloXDTMwMTAx +MjE1NTc1MlowIDEeMBwGA1UEAwwVbGN5Mi1kdDExMC54ZW5ydGNsb3VkMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA+5SOY1AKwqp3B7c3EhSvjiYFzL9e +gItLI7CypsVFQp5RJXp/9FRCMk1sLB3gfqMcQ5bhhPM4QfW0iK9wtcRxFILYI/oH ++vwNEJ5f3yBpIbjhbD6IaJcV1xpNkJD0JRqxPRUcRTRkKSLq9Hp4nqScOCvkjKgG +hlL3cFn3uOKWIzrJgEtyFixEXaY8XLqYLxy8BJ5PuM4Xsqafd4lb5qsK1/KJC4iS +WWqdXsOhICs3itpsPR1HneHdABigpAIYIcxk5fyLC8+EDjhXrK2OGsEMMytUsAbx +ePcQLjokW1EXc7fiuxrKDdJZuT/AsWZqw0hqRkdQSAqzEmrUhjdDbjgifQIDAQAB +oyQwIjAgBgNVHREEGTAXghVsY3kyLWR0MTEwLnhlbnJ0Y2xvdWQwDQYJKoZIhvcN +AQELBQADggEBAHEkeEjHilXdVgQhD/z46prXObB26uO97yFUcUIalzhb/P3zmfjb +LFatTFn5jgienMmdP90uj7Ly1R6VOa+tX/o+XtSJaZwuNMtixv9qwo3nrFZdw8yF +GgsmbAR+1hu0TG3RNpDIiES4D3JmVP8MgmwLw1kN3cBVptx73lE3uc8vZnNtIDOl +erJb9fD3IOv/RZ78mxMnajZTHY5kg2e96d/a6HgY39vXMwycjp8wIE/+4g94fIc/ +/a2+BGYjCWJZyoLgmHcXEU8fOxe9yUWbFQf0wnqsLJIqzaQU1w2w6mkh4+xsI/nA +JwFfXQKd3fzsvgmufpAbXt/AHljFvC/qjTI= +-----END CERTIFICATE----- +extra line From 71c87a18a383d9f72366349b93d5235345a6a1da Mon Sep 17 00:00:00 2001 From: Lin Liu Date: Mon, 6 Jan 2025 05:12:25 +0000 Subject: [PATCH 39/50] CP-51895: Drop FCoE support when fcoe_driver does not exists - Add logs when fcoe_driver does not exist - Use List.assoc_opt instead of try catch - Add __FUNCTION__ to the logs Signed-off-by: Lin Liu --- ocaml/networkd/lib/network_utils.ml | 4 +++- ocaml/xapi/xapi_pif.ml | 27 +++++++++++++-------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/ocaml/networkd/lib/network_utils.ml b/ocaml/networkd/lib/network_utils.ml index 0cd8d4769bb..b6c696ea896 100644 --- a/ocaml/networkd/lib/network_utils.ml +++ b/ocaml/networkd/lib/network_utils.ml @@ -1069,13 +1069,15 @@ module Fcoe = struct let get_capabilities name = match Sys.file_exists !fcoedriver with | false -> + info "%s: %s not found, does not support FCoE" __FUNCTION__ !fcoedriver ; [] (* Does not support FCoE *) | true -> ( try let output = call ~log:false ["--xapi"; name; "capable"] in if Astring.String.is_infix ~affix:"True" output then ["fcoe"] else [] with _ -> - debug "Failed to get fcoe support status on device %s" name ; + debug "%s: Failed to get fcoe support status on device %s" __FUNCTION__ + name ; [] ) end diff --git a/ocaml/xapi/xapi_pif.ml b/ocaml/xapi/xapi_pif.ml index f52def6d8ee..0284a134a68 100644 --- a/ocaml/xapi/xapi_pif.ml +++ b/ocaml/xapi/xapi_pif.ml @@ -323,7 +323,7 @@ let assert_fcoe_not_in_use ~__context ~self = | false -> (* Does not support FCoE from XS9, presuming not in use * Upgrade plugin will block upgrade with FCoE in use *) - () + debug "%s not found, does not support FCoE" !Xapi_globs.fcoe_driver | true -> let interface = Db.PIF.get_device ~__context ~self in let output, _ = @@ -331,25 +331,24 @@ let assert_fcoe_not_in_use ~__context ~self = ["-t"; interface] in let output = String.trim output in - debug "Scsi ids on %s are: %s" interface output ; + debug "%s: SCSI ids on %s are: %s" __FUNCTION__ interface output ; let fcoe_scsids = Str.split (Str.regexp " ") output in Helpers.get_my_pbds __context |> List.iter (fun (_, pbd_rec) -> let sr = pbd_rec.API.pBD_SR in match Db.SR.get_type ~__context ~self:sr with | "lvmofcoe" -> ( - try - let scsid = - List.assoc "SCSIid" pbd_rec.API.pBD_device_config - in - if List.mem scsid fcoe_scsids then - raise - (Api_errors.Server_error - ( Api_errors.pif_has_fcoe_sr_in_use - , [Ref.string_of self; Ref.string_of sr] - ) - ) - with Not_found -> () + match List.assoc_opt "SCSIid" pbd_rec.API.pBD_device_config with + | Some scsid -> + if List.mem scsid fcoe_scsids then + raise + (Api_errors.Server_error + ( Api_errors.pif_has_fcoe_sr_in_use + , [Ref.string_of self; Ref.string_of sr] + ) + ) + | None -> + () ) | _ -> () From 5428164b405e9a996dd41b4a5b9e1c631351cff6 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 3 Jan 2025 11:25:32 +0000 Subject: [PATCH 40/50] CA-403344: Add `db_get_by_uuid_opt` to db_cache* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Edwin Török Signed-off-by: Vincent Liu --- ocaml/database/db_remote_cache_access_v1.ml | 6 ++++++ ocaml/database/db_remote_cache_access_v2.ml | 2 ++ ocaml/database/db_rpc_client_v2.ml | 2 +- ocaml/database/db_rpc_common_v1.ml | 2 ++ ocaml/database/db_rpc_common_v2.ml | 1 + 5 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ocaml/database/db_remote_cache_access_v1.ml b/ocaml/database/db_remote_cache_access_v1.ml index ec198755739..1499fa3fc13 100644 --- a/ocaml/database/db_remote_cache_access_v1.ml +++ b/ocaml/database/db_remote_cache_access_v1.ml @@ -102,6 +102,12 @@ module DBCacheRemoteListener = struct let s, e = unmarshall_db_get_by_uuid_args args in success (marshall_db_get_by_uuid_response (DBCache.db_get_by_uuid t s e)) + | "db_get_by_uuid_opt" -> + let s, e = unmarshall_db_get_by_uuid_args args in + success + (marshall_db_get_by_uuid_opt_response + (DBCache.db_get_by_uuid_opt t s e) + ) | "db_get_by_name_label" -> let s, e = unmarshall_db_get_by_name_label_args args in success diff --git a/ocaml/database/db_remote_cache_access_v2.ml b/ocaml/database/db_remote_cache_access_v2.ml index 040ad215600..754fd2fa340 100644 --- a/ocaml/database/db_remote_cache_access_v2.ml +++ b/ocaml/database/db_remote_cache_access_v2.ml @@ -36,6 +36,8 @@ let process_rpc (req : Rpc.t) = Response.Read_field_where (DB.read_field_where t w) | Request.Db_get_by_uuid (a, b) -> Response.Db_get_by_uuid (DB.db_get_by_uuid t a b) + | Request.Db_get_by_uuid_opt (a, b) -> + Response.Db_get_by_uuid_opt (DB.db_get_by_uuid_opt t a b) | Request.Db_get_by_name_label (a, b) -> Response.Db_get_by_name_label (DB.db_get_by_name_label t a b) | Request.Create_row (a, b, c) -> diff --git a/ocaml/database/db_rpc_client_v2.ml b/ocaml/database/db_rpc_client_v2.ml index 3a32b3149e9..2e03f069497 100644 --- a/ocaml/database/db_rpc_client_v2.ml +++ b/ocaml/database/db_rpc_client_v2.ml @@ -78,7 +78,7 @@ functor raise Remote_db_server_returned_bad_message let db_get_by_uuid_opt _ t u = - match process (Request.Db_get_by_uuid (t, u)) with + match process (Request.Db_get_by_uuid_opt (t, u)) with | Response.Db_get_by_uuid_opt y -> y | _ -> diff --git a/ocaml/database/db_rpc_common_v1.ml b/ocaml/database/db_rpc_common_v1.ml index cced73dd9ca..b4ef9a4163e 100644 --- a/ocaml/database/db_rpc_common_v1.ml +++ b/ocaml/database/db_rpc_common_v1.ml @@ -192,6 +192,8 @@ let unmarshall_db_get_by_uuid_args xml = unmarshall_2strings xml let marshall_db_get_by_uuid_response s = XMLRPC.To.string s +let marshall_db_get_by_uuid_opt_response = marshall_stringopt + let unmarshall_db_get_by_uuid_response xml = XMLRPC.From.string xml let unmarshall_db_get_by_uuid_opt_response xml = unmarshall_stringopt xml diff --git a/ocaml/database/db_rpc_common_v2.ml b/ocaml/database/db_rpc_common_v2.ml index 4cd9d7541ab..6700d159f18 100644 --- a/ocaml/database/db_rpc_common_v2.ml +++ b/ocaml/database/db_rpc_common_v2.ml @@ -23,6 +23,7 @@ module Request = struct | Find_refs_with_filter of string * Db_filter_types.expr | Read_field_where of Db_cache_types.where_record | Db_get_by_uuid of string * string + | Db_get_by_uuid_opt of string * string | Db_get_by_name_label of string * string | Create_row of string * (string * string) list * string | Delete_row of string * string From eacc53b8255b4bd1f5eaaa0f8c528a5e3af6ec06 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Fri, 3 Jan 2025 11:34:50 +0000 Subject: [PATCH 41/50] Add unit test to the new `db_get_by_uuid_opt` function Signed-off-by: Vincent Liu --- ocaml/database/database_test.ml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ocaml/database/database_test.ml b/ocaml/database/database_test.ml index 51e28dbf387..f3c10944b19 100644 --- a/ocaml/database/database_test.ml +++ b/ocaml/database/database_test.ml @@ -580,6 +580,24 @@ functor let (_ : string) = Client.db_get_by_uuid t "VM" invalid_uuid in failwith "db_get_by_uuid " ) ; + Printf.printf "db_get_by_uuid_opt \n" ; + let r = Client.db_get_by_uuid_opt t "VM" valid_uuid in + ( if r <> Some valid_ref then + let rs = Option.value ~default:"None" r in + failwith + (Printf.sprintf + "db_get_by_uuid_opt : got %s; expected %s" rs + valid_ref + ) + ) ; + Printf.printf "db_get_by_uuid_opt \n" ; + let r = Client.db_get_by_uuid_opt t "VM" invalid_uuid in + if not (Option.is_none r) then + failwith + (Printf.sprintf + "db_get_by_uuid_opt : got %s; expected None" + valid_ref + ) ; Printf.printf "get_by_name_label \n" ; if Client.db_get_by_name_label t "VM" invalid_name <> [] then failwith "db_get_by_name_label " ; From c69aec917145bc9be9539cdadaa2d6ae75e566f9 Mon Sep 17 00:00:00 2001 From: Vincent Liu Date: Mon, 6 Jan 2025 12:11:40 +0000 Subject: [PATCH 42/50] Style: Refactor using failwith_fmt Signed-off-by: Vincent Liu --- ocaml/database/database_test.ml | 93 ++++++++++++--------------------- 1 file changed, 34 insertions(+), 59 deletions(-) diff --git a/ocaml/database/database_test.ml b/ocaml/database/database_test.ml index f3c10944b19..dc176488f3b 100644 --- a/ocaml/database/database_test.ml +++ b/ocaml/database/database_test.ml @@ -16,6 +16,8 @@ let name_label = "name__label" let name_description = "name__description" +let failwith_fmt fmt = Printf.ksprintf failwith fmt + module Tests = functor (Client : Db_interface.DB_ACCESS) @@ -111,7 +113,7 @@ functor ; where_value= "" } in - failwith (Printf.sprintf "%s " fn_name) + failwith_fmt "%s " fn_name ) ; Printf.printf "%s \n" @@ -126,11 +128,9 @@ functor ; where_value= name } in - failwith - (Printf.sprintf - "%s " - fn_name - ) + failwith_fmt + "%s " + fn_name ) ; Printf.printf "%s \n" @@ -145,11 +145,9 @@ functor ; where_value= "" } in - failwith - (Printf.sprintf - "%s " - fn_name - ) + failwith_fmt + "%s " + fn_name ) (* Verify the ref_index contents are correct for a given [tblname] and [key] (uuid/ref) *) @@ -168,10 +166,9 @@ functor | Some {Ref_index.name_label= name_label'; uuid; _ref} -> (* key should be either uuid or _ref *) if key <> uuid && key <> _ref then - failwith - (Printf.sprintf "check_ref_index %s key %s: got ref %s uuid %s" - tblname key _ref uuid - ) ; + failwith_fmt "check_ref_index %s key %s: got ref %s uuid %s" tblname + key _ref uuid ; + let real_ref = if Client.is_valid_ref t key then key @@ -183,14 +180,11 @@ functor with _ -> None in if name_label' <> real_name_label then - failwith - (Printf.sprintf - "check_ref_index %s key %s: ref_index name_label = %s; db has \ - %s" - tblname key - (Option.value ~default:"None" name_label') - (Option.value ~default:"None" real_name_label) - ) + failwith_fmt + "check_ref_index %s key %s: ref_index name_label = %s; db has %s" + tblname key + (Option.value ~default:"None" name_label') + (Option.value ~default:"None" real_name_label) open Db_cache_types @@ -226,11 +220,9 @@ functor in let bar_foos = Row.find "foos" bar_1 in if bar_foos <> Set ["foo:1"] then - failwith - (Printf.sprintf - "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s" - (Schema.Value.marshal bar_foos) - ) ; + failwith_fmt + "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s" + (Schema.Value.marshal bar_foos) ; (* set foo.bars to [] *) (* let foo_1 = Table.find "foo:1" (TableSet.find "foo" (Database.tableset db)) in*) let db = set_field "foo" "foo:1" "bars" (Set []) db in @@ -240,11 +232,8 @@ functor in let bar_foos = Row.find "foos" bar_1 in if bar_foos <> Set [] then - failwith - (Printf.sprintf - "check_many_to_many: bar(bar:1).foos expected () got %s" - (Schema.Value.marshal bar_foos) - ) ; + failwith_fmt "check_many_to_many: bar(bar:1).foos expected () got %s" + (Schema.Value.marshal bar_foos) ; (* add 'bar' to foo.bars *) let db = set_field "foo" "foo:1" "bars" (Set ["bar:1"]) db in (* check that 'bar.foos' includes 'foo' *) @@ -253,11 +242,9 @@ functor in let bar_foos = Row.find "foos" bar_1 in if bar_foos <> Set ["foo:1"] then - failwith - (Printf.sprintf - "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s - 2" - (Schema.Value.marshal bar_foos) - ) ; + failwith_fmt + "check_many_to_many: bar(bar:1).foos expected ('foo:1') got %s - 2" + (Schema.Value.marshal bar_foos) ; (* delete 'bar' *) let db = remove_row "bar" "bar:1" db in (* check that 'foo.bars' is empty *) @@ -266,11 +253,8 @@ functor in let foo_bars = Row.find "bars" foo_1 in if foo_bars <> Set [] then - failwith - (Printf.sprintf - "check_many_to_many: foo(foo:1).foos expected () got %s" - (Schema.Value.marshal foo_bars) - ) ; + failwith_fmt "check_many_to_many: foo(foo:1).foos expected () got %s" + (Schema.Value.marshal foo_bars) ; () let check_events t = @@ -503,8 +487,7 @@ functor | None -> Printf.printf "Reference '%s' has no associated table\n" invalid_ref | Some t -> - failwith - (Printf.sprintf "Reference '%s' exists in table '%s'" invalid_ref t) + failwith_fmt "Reference '%s' exists in table '%s'" invalid_ref t ) ; Printf.printf "is_valid_ref \n" ; if Client.is_valid_ref t invalid_ref then @@ -571,10 +554,8 @@ functor Printf.printf "db_get_by_uuid \n" ; let r = Client.db_get_by_uuid t "VM" valid_uuid in if r <> valid_ref then - failwith - (Printf.sprintf "db_get_by_uuid : got %s; expected %s" r - valid_ref - ) ; + failwith_fmt "db_get_by_uuid : got %s; expected %s" r + valid_ref ; Printf.printf "db_get_by_uuid \n" ; expect_missing_uuid "VM" invalid_uuid (fun () -> let (_ : string) = Client.db_get_by_uuid t "VM" invalid_uuid in @@ -584,20 +565,14 @@ functor let r = Client.db_get_by_uuid_opt t "VM" valid_uuid in ( if r <> Some valid_ref then let rs = Option.value ~default:"None" r in - failwith - (Printf.sprintf - "db_get_by_uuid_opt : got %s; expected %s" rs - valid_ref - ) + failwith_fmt "db_get_by_uuid_opt : got %s; expected %s" rs + valid_ref ) ; Printf.printf "db_get_by_uuid_opt \n" ; let r = Client.db_get_by_uuid_opt t "VM" invalid_uuid in if not (Option.is_none r) then - failwith - (Printf.sprintf - "db_get_by_uuid_opt : got %s; expected None" - valid_ref - ) ; + failwith_fmt "db_get_by_uuid_opt : got %s; expected None" + valid_ref ; Printf.printf "get_by_name_label \n" ; if Client.db_get_by_name_label t "VM" invalid_name <> [] then failwith "db_get_by_name_label " ; From 769c863b8e0c9bd08a9647492cda5736ecfc4bb3 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Fri, 20 Dec 2024 18:36:58 +0000 Subject: [PATCH 43/50] Removed deprecated methods. Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/csharp/autogen/src/Session.cs | 30 ------------------- .../csharp/templates/ApiVersion.mustache | 28 +++-------------- 2 files changed, 4 insertions(+), 54 deletions(-) diff --git a/ocaml/sdk-gen/csharp/autogen/src/Session.cs b/ocaml/sdk-gen/csharp/autogen/src/Session.cs index 4a84b5bcd0c..e6b4b87eb47 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Session.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Session.cs @@ -70,24 +70,11 @@ public Session(JsonRpcClient client) JsonRpcClient = client; } - [Obsolete("Use Session(string url) { Timeout = ... }; instead.")] - public Session(int timeout, string url) - : this(new JsonRpcClient(url)) - { - JsonRpcClient.Timeout = timeout; - } - public Session(string url) : this(new JsonRpcClient(url)) { } - [Obsolete("Use Session(string host, int port) { Timeout = ... }; instead.")] - public Session(int timeout, string host, int port) - : this(timeout, GetUrl(host, port)) - { - } - public Session(string host, int port) : this(GetUrl(host, port)) { @@ -100,23 +87,6 @@ public Session(string url, string opaqueRef) SetupSessionDetails(); } - /// - /// Create a new Session instance, using the given instance and timeout. The connection details and Xen-API session handle will be - /// copied from the given instance, but a new connection will be created. Use this if you want a duplicate connection to a host, - /// for example when you need to cancel an operation that is blocking the primary connection. - /// - /// - /// - [Obsolete("Use Session(Session session) { Timeout = ... }; instead.")] - public Session(Session session, int timeout) - : this(session) - { - if (JsonRpcClient != null) - { - JsonRpcClient.Timeout = timeout; - } - } - /// /// Create a new Session instance, using the given instance. The connection details /// and Xen-API session handle will be copied from the given instance, but a new diff --git a/ocaml/sdk-gen/csharp/templates/ApiVersion.mustache b/ocaml/sdk-gen/csharp/templates/ApiVersion.mustache index a8be1e4e8a5..9cac0b97c18 100644 --- a/ocaml/sdk-gen/csharp/templates/ApiVersion.mustache +++ b/ocaml/sdk-gen/csharp/templates/ApiVersion.mustache @@ -28,9 +28,6 @@ */ using System; -using System.Collections; -using System.Collections.Generic; - namespace XenAPI { @@ -63,8 +60,7 @@ namespace XenAPI { try { - return (API_Version)Enum.Parse(typeof(API_Version), - string.Format("API_{0}_{1}", major, minor)); + return (API_Version)Enum.Parse(typeof(API_Version), $"API_{major}_{minor}"); } catch (ArgumentException) { @@ -82,30 +78,14 @@ namespace XenAPI { string[] tokens = version.Split('.'); int major, minor; - if (tokens.Length == 2 && int.TryParse(tokens[0], out major) && int.TryParse(tokens[1], out minor)) + if (tokens.Length == 2 && + int.TryParse(tokens[0], out major) && + int.TryParse(tokens[1], out minor)) { return GetAPIVersion(major, minor); } } return API_Version.UNKNOWN; } - - /// - /// Return a positive number if the given session's API version is greater than the given - /// API_version, negative if it is less, and 0 if they are equal. - /// - internal static int APIVersionCompare(Session session, API_Version v) - { - return (int)session.APIVersion - (int)v; - } - - /// - /// Return true if the given session's API version is greater than or equal to the given - /// API_version. - /// - internal static bool APIVersionMeets(Session session, API_Version v) - { - return APIVersionCompare(session, v) >= 0; - } } } From ad4f3d94ca58bc1fd09aaa130c4aed0156f3b787 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Fri, 20 Dec 2024 18:45:06 +0000 Subject: [PATCH 44/50] Cmdlet refactoring: - Moved certificate methods to the Connect-XenServer cmdlet and refactored them to avoid multiple loads of the global variable KnownServerCertificatesFilePath. - Fixed accessibility of CommonCmdletFunctions members. Signed-off-by: Konstantina Chremmou --- .../autogen/src/CommonCmdletFunctions.cs | 73 ++-------------- .../autogen/src/Connect-XenServer.cs | 83 ++++++++++++++++--- 2 files changed, 76 insertions(+), 80 deletions(-) diff --git a/ocaml/sdk-gen/powershell/autogen/src/CommonCmdletFunctions.cs b/ocaml/sdk-gen/powershell/autogen/src/CommonCmdletFunctions.cs index 8f29ecde1f5..d22d5eadee0 100644 --- a/ocaml/sdk-gen/powershell/autogen/src/CommonCmdletFunctions.cs +++ b/ocaml/sdk-gen/powershell/autogen/src/CommonCmdletFunctions.cs @@ -39,14 +39,11 @@ namespace Citrix.XenServer { - class CommonCmdletFunctions + internal class CommonCmdletFunctions { private const string SessionsVariable = "global:Citrix.XenServer.Sessions"; - private const string DefaultSessionVariable = "global:XenServer_Default_Session"; - private const string KnownServerCertificatesFilePathVariable = "global:KnownServerCertificatesFilePath"; - static CommonCmdletFunctions() { Session.UserAgent = string.Format("XenServerPSModule/{0}", Assembly.GetExecutingAssembly().GetName().Version); @@ -78,72 +75,12 @@ internal static void SetDefaultXenSession(PSCmdlet cmdlet, Session session) cmdlet.SessionState.PSVariable.Set(DefaultSessionVariable, session); } - internal static string GetKnownServerCertificatesFilePathVariable(PSCmdlet cmdlet) - { - var knownCertificatesFilePathObject = cmdlet.SessionState.PSVariable.GetValue(KnownServerCertificatesFilePathVariable); - if (knownCertificatesFilePathObject is PSObject psObject) - return psObject.BaseObject as string; - return knownCertificatesFilePathObject?.ToString() ?? string.Empty; - } - internal static string GetUrl(string hostname, int port) { - return string.Format("{0}://{1}:{2}", port == 80 ? "http" : "https", hostname, port); - } - - public static Dictionary LoadCertificates(PSCmdlet cmdlet) - { - Dictionary certificates = new Dictionary(); - var knownServerCertificatesFilePath = GetKnownServerCertificatesFilePathVariable(cmdlet); - - if (File.Exists(knownServerCertificatesFilePath)) - { - XmlDocument doc = new XmlDocument(); - doc.Load(knownServerCertificatesFilePath); - - foreach (XmlNode node in doc.GetElementsByTagName("certificate")) - { - XmlAttribute hostAtt = node.Attributes?["hostname"]; - XmlAttribute fngprtAtt = node.Attributes?["fingerprint"]; - - if (hostAtt != null && fngprtAtt != null) - certificates[hostAtt.Value] = fngprtAtt.Value; - } - } - - return certificates; - } - - public static void SaveCertificates(PSCmdlet cmdlet, Dictionary certificates) - { - var knownServerCertificatesFilePath = GetKnownServerCertificatesFilePathVariable(cmdlet); - string dirName = Path.GetDirectoryName(knownServerCertificatesFilePath); - - if (!Directory.Exists(dirName)) - Directory.CreateDirectory(dirName); - - XmlDocument doc = new XmlDocument(); - XmlDeclaration decl = doc.CreateXmlDeclaration("1.0", "utf-8", null); - doc.AppendChild(decl); - XmlNode node = doc.CreateElement("certificates"); - - foreach (KeyValuePair cert in certificates) - { - XmlNode certNode = doc.CreateElement("certificate"); - XmlAttribute hostname = doc.CreateAttribute("hostname"); - XmlAttribute fingerprint = doc.CreateAttribute("fingerprint"); - hostname.Value = cert.Key; - fingerprint.Value = cert.Value; - certNode.Attributes?.Append(hostname); - certNode.Attributes?.Append(fingerprint); - node.AppendChild(certNode); - } - - doc.AppendChild(node); - doc.Save(knownServerCertificatesFilePath); + return $"{(port == 80 ? "http" : "https")}://{hostname}:{port}"; } - public static string FingerprintPrettyString(string fingerprint) + internal static string FingerprintPrettyString(string fingerprint) { List pairs = new List(); while (fingerprint.Length > 1) @@ -157,7 +94,7 @@ public static string FingerprintPrettyString(string fingerprint) return string.Join(":", pairs.ToArray()); } - public static Dictionary ConvertHashTableToDictionary(Hashtable tbl) + internal static Dictionary ConvertHashTableToDictionary(Hashtable tbl) { if (tbl == null) return null; @@ -169,7 +106,7 @@ public static Dictionary ConvertHashTableToDictionary(Hashtable tbl) return dict; } - public static Hashtable ConvertDictionaryToHashtable(Dictionary dict) + internal static Hashtable ConvertDictionaryToHashtable(Dictionary dict) { if (dict == null) return null; diff --git a/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs b/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs index f300801f5ef..a1dc4ecf964 100644 --- a/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs +++ b/ocaml/sdk-gen/powershell/autogen/src/Connect-XenServer.cs @@ -29,6 +29,7 @@ using System; using System.Collections.Generic; +using System.IO; using System.Management.Automation; using System.Net; using System.Net.Security; @@ -36,6 +37,7 @@ using System.Security; using System.Security.Cryptography; using System.Security.Cryptography.X509Certificates; +using System.Xml; using XenAPI; namespace Citrix.XenServer.Commands @@ -43,6 +45,8 @@ namespace Citrix.XenServer.Commands [Cmdlet("Connect", "XenServer")] public class ConnectXenServerCommand : PSCmdlet { + private const string CertificatesPathVariable = "global:KnownServerCertificatesFilePath"; + private readonly object _certificateValidationLock = new object(); public ConnectXenServerCommand() @@ -214,7 +218,10 @@ protected override void ProcessRecord() { if (ShouldContinue(ex.Message, ex.Caption)) { - AddCertificate(ex.Hostname, ex.Fingerprint); + var certPath = GetCertificatesPath(); + var certificates = LoadCertificates(certPath); + certificates[ex.Hostname] = ex.Fingerprint; + SaveCertificates(certPath, certificates); i--; continue; } @@ -254,13 +261,6 @@ protected override void ProcessRecord() WriteObject(newSessions.Values, true); } - private void AddCertificate(string hostname, string fingerprint) - { - var certificates = CommonCmdletFunctions.LoadCertificates(this); - certificates[hostname] = fingerprint; - CommonCmdletFunctions.SaveCertificates(this, certificates); - } - private bool ValidateServerCertificate(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors sslPolicyErrors) { if (sslPolicyErrors == SslPolicyErrors.None) @@ -277,11 +277,11 @@ private bool ValidateServerCertificate(object sender, X509Certificate certificat bool trusted = VerifyInAllStores(new X509Certificate2(certificate)); - var certificates = CommonCmdletFunctions.LoadCertificates(this); + var certPath = GetCertificatesPath(); + var certificates = LoadCertificates(certPath); - if (certificates.ContainsKey(hostname)) + if (certificates.TryGetValue(hostname, out var fingerprintOld)) { - string fingerprintOld = certificates[hostname]; if (fingerprintOld == fingerprint) return true; @@ -295,7 +295,7 @@ private bool ValidateServerCertificate(object sender, X509Certificate certificat } certificates[hostname] = fingerprint; - CommonCmdletFunctions.SaveCertificates(this, certificates); + SaveCertificates(certPath, certificates); return true; } } @@ -312,6 +312,65 @@ private bool VerifyInAllStores(X509Certificate2 certificate2) return false; } } + + private string GetCertificatesPath() + { + var certPathObject = SessionState.PSVariable.GetValue(CertificatesPathVariable); + + return certPathObject is PSObject psObject + ? psObject.BaseObject as string + : certPathObject?.ToString() ?? string.Empty; + } + + private Dictionary LoadCertificates(string certPath) + { + var certificates = new Dictionary(); + + if (File.Exists(certPath)) + { + var doc = new XmlDocument(); + doc.Load(certPath); + + foreach (XmlNode node in doc.GetElementsByTagName("certificate")) + { + var hostAtt = node.Attributes?["hostname"]; + var fngprtAtt = node.Attributes?["fingerprint"]; + + if (hostAtt != null && fngprtAtt != null) + certificates[hostAtt.Value] = fngprtAtt.Value; + } + } + + return certificates; + } + + private void SaveCertificates(string certPath, Dictionary certificates) + { + string dirName = Path.GetDirectoryName(certPath); + + if (!Directory.Exists(dirName)) + Directory.CreateDirectory(dirName); + + XmlDocument doc = new XmlDocument(); + XmlDeclaration decl = doc.CreateXmlDeclaration("1.0", "utf-8", null); + doc.AppendChild(decl); + XmlNode node = doc.CreateElement("certificates"); + + foreach (KeyValuePair cert in certificates) + { + XmlNode certNode = doc.CreateElement("certificate"); + XmlAttribute hostname = doc.CreateAttribute("hostname"); + XmlAttribute fingerprint = doc.CreateAttribute("fingerprint"); + hostname.Value = cert.Key; + fingerprint.Value = cert.Value; + certNode.Attributes?.Append(hostname); + certNode.Attributes?.Append(fingerprint); + node.AppendChild(certNode); + } + + doc.AppendChild(node); + doc.Save(certPath); + } } internal abstract class CertificateValidationException : Exception From fa1bf903f260ed442620e13ebcdb6a03d2c1e6d2 Mon Sep 17 00:00:00 2001 From: Konstantina Chremmou Date: Fri, 20 Dec 2024 20:35:07 +0000 Subject: [PATCH 45/50] CP-53003: Use JsonRpc v1.0 by default and switch to v2.0 once the API version is known. Signed-off-by: Konstantina Chremmou --- ocaml/sdk-gen/csharp/autogen/src/Session.cs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ocaml/sdk-gen/csharp/autogen/src/Session.cs b/ocaml/sdk-gen/csharp/autogen/src/Session.cs index e6b4b87eb47..82db84a8210 100644 --- a/ocaml/sdk-gen/csharp/autogen/src/Session.cs +++ b/ocaml/sdk-gen/csharp/autogen/src/Session.cs @@ -65,7 +65,6 @@ public Session(JsonRpcClient client) client.KeepAlive = true; client.UserAgent = UserAgent; client.WebProxy = Proxy; - client.JsonRpcVersion = JsonRpcVersion.v2; client.AllowAutoRedirect = true; JsonRpcClient = client; } @@ -145,6 +144,14 @@ private void SetAPIVersion() Host host = Host.get_record(this, pool.master); APIVersion = Helper.GetAPIVersion(host.API_version_major, host.API_version_minor); } + + if (JsonRpcClient != null) + { + if (APIVersion == API_Version.API_2_6) + JsonRpcClient.JsonRpcVersion = JsonRpcVersion.v1; + else if (APIVersion >= API_Version.API_2_8) + JsonRpcClient.JsonRpcVersion = JsonRpcVersion.v2; + } } private void CopyADFromSession(Session session) From e68cda76071000e5513521cd307bebb0e3e8c19c Mon Sep 17 00:00:00 2001 From: Frediano Ziglio Date: Tue, 7 Jan 2025 10:35:13 +0000 Subject: [PATCH 46/50] Use Mtime.Span.to_float_ns instead of Mtime.Span.to_uint64_ns+Int64.to_float Minor code reduction. Signed-off-by: Frediano Ziglio --- ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml index a544ed79bbb..200b9925786 100644 --- a/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml +++ b/ocaml/libs/xapi-stdext/lib/xapi-stdext-threads/scheduler.ml @@ -36,8 +36,7 @@ let lock = Mutex.create () module Clock = struct let span s = Mtime.Span.of_uint64_ns (Int64.of_float (s *. 1e9)) - let span_to_s span = - Mtime.Span.to_uint64_ns span |> Int64.to_float |> fun ns -> ns /. 1e9 + let span_to_s span = Mtime.Span.to_float_ns span |> fun ns -> ns /. 1e9 let add_span clock secs = (* return mix or max available value if the add overflows *) From 38a8f9883c9b8d4c3d9abb68d3d734a24ce01ae3 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Tue, 7 Jan 2025 15:01:29 +0000 Subject: [PATCH 47/50] CA-404013: do not relock the mutex when backing up rrds The point of using try_lock is to not get the thread suspended while trying to hold the mutex. Releasing it and calling `lock` may suspend the thread and defeats the purpose of using try_lock in the first place. Reorganise the sequence to read and copy all the rrds first while under the locked mutex, release it, and then archive the copies. Signed-off-by: Pau Ruiz Safont --- ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml | 52 +++++++++++--------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml index d9d41114e00..fdae263d867 100644 --- a/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml +++ b/ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml @@ -99,6 +99,7 @@ let archive_rrd vm_uuid (remote_address : string option) : unit = master host, exclusively. Any attempt to send the rrds to pools outside the host will fail. *) let backup_rrds (remote_address : string option) () : unit = + let __FUN = __FUNCTION__ in let transport = Option.map (fun address -> @@ -119,50 +120,39 @@ let backup_rrds (remote_address : string option) () : unit = | Some address -> Printf.sprintf "host %s" address in - info "%s: trying to back up RRDs to %s" __FUNCTION__ destination ; + info "%s: trying to back up RRDs to %s" __FUN destination ; let total_cycles = 5 in let cycles_tried = ref 0 in + let host_uuid = Inventory.lookup Inventory._installation_uuid in while !cycles_tried < total_cycles do if Mutex.try_lock mutex then ( cycles_tried := total_cycles ; - let vrrds = - try Hashtbl.fold (fun k v acc -> (k, v.rrd) :: acc) vm_rrds [] - with exn -> Mutex.unlock mutex ; raise exn - in - Mutex.unlock mutex ; - List.iter - (fun (uuid, rrd) -> - debug "%s: saving RRD for VM uuid=%s" __FUNCTION__ uuid ; - let rrd = with_lock mutex (fun () -> Rrd.copy_rrd rrd) in - archive_rrd_internal ~transport ~uuid ~rrd () - ) - vrrds ; - Mutex.lock mutex ; - let srrds = - try Hashtbl.fold (fun k v acc -> (k, v.rrd) :: acc) sr_rrds [] - with exn -> Mutex.unlock mutex ; raise exn + let rrds_copy = + [ + Hashtbl.fold + (fun k v acc -> ("VM", k, Rrd.copy_rrd v.rrd) :: acc) + vm_rrds [] + ; Hashtbl.fold + (fun k v acc -> ("SR", k, Rrd.copy_rrd v.rrd) :: acc) + sr_rrds [] + ; Option.fold ~none:[] + ~some:(fun rrdi -> [("host", host_uuid, Rrd.copy_rrd rrdi.rrd)]) + !host_rrd + ] + |> List.concat in Mutex.unlock mutex ; + List.iter - (fun (uuid, rrd) -> - debug "%s: saving RRD for SR uuid=%s" __FUNCTION__ uuid ; - let rrd = with_lock mutex (fun () -> Rrd.copy_rrd rrd) in + (fun (cls, uuid, rrd) -> + debug "%s: saving RRD for %s uuid=%s" __FUN cls uuid ; archive_rrd_internal ~transport ~uuid ~rrd () ) - srrds ; - match !host_rrd with - | Some rrdi -> - debug "%s: saving RRD for host" __FUNCTION__ ; - let rrd = with_lock mutex (fun () -> Rrd.copy_rrd rrdi.rrd) in - archive_rrd_internal ~transport - ~uuid:(Inventory.lookup Inventory._installation_uuid) - ~rrd () - | None -> - () + rrds_copy ) else ( cycles_tried := 1 + !cycles_tried ; if !cycles_tried >= total_cycles then - warn "%s: Could not acquire RRD lock, skipping RRD backup" __FUNCTION__ + warn "%s: Could not acquire RRD lock, skipping RRD backup" __FUN else Thread.delay 1. ) From 77258a4f5c20a8217d9fae7ebdefab030007d143 Mon Sep 17 00:00:00 2001 From: Pau Ruiz Safont Date: Wed, 8 Jan 2025 10:04:29 +0000 Subject: [PATCH 48/50] database: do not log when a field is dropped when loading from db_xml It causes logspam and hasn't been useful in the years it's been present. Lowering it to a debug statement would still cause it to be logged, so drop the message completely instead. Signed-off-by: Pau Ruiz Safont --- ocaml/database/db_xml.ml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ocaml/database/db_xml.ml b/ocaml/database/db_xml.ml index d7c73db3d84..1795cdef3bd 100644 --- a/ocaml/database/db_xml.ml +++ b/ocaml/database/db_xml.ml @@ -161,12 +161,9 @@ module From = struct D.warn "no lifetime information about %s.%s, ignoring" tblname k ; false in - if do_not_load then ( - D.info - {|dropping column "%s.%s": it has been removed from the datamodel|} - tblname k ; + if do_not_load then row - ) else + else let column_schema = Schema.Table.find k table_schema in let value = Schema.Value.unmarshal column_schema.Schema.Column.ty From 5efa5d066837f995ef1494fb0ebae742f86698b6 Mon Sep 17 00:00:00 2001 From: Gang Ji Date: Tue, 7 Jan 2025 19:27:42 +0800 Subject: [PATCH 49/50] CA-404013: replace Thread.delay with Delay module Reporter.cancel would be blocked for a long time by backoff delay when another thread is waiting for next reading, replace Thread.delay with Delay module so that Reporter.cancel will not be blocked. Signed-off-by: Gang Ji --- ocaml/xcp-rrdd/lib/plugin/reporter.ml | 36 ++++++++++++++++++--- ocaml/xcp-rrdd/lib/plugin/reporter_local.ml | 2 +- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/ocaml/xcp-rrdd/lib/plugin/reporter.ml b/ocaml/xcp-rrdd/lib/plugin/reporter.ml index 683af78b243..b7c9c018cbd 100644 --- a/ocaml/xcp-rrdd/lib/plugin/reporter.ml +++ b/ocaml/xcp-rrdd/lib/plugin/reporter.ml @@ -12,6 +12,8 @@ * GNU Lesser General Public License for more details. *) +module Delay = Xapi_stdext_threads.Threadext.Delay + let with_lock = Xapi_stdext_threads.Threadext.Mutex.execute (* This exception is setup to be raised on sigint by Process.initialise, @@ -57,10 +59,20 @@ type state = | Cancelled | Stopped of [`New | `Cancelled | `Failed of exn] -type t = {mutable state: state; lock: Mutex.t; condition: Condition.t} +type t = { + mutable state: state + ; lock: Mutex.t + ; condition: Condition.t + ; delay: Delay.t +} let make () = - {state= Stopped `New; lock= Mutex.create (); condition= Condition.create ()} + { + state= Stopped `New + ; lock= Mutex.create () + ; condition= Condition.create () + ; delay= Delay.make () + } let choose_protocol = function | Rrd_interface.V1 -> @@ -69,13 +81,20 @@ let choose_protocol = function Rrd_protocol_v2.protocol let wait_until_next_reading (module D : Debug.DEBUG) ~neg_shift ~uid ~protocol - ~overdue_count = + ~overdue_count ~reporter = let next_reading = RRDD.Plugin.Local.register uid Rrd.Five_Seconds protocol in let wait_time = next_reading -. neg_shift in let wait_time = if wait_time < 0.1 then wait_time +. 5. else wait_time in (* overdue count - 0 if there is no overdue; +1 if there is overdue *) if wait_time > 0. then ( - Thread.delay wait_time ; 0 + ( match reporter with + | Some reporter -> + let (_ : bool) = Delay.wait reporter.delay wait_time in + () + | None -> + Thread.delay wait_time + ) ; + 0 ) else ( if overdue_count > 1 then ( (* if register returns negative more than once in a succession, @@ -84,7 +103,12 @@ let wait_until_next_reading (module D : Debug.DEBUG) ~neg_shift ~uid ~protocol D.debug "rrdd says next reading is overdue, seems like rrdd is busy;\n\ \t\t\t\tBacking off for %.1f seconds" backoff_time ; - Thread.delay backoff_time + match reporter with + | Some reporter -> + let (_ : bool) = Delay.wait reporter.delay backoff_time in + () + | None -> + Thread.delay backoff_time ) else D.debug "rrdd says next reading is overdue by %.1f seconds; not sleeping" (-.wait_time) ; @@ -147,8 +171,10 @@ let cancel ~reporter = match reporter.state with | Running -> reporter.state <- Cancelled ; + Delay.signal reporter.delay ; Condition.wait reporter.condition reporter.lock | Cancelled -> + Delay.signal reporter.delay ; Condition.wait reporter.condition reporter.lock | Stopped _ -> () diff --git a/ocaml/xcp-rrdd/lib/plugin/reporter_local.ml b/ocaml/xcp-rrdd/lib/plugin/reporter_local.ml index 42955b5ae1f..fec90d1dc8e 100644 --- a/ocaml/xcp-rrdd/lib/plugin/reporter_local.ml +++ b/ocaml/xcp-rrdd/lib/plugin/reporter_local.ml @@ -28,7 +28,7 @@ let start_local (module D : Debug.DEBUG) ~reporter ~uid ~neg_shift ~page_count overdue_count := wait_until_next_reading (module D) - ~neg_shift ~uid ~protocol ~overdue_count:!overdue_count ; + ~neg_shift ~uid ~protocol ~overdue_count:!overdue_count ~reporter ; if page_count > 0 then let payload = Rrd_protocol.{timestamp= Utils.now (); datasources= dss_f ()} From 32b154edf76ccfe543c5b3c075bb825979ce7a63 Mon Sep 17 00:00:00 2001 From: Andrii Sultanov Date: Tue, 7 Jan 2025 14:18:40 +0000 Subject: [PATCH 50/50] rrd2csv: Accept a trailing comma in metrics names rrd2csv itself prints out a list of comma-separated metrics names, but could't accept this list as command line arguments: ``` $ rrd2csv .....memory_total_kib, memory_free_kib, .... $ rrd2csv memory_total_kib, memory_free_kib WARNING: Requested metric AVERAGE:host:05e817e2-3a65-484d-b0da-a7163f9ffc12:memory_total_kib, is disabled or non-existant timestamp, AVERAGE:host:05e817e2-3a65-484d-b0da-a7163f9ffc12:memory_free_kib 2025-01-07T14:06:45Z, 30042000 ``` Now this works just fine: ``` $ rrd2csv memory_total_kib, memory_free_kib timestamp, AVERAGE:host:92bc3b1e-e0a3-49ba-8994-fc305ff882b7:memory_total_kib, AVERAGE:host:92bc3b1e-e0a3-49ba-8994-fc305ff882b7:memory_free_kib 2025-01-07T15:04:50Z, 33350000, 30023000 ``` Signed-off-by: Andrii Sultanov --- ocaml/rrd2csv/src/rrd2csv.ml | 46 ++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/ocaml/rrd2csv/src/rrd2csv.ml b/ocaml/rrd2csv/src/rrd2csv.ml index 0448c4e067f..b22c430f656 100644 --- a/ocaml/rrd2csv/src/rrd2csv.ml +++ b/ocaml/rrd2csv/src/rrd2csv.ml @@ -149,27 +149,33 @@ module Ds_selector = struct let of_string str = let open Rrd in - let splitted = Xstringext.String.split ':' str in + let splitted = Xstringext.String.split ',' str in match splitted with - | [cf; owner; uuid; metric] -> - { - cf= (try Some (cf_type_of_string cf) with _ -> None) - ; owner= - ( match owner with - | "vm" -> - Some (VM uuid) - | "sr" -> - Some (SR uuid) - | "host" -> - Some Host - | _ -> - None - ) - ; uuid - ; metric - } - | [metric] -> - {empty with metric} + | without_trailing_comma :: _ -> ( + let splitted = Xstringext.String.split ':' without_trailing_comma in + match splitted with + | [cf; owner; uuid; metric] -> + { + cf= (try Some (cf_type_of_string cf) with _ -> None) + ; owner= + ( match owner with + | "vm" -> + Some (VM uuid) + | "sr" -> + Some (SR uuid) + | "host" -> + Some Host + | _ -> + None + ) + ; uuid + ; metric + } + | [metric] -> + {empty with metric} + | _ -> + failwith "ds_selector_of_string" + ) | _ -> failwith "ds_selector_of_string"