From 34b00fd272b5c454137b15a36a17e322f7bbda90 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 21 Jan 2020 12:45:00 -0800 Subject: [PATCH 001/432] Prepare for post-2.13.0 (2.13.90). Acked-by: Gurucharan Shetty Signed-off-by: Ben Pfaff --- NEWS | 4 ++++ configure.ac | 2 +- debian/changelog | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 89b53bfae..100cb22d5 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +Post-v2.13.0 +--------------------- + + v2.13.0 - xx xxx xxxx --------------------- - OVN: diff --git a/configure.ac b/configure.ac index 92b52f671..1877aae56 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. AC_PREREQ(2.63) -AC_INIT(openvswitch, 2.13.0, bugs@openvswitch.org) +AC_INIT(openvswitch, 2.13.90, bugs@openvswitch.org) AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/debian/changelog b/debian/changelog index c6db87d54..4ec058d99 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +openvswitch (2.13.90-1) unstable; urgency=low + + * New upstream version + + -- Open vSwitch team Tue, 21 Jan 2020 12:44:30 -0700 + openvswitch (2.13.0-1) unstable; urgency=low * New upstream version -- GitLab From 56c8027b5fd830a810f320a6ded6e8f8289e4fe6 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Thu, 19 Dec 2019 14:58:43 +0000 Subject: [PATCH 002/432] tc: handle packet mark of zero Openstack may set an skb mark of 0 in tunnel rules. This is considered to be an unused/unset value. However, it prevents the rule from being offloaded. Check if the key value of the skb mark is 0 when it is in use (mask is set to all ones). If it is then ignore the field and continue with TC offload. Only the exact-match case is covered by this patch as it addresses the Openstack use-case and seems most robust against feature evolution: f.e. 
in future there may exist hardware offload scenarios where an operation, such as a BPF offload, sets the SKB mark before proceeding to the in-HW OVS datapath. Signed-off-by: John Hurley Co-Authored-by: Simon Horman Signed-off-by: Simon Horman Acked-by: Aaron Conole --- lib/netdev-offload-tc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 723ec376d..550e440b3 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1620,6 +1620,11 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, mask->ct_label = OVS_U128_ZERO; } + /* ignore exact match on skb_mark of 0. */ + if (mask->pkt_mark == UINT32_MAX && !key->pkt_mark) { + mask->pkt_mark = 0; + } + err = test_key_and_mask(match); if (err) { return err; -- GitLab From dbbd0cf64492426938c4ad3177cabb444b1e9163 Mon Sep 17 00:00:00 2001 From: Damijan Skvarc Date: Wed, 22 Jan 2020 15:06:43 +0100 Subject: [PATCH 003/432] dpif: Fix memory leak while dumping dpif flows. Leak was detected by running test: "ofproto-dpif - balance-tcp bonding" Fixes: 0e8f5c6a38d0 ("dpif-netdev: Modified ovs-appctl dpctl/dump-flows command") Signed-off-by: Damijan Skvarc Signed-off-by: Ilya Maximets --- ofproto/ofproto-dpif.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index d3cb39207..ded170588 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -6282,6 +6282,9 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) { struct flow flow; + /* No need for extra info. 
*/ + free(f.attrs.dp_extra_info); + if ((odp_flow_key_to_flow(f.key, f.key_len, &flow, NULL) == ODP_FIT_ERROR) || (xlate_lookup_ofproto(ofproto->backer, &flow, NULL, NULL) -- GitLab From 929dc96d0bca21fe3dc134cf45c3e0718811536a Mon Sep 17 00:00:00 2001 From: Ning Wu Date: Tue, 21 Jan 2020 23:46:58 -0800 Subject: [PATCH 004/432] lib/stream-windows.c: Grant Access Privilege of Named Pipe to Creator Current implementation of ovs on windows only allows LocalSystem and Administrators to access the named pipe created with API of ovs. Thus any service that needs to invoke the API to create named pipe has to run as System account to interactive with ovs. It causes the system more vulnerable if one of those services was break into. The patch adds the creator owner account to allowed ACLs. Signed-off-by: Ning Wu Acked-by: Alin Gabriel Serdean Acked-by: Anand Kumar Signed-off-by: Alin Gabriel Serdean --- Documentation/ref/ovsdb.7.rst | 3 ++- lib/stream-windows.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/Documentation/ref/ovsdb.7.rst b/Documentation/ref/ovsdb.7.rst index b1f3f5d49..da4dbedd2 100644 --- a/Documentation/ref/ovsdb.7.rst +++ b/Documentation/ref/ovsdb.7.rst @@ -422,7 +422,8 @@ punix: named . On Windows, listens on a local named pipe, creating a named pipe - to mimic the behavior of a Unix domain socket. + to mimic the behavior of a Unix domain socket. The ACLs of the named + pipe include LocalSystem, Administrators, and Creator Owner. All IP-based connection methods accept IPv4 and IPv6 addresses. To specify an IPv6 address, wrap it in square brackets, e.g. ``ssl:[::1]:6640``. Passive diff --git a/lib/stream-windows.c b/lib/stream-windows.c index 34bc610b6..5c4c55e5d 100644 --- a/lib/stream-windows.c +++ b/lib/stream-windows.c @@ -41,7 +41,7 @@ static void maybe_unlink_and_free(char *path); #define LOCAL_PREFIX "\\\\.\\pipe\\" /* Size of the allowed PSIDs for securing Named Pipe. 
*/ -#define ALLOWED_PSIDS_SIZE 2 +#define ALLOWED_PSIDS_SIZE 3 /* This function has the purpose to remove all the slashes received in s. */ static char * @@ -412,6 +412,9 @@ create_pnpipe(char *name) PACL acl = NULL; PSECURITY_DESCRIPTOR psd = NULL; HANDLE npipe; + HANDLE hToken = NULL; + DWORD dwBufSize = 0; + PTOKEN_USER pTokenUsr = NULL; /* Disable access over network. */ if (!AllocateAndInitializeSid(&sia, 1, SECURITY_NETWORK_RID, @@ -438,6 +441,32 @@ create_pnpipe(char *name) goto handle_error; } + /* Open the access token of calling process */ + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken)) { + VLOG_ERR_RL(&rl, "Error opening access token of calling process."); + goto handle_error; + } + + /* get the buffer size buffer needed for SID */ + GetTokenInformation(hToken, TokenUser, NULL, 0, &dwBufSize); + + pTokenUsr = xmalloc(dwBufSize); + memset(pTokenUsr, 0, dwBufSize); + + /* Retrieve the token information in a TOKEN_USER structure. */ + if (!GetTokenInformation(hToken, TokenUser, pTokenUsr, dwBufSize, + &dwBufSize)) { + VLOG_ERR_RL(&rl, "Error retrieving token information."); + goto handle_error; + } + CloseHandle(hToken); + + if (!IsValidSid(pTokenUsr->User.Sid)) { + VLOG_ERR_RL(&rl, "Invalid SID."); + goto handle_error; + } + allowedPsid[2] = pTokenUsr->User.Sid; + for (int i = 0; i < ALLOWED_PSIDS_SIZE; i++) { aclSize += sizeof(ACCESS_ALLOWED_ACE) + GetLengthSid(allowedPsid[i]) - @@ -490,11 +519,13 @@ create_pnpipe(char *name) npipe = CreateNamedPipe(name, PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED, PIPE_TYPE_MESSAGE | PIPE_READMODE_BYTE | PIPE_WAIT, 64, BUFSIZE, BUFSIZE, 0, &sa); + free(pTokenUsr); free(acl); free(psd); return npipe; handle_error: + free(pTokenUsr); free(acl); free(psd); return INVALID_HANDLE_VALUE; -- GitLab From 586cd3101e7fda54d14fb5bf12d847f35d968627 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 9 Jan 2020 12:49:43 -0800 Subject: [PATCH 005/432] ofproto-dpif-upcall: Get rid of udpif_synchronize(). 
RCU provides the semantics we want from udpif_synchronize() and it should be much more lightweight than killing and restarting all the upcall threads. It looks like udpif_synchronize() was written before the OVS tree had RCU support, which is probably why we didn't use it here from the beginning. So we can just change udpif_synchronize() to a single ovsrcu_synchronize() call. However, udpif_synchronize() only has a single caller, which calls ovsrcu_synchronize() anyway just beforehand, via xlate_txn_commit(). So we can get rid of udpif_synchronize() entirely, which this patch does. As a side effect, this eliminates one reason why terminating OVS cleanly clears the datapath flow table. An upcoming patch will eliminate other reasons. Acked-by: Numan Siddique Signed-off-by: Ben Pfaff --- ofproto/ofproto-dpif-upcall.c | 17 ----------------- ofproto/ofproto-dpif-upcall.h | 1 - ofproto/ofproto-dpif-xlate.c | 14 +++++++++----- ofproto/ofproto-dpif.c | 4 ---- 4 files changed, 9 insertions(+), 27 deletions(-) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 3aef9a6c3..cff6d4bf3 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -644,23 +644,6 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers_, } } -/* Waits for all ongoing upcall translations to complete. This ensures that - * there are no transient references to any removed ofprotos (or other - * objects). In particular, this should be called after an ofproto is removed - * (e.g. via xlate_remove_ofproto()) but before it is destroyed. */ -void -udpif_synchronize(struct udpif *udpif) -{ - /* This is stronger than necessary. It would be sufficient to ensure - * (somehow) that each handler and revalidator thread had passed through - * its main loop once. 
*/ - size_t n_handlers_ = udpif->n_handlers; - size_t n_revalidators_ = udpif->n_revalidators; - - udpif_stop_threads(udpif); - udpif_start_threads(udpif, n_handlers_, n_revalidators_); -} - /* Notifies 'udpif' that something changed which may render previous * xlate_actions() results invalid. */ void diff --git a/ofproto/ofproto-dpif-upcall.h b/ofproto/ofproto-dpif-upcall.h index cef1d3419..693107ae5 100644 --- a/ofproto/ofproto-dpif-upcall.h +++ b/ofproto/ofproto-dpif-upcall.h @@ -33,7 +33,6 @@ struct udpif *udpif_create(struct dpif_backer *, struct dpif *); void udpif_run(struct udpif *udpif); void udpif_set_threads(struct udpif *, size_t n_handlers, size_t n_revalidators); -void udpif_synchronize(struct udpif *); void udpif_destroy(struct udpif *); void udpif_revalidate(struct udpif *); void udpif_get_memory_usage(struct udpif *, struct simap *usage); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 4407f9c97..0b45ecf3d 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -1171,11 +1171,15 @@ xlate_xport_copy(struct xbridge *xbridge, struct xbundle *xbundle, * * A sample workflow: * - * xlate_txn_start(); - * ... - * edit_xlate_configuration(); - * ... - * xlate_txn_commit(); */ + * xlate_txn_start(); + * ... + * edit_xlate_configuration(); + * ... + * xlate_txn_commit(); + * + * The ovsrcu_synchronize() call here also ensures that the upcall threads + * retain no references to anything in the previous configuration. + */ void xlate_txn_commit(void) { diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index ded170588..67a4ad46f 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1751,10 +1751,6 @@ destruct(struct ofproto *ofproto_, bool del) xlate_remove_ofproto(ofproto); xlate_txn_commit(); - /* Ensure that the upcall processing threads have no remaining references - * to the ofproto or anything in it. 
*/ - udpif_synchronize(ofproto->backer->udpif); - hmap_remove(&all_ofproto_dpifs_by_name, &ofproto->all_ofproto_dpifs_by_name_node); hmap_remove(&all_ofproto_dpifs_by_uuid, -- GitLab From 79eadafeb1b47a3871cb792aa972f6e4d89d1a0b Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 9 Jan 2020 12:49:44 -0800 Subject: [PATCH 006/432] ofproto: Do not delete datapath flows on exit by default. Commit e96a5c24e853 ("upcall: Remove datapath flows when setting n-threads.") caused OVS to delete datapath flows when it exits through any graceful means. This is not necessarily desirable, especially when OVS is being stopped as part of an upgrade. This commit changes OVS so that it only removes datapath flows when requested, via "ovs-appctl exit --cleanup". Acked-by: Numan Siddique Tested-by: Numan Siddique Signed-off-by: Ben Pfaff --- NEWS | 1 + ofproto/ofproto-dpif-upcall.c | 26 ++++++++++++++++---------- ofproto/ofproto.c | 8 ++++---- vswitchd/ovs-vswitchd.8.in | 13 +++++++------ 4 files changed, 28 insertions(+), 20 deletions(-) diff --git a/NEWS b/NEWS index 100cb22d5..c24ec536c 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,6 @@ Post-v2.13.0 --------------------- + - ovs-vswitchd no longer deletes datapath flows on exit by default. 
v2.13.0 - xx xxx xxxx diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index cff6d4bf3..483cebb52 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -332,7 +332,7 @@ static size_t recv_upcalls(struct handler *); static int process_upcall(struct udpif *, struct upcall *, struct ofpbuf *odp_actions, struct flow_wildcards *); static void handle_upcalls(struct udpif *, struct upcall *, size_t n_upcalls); -static void udpif_stop_threads(struct udpif *); +static void udpif_stop_threads(struct udpif *, bool delete_flows); static void udpif_start_threads(struct udpif *, size_t n_handlers, size_t n_revalidators); static void udpif_pause_revalidators(struct udpif *); @@ -483,7 +483,7 @@ udpif_run(struct udpif *udpif) void udpif_destroy(struct udpif *udpif) { - udpif_stop_threads(udpif); + udpif_stop_threads(udpif, false); dpif_register_dp_purge_cb(udpif->dpif, NULL, udpif); dpif_register_upcall_cb(udpif->dpif, NULL, udpif); @@ -504,9 +504,15 @@ udpif_destroy(struct udpif *udpif) free(udpif); } -/* Stops the handler and revalidator threads. */ +/* Stops the handler and revalidator threads. + * + * If 'delete_flows' is true, we delete ukeys and delete all flows from the + * datapath. Otherwise, we end up double-counting stats for flows that remain + * in the datapath. If 'delete_flows' is false, we skip this step. This is + * appropriate if OVS is about to exit anyway and it is desirable to let + * existing network connections continue being forwarded afterward. */ static void -udpif_stop_threads(struct udpif *udpif) +udpif_stop_threads(struct udpif *udpif, bool delete_flows) { if (udpif && (udpif->n_handlers != 0 || udpif->n_revalidators != 0)) { size_t i; @@ -526,10 +532,10 @@ udpif_stop_threads(struct udpif *udpif) dpif_disable_upcall(udpif->dpif); ovsrcu_quiesce_end(); - /* Delete ukeys, and delete all flows from the datapath to prevent - * double-counting stats. 
*/ - for (i = 0; i < udpif->n_revalidators; i++) { - revalidator_purge(&udpif->revalidators[i]); + if (delete_flows) { + for (i = 0; i < udpif->n_revalidators; i++) { + revalidator_purge(&udpif->revalidators[i]); + } } latch_poll(&udpif->exit_latch); @@ -627,7 +633,7 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers_, if (udpif->n_handlers != n_handlers_ || udpif->n_revalidators != n_revalidators_) { - udpif_stop_threads(udpif); + udpif_stop_threads(udpif, true); } if (!udpif->handlers && !udpif->revalidators) { @@ -681,7 +687,7 @@ udpif_flush(struct udpif *udpif) size_t n_handlers_ = udpif->n_handlers; size_t n_revalidators_ = udpif->n_revalidators; - udpif_stop_threads(udpif); + udpif_stop_threads(udpif, true); dpif_flow_flush(udpif->dpif); udpif_start_threads(udpif, n_handlers_, n_revalidators_); } diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 08830d837..5d69a4332 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -1601,13 +1601,13 @@ ofproto_rule_delete(struct ofproto *ofproto, struct rule *rule) } static void -ofproto_flush__(struct ofproto *ofproto) +ofproto_flush__(struct ofproto *ofproto, bool del) OVS_EXCLUDED(ofproto_mutex) { struct oftable *table; /* This will flush all datapath flows. 
*/ - if (ofproto->ofproto_class->flush) { + if (del && ofproto->ofproto_class->flush) { ofproto->ofproto_class->flush(ofproto); } @@ -1710,7 +1710,7 @@ ofproto_destroy(struct ofproto *p, bool del) return; } - ofproto_flush__(p); + ofproto_flush__(p, del); HMAP_FOR_EACH_SAFE (ofport, next_ofport, hmap_node, &p->ports) { ofport_destroy(ofport, del); } @@ -2288,7 +2288,7 @@ void ofproto_flush_flows(struct ofproto *ofproto) { COVERAGE_INC(ofproto_flush); - ofproto_flush__(ofproto); + ofproto_flush__(ofproto, false); connmgr_flushed(ofproto->connmgr); } diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in index a23477176..ac66ed7bb 100644 --- a/vswitchd/ovs-vswitchd.8.in +++ b/vswitchd/ovs-vswitchd.8.in @@ -107,12 +107,13 @@ how to configure Open vSwitch. .SS "GENERAL COMMANDS" .IP "\fBexit\fR \fI--cleanup\fR" Causes \fBovs\-vswitchd\fR to gracefully terminate. If \fI--cleanup\fR -is specified, release datapath resources configured by \fBovs\-vswitchd\fR. -Otherwise, datapath flows and other resources remains undeleted. -Resources of datapaths that are integrated into \fBovs\-vswitchd\fR (e.g. -the \fBnetdev\fR datapath type) are always released regardless of -\fI--cleanup\fR except for ports with \fBinternal\fR type. Use \fI--cleanup\fR -to release \fBinternal\fR ports too. +is specified, deletes flows from datpaths and releases other datapath +resources configured by \fBovs\-vswitchd\fR. Otherwise, datapath +flows and other resources remains undeleted. Resources of datapaths +that are integrated into \fBovs\-vswitchd\fR (e.g. the \fBnetdev\fR +datapath type) are always released regardless of \fI--cleanup\fR +except for ports with \fBinternal\fR type. Use \fI--cleanup\fR to +release \fBinternal\fR ports too. . 
.IP "\fBqos/show-types\fR \fIinterface\fR" Queries the interface for a list of Quality of Service types that are -- GitLab From 342b8904ab4f29b2a4a429e032f30ddad420a29e Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 23 Jan 2020 19:10:05 +0100 Subject: [PATCH 007/432] dpif: Fix dp_extra_info leak by reworking the allocation scheme. dpctl module leaks the 'dp_extra_info' in case the dumped flow doesn't fit the dump filter while executing dpctl/dump-flows and also while executing dpctl/get-flow. This is already a 3rd attempt to fix all the leaks and incorrect usage of this string that definitely indicates poor initial design of the feature. Flow dump/get documentation clearly states that the caller does not own the data provided in dpif_flow. Datapath still owns all the data and promises to not free/modify it until the next quiescent period, however we're requesting the caller to free 'dp_extra_info' and this obviously breaks the rules. This patch fixes the issue by storing 'dp_extra_info' within 'struct dp_netdev_flow', making the datapath own it. 'dp_netdev_flow' is RCU-protected, so it will be valid until the next quiescent period. 
Fixes: 0e8f5c6a38d0 ("dpif-netdev: Modified ovs-appctl dpctl/dump-flows command") Tested-by: Emma Finn Acked-by: Emma Finn Signed-off-by: Ilya Maximets --- lib/dpctl.c | 1 - lib/dpif-netdev.c | 32 +++++++++++++++++--------------- lib/dpif.c | 1 - lib/dpif.h | 9 ++++----- ofproto/ofproto-dpif-upcall.c | 3 --- ofproto/ofproto-dpif.c | 3 --- 6 files changed, 21 insertions(+), 28 deletions(-) diff --git a/lib/dpctl.c b/lib/dpctl.c index 4ebb00456..db2b1f896 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -832,7 +832,6 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow *f, struct hmap *ports, if (dpctl_p->verbosity && f->attrs.dp_extra_info) { ds_put_format(ds, ", dp-extra-info:%s", f->attrs.dp_extra_info); } - free(f->attrs.dp_extra_info); } struct dump_types { diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 3be41014e..d393aab5e 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -551,6 +551,7 @@ struct dp_netdev_flow { struct packet_batch_per_flow *batch; /* Packet classification. */ + char *dp_extra_info; /* String to return in a flow dump/get. */ struct dpcls_rule cr; /* In owning dp_netdev's 'cls'. */ /* 'cr' must be the last member. 
*/ }; @@ -2096,6 +2097,7 @@ static void dp_netdev_flow_free(struct dp_netdev_flow *flow) { dp_netdev_actions_free(dp_netdev_flow_get_actions(flow)); + free(flow->dp_extra_info); free(flow); } @@ -3158,21 +3160,7 @@ dp_netdev_flow_to_dpif_flow(const struct dp_netdev *dp, flow->pmd_id = netdev_flow->pmd_id; get_dpif_flow_status(dp, netdev_flow, &flow->stats, &flow->attrs); - - struct ds extra_info = DS_EMPTY_INITIALIZER; - size_t unit; - - ds_put_cstr(&extra_info, "miniflow_bits("); - FLOWMAP_FOR_EACH_UNIT (unit) { - if (unit) { - ds_put_char(&extra_info, ','); - } - ds_put_format(&extra_info, "%d", - count_1bits(netdev_flow->cr.mask->mf.map.bits[unit])); - } - ds_put_char(&extra_info, ')'); - flow->attrs.dp_extra_info = ds_steal_cstr(&extra_info); - ds_destroy(&extra_info); + flow->attrs.dp_extra_info = netdev_flow->dp_extra_info; } static int @@ -3312,9 +3300,11 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, const struct nlattr *actions, size_t actions_len) OVS_REQUIRES(pmd->flow_mutex) { + struct ds extra_info = DS_EMPTY_INITIALIZER; struct dp_netdev_flow *flow; struct netdev_flow_key mask; struct dpcls *cls; + size_t unit; /* Make sure in_port is exact matched before we read it. 
*/ ovs_assert(match->wc.masks.in_port.odp_port == ODPP_NONE); @@ -3355,6 +3345,18 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, cls = dp_netdev_pmd_find_dpcls(pmd, in_port); dpcls_insert(cls, &flow->cr, &mask); + ds_put_cstr(&extra_info, "miniflow_bits("); + FLOWMAP_FOR_EACH_UNIT (unit) { + if (unit) { + ds_put_char(&extra_info, ','); + } + ds_put_format(&extra_info, "%d", + count_1bits(flow->cr.mask->mf.map.bits[unit])); + } + ds_put_char(&extra_info, ')'); + flow->dp_extra_info = ds_steal_cstr(&extra_info); + ds_destroy(&extra_info); + cmap_insert(&pmd->flow_table, CONST_CAST(struct cmap_node *, &flow->node), dp_netdev_flow_hash(&flow->ufid)); diff --git a/lib/dpif.c b/lib/dpif.c index 6cbcdfb2e..9d9c716c1 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -966,7 +966,6 @@ dpif_probe_feature(struct dpif *dpif, const char *name, && ovs_u128_equals(*ufid, flow.ufid)))) { enable_feature = true; } - free(flow.attrs.dp_extra_info); error = dpif_flow_del(dpif, key->data, key->size, ufid, NON_PMD_CORE_ID, NULL); diff --git a/lib/dpif.h b/lib/dpif.h index 286a0e2d5..4df8f7c8b 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -511,9 +511,9 @@ struct dpif_flow_detailed_stats { }; struct dpif_flow_attrs { - bool offloaded; /* True if flow is offloaded to HW. */ - const char *dp_layer; /* DP layer the flow is handled in. */ - char *dp_extra_info; /* Extra information provided by DP. */ + bool offloaded; /* True if flow is offloaded to HW. */ + const char *dp_layer; /* DP layer the flow is handled in. */ + const char *dp_extra_info; /* Extra information provided by DP. */ }; struct dpif_flow_dump_types { @@ -745,8 +745,7 @@ struct dpif_execute { * for the datapath flow corresponding to 'key'. The mask and actions may point * within '*buffer', or may point at RCU-protected data. Therefore, callers * that wish to hold these over quiescent periods must make a copy of these - * fields before quiescing. 
'attrs.dp_extra_info' is a dynamically allocated - * string that should be freed if provided by the datapath. + * fields before quiescing. * * Callers should always provide 'key' to improve dpif logging in the event of * errors or unexpected behaviour. diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 483cebb52..8dfa05b71 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -2635,9 +2635,6 @@ revalidate(struct revalidator *revalidator) bool already_dumped; int error; - /* We don't need an extra information. */ - free(f->attrs.dp_extra_info); - if (ukey_acquire(udpif, f, &ukey, &error)) { if (error == EBUSY) { /* Another thread is processing this flow, so don't bother diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 67a4ad46f..0222ec82f 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -6278,9 +6278,6 @@ ofproto_unixctl_dpif_dump_flows(struct unixctl_conn *conn, while (dpif_flow_dump_next(flow_dump_thread, &f, 1)) { struct flow flow; - /* No need for extra info. */ - free(f.attrs.dp_extra_info); - if ((odp_flow_key_to_flow(f.key, f.key_len, &flow, NULL) == ODP_FIT_ERROR) || (xlate_lookup_ofproto(ofproto->backer, &flow, NULL, NULL) -- GitLab From a867c010ee9183885ee9d3eb76a0005c075c4d2e Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Fri, 20 Dec 2019 09:51:08 -0800 Subject: [PATCH 008/432] conntrack: Fix conntrack new state In connection tracking system, a connection is established if we see packets from both directions. However, in userspace datapath's conntrack, if we send a connection setup packet in one direction twice, it will make the connection to be in established state. This patch fixes the aforementioned issue, and adds a system traffic test for UDP and TCP traffic to avoid regression. 
Fixes: a489b16854b59 ("conntrack: New userspace connection tracker.") Signed-off-by: Yi-Hung Wei Signed-off-by: William Tu --- lib/conntrack-other.c | 4 +++- lib/conntrack-private.h | 1 + lib/conntrack-tcp.c | 15 ++++++++++----- lib/conntrack.c | 3 +++ tests/system-traffic.at | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 58 insertions(+), 6 deletions(-) diff --git a/lib/conntrack-other.c b/lib/conntrack-other.c index 932f2f4ad..de22ef87c 100644 --- a/lib/conntrack-other.c +++ b/lib/conntrack-other.c @@ -47,16 +47,18 @@ other_conn_update(struct conntrack *ct, struct conn *conn_, struct dp_packet *pkt OVS_UNUSED, bool reply, long long now) { struct conn_other *conn = conn_other_cast(conn_); + enum ct_update_res ret = CT_UPDATE_VALID; if (reply && conn->state != OTHERS_BIDIR) { conn->state = OTHERS_BIDIR; } else if (conn->state == OTHERS_FIRST) { conn->state = OTHERS_MULTIPLE; + ret = CT_UPDATE_VALID_NEW; } conn_update_expiration(ct, &conn->up, other_timeouts[conn->state], now); - return CT_UPDATE_VALID; + return ret; } static bool diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h index b04e4cd77..9a8ca3910 100644 --- a/lib/conntrack-private.h +++ b/lib/conntrack-private.h @@ -124,6 +124,7 @@ enum ct_update_res { CT_UPDATE_INVALID, CT_UPDATE_VALID, CT_UPDATE_NEW, + CT_UPDATE_VALID_NEW, }; /* Timeouts: all the possible timeout states passed to update_expiration() diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c index 47eb8e203..416cb769d 100644 --- a/lib/conntrack-tcp.c +++ b/lib/conntrack-tcp.c @@ -181,11 +181,16 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_, return CT_UPDATE_INVALID; } - if (((tcp_flags & (TCP_SYN | TCP_ACK)) == TCP_SYN) - && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 - && src->state >= CT_DPIF_TCPS_FIN_WAIT_2) { - src->state = dst->state = CT_DPIF_TCPS_CLOSED; - return CT_UPDATE_NEW; + if ((tcp_flags & (TCP_SYN | TCP_ACK)) == TCP_SYN) { + if (dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 + && src->state >= 
CT_DPIF_TCPS_FIN_WAIT_2) { + src->state = dst->state = CT_DPIF_TCPS_CLOSED; + return CT_UPDATE_NEW; + } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) { + src->state = CT_DPIF_TCPS_SYN_SENT; + conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIRST_PACKET, now); + return CT_UPDATE_NEW; + } } if (src->wscale & CT_WSCALE_FLAG diff --git a/lib/conntrack.c b/lib/conntrack.c index 60222ca53..ff5a89457 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -1110,6 +1110,9 @@ conn_update_state(struct conntrack *ct, struct dp_packet *pkt, ovs_mutex_unlock(&ct->ct_lock); create_new_conn = true; break; + case CT_UPDATE_VALID_NEW: + pkt->md.ct_state |= CS_NEW; + break; default: OVS_NOT_REACHED(); } diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 0fb7aacfa..4a39c929c 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -2290,6 +2290,47 @@ tcp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - new connections]) +CHECK_CONNTRACK() +OVS_TRAFFIC_VSWITCHD_START() + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +AT_DATA([flows1.txt], [dnl +table=0, priority=1,action=drop +table=0, priority=10,arp,action=normal +table=0, priority=100,tcp,action=ct(table=1) +table=0, priority=100,udp,action=ct(table=1) +table=1, priority=100,in_port=1,tcp,ct_state=+trk+new,action=ct(commit) +table=1, priority=100,in_port=1,udp,ct_state=+trk+new,action=ct(commit) +table=1, priority=100,in_port=1,ct_state=+trk+est,action=2 +table=1, priority=100,in_port=2,ct_state=+trk+est,action=1 +]) + +ovs-appctl vlog/set dbg + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows1.txt]) + +dnl TCP traffic from ns0 to ns1 should fail. +OVS_START_L7([at_ns1], [http]) +NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log], [4]) + +dnl Send UDP packet on port 1 twice. 
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) + +dnl There should not be any packet that matches the established ct_state. +AT_CHECK([ovs-ofctl dump-flows br0 "table=1 in_port=1,ct_state=+trk+est" | ofctl_strip], [0], [dnl +NXST_FLOW reply: + table=1, priority=100,ct_state=+est+trk,in_port=1 actions=output:2 +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([conntrack - ICMP related]) AT_SKIP_IF([test $HAVE_NC = no]) CHECK_CONNTRACK() -- GitLab From 20dac08fdcce4b7fda1d07add3b346aa9751cfbc Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 23 Jan 2020 09:03:11 -0800 Subject: [PATCH 009/432] docs: Add header install command for afxdp. The 'XDP_RING_NEED_WAKEUP' and related flags are defined if_xdp.h, so if users are building their own kernel, users have to update the kernel's header files, by doing: $ make headers_install INSTALL_HDR_PATH=/usr Otherwise the following error shows: /usr/local/include/bpf/xsk.h: In function 'xsk_ring_prod__needs_wakeup': /usr/local/include/bpf/xsk.h:82:21: error: 'XDP_RING_NEED_WAKEUP' undeclared \ (first use in this function) return *r->flags & XDP_RING_NEED_WAKEUP; Reported-by: Tomek Osinski Reported-at: https://osinstom.github.io/en/tutorial/ovs-afxdp-installation/ Signed-off-by: William Tu Acked-by: Ben Pfaff Signed-off-by: William Tu --- Documentation/intro/install/afxdp.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst index c4685fa7e..99003e4db 100644 --- a/Documentation/intro/install/afxdp.rst +++ b/Documentation/intro/install/afxdp.rst @@ -108,6 +108,14 @@ vSwitch with AF_XDP will require the following: * 
CONFIG_XDP_SOCKETS_DIAG=y (Debugging) +- If you're building your own kernel, be sure that you're installing kernel + headers too. For example, with the following command:: + + make headers_install INSTALL_HDR_PATH=/usr + +- If you're using kernel from the distribution, be sure that corresponding + kernel headers package installed. + - Once your AF_XDP-enabled kernel is ready, if possible, run **./xdpsock -r -N -z -i ** under linux/samples/bpf. This is an OVS independent benchmark tools for AF_XDP. -- GitLab From cb208379195729031e84a3ef640bb89693592a0d Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Thu, 30 Jan 2020 15:55:27 -0800 Subject: [PATCH 010/432] compat: Remove HAVE_BOOL_TYPE OVS only supports Linux kernels since 3.10 and all kernels since then have the bool type. This check is unnecessary so remove it. Passes Travis: https://travis-ci.org/gvrose8192/ovs-experimental/builds/644103253 Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- acinclude.m4 | 2 -- datapath/linux/compat/include/linux/stddef.h | 7 ------- datapath/linux/compat/include/linux/types.h | 4 ---- 3 files changed, 13 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index c1470ccc6..1212a463e 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -830,8 +830,6 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_nfct]) OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_put_zero]) - OVS_GREP_IFELSE([$KSRC/include/linux/types.h], [bool], - [OVS_DEFINE([HAVE_BOOL_TYPE])]) OVS_GREP_IFELSE([$KSRC/include/linux/types.h], [__wsum], [OVS_DEFINE([HAVE_CSUM_TYPES])]) OVS_GREP_IFELSE([$KSRC/include/uapi/linux/types.h], [__wsum], diff --git a/datapath/linux/compat/include/linux/stddef.h b/datapath/linux/compat/include/linux/stddef.h index f2b7c319a..5b44c0dee 100644 --- a/datapath/linux/compat/include/linux/stddef.h +++ b/datapath/linux/compat/include/linux/stddef.h @@ -5,13 +5,6 @@ #ifdef __KERNEL__ -#ifndef HAVE_BOOL_TYPE -enum { - false = 0, - true = 1 -}; 
-#endif /* !HAVE_BOOL_TYPE */ - #ifndef offsetofend #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER)) diff --git a/datapath/linux/compat/include/linux/types.h b/datapath/linux/compat/include/linux/types.h index b989d96c3..a58623e70 100644 --- a/datapath/linux/compat/include/linux/types.h +++ b/datapath/linux/compat/include/linux/types.h @@ -8,8 +8,4 @@ typedef __u16 __bitwise __sum16; typedef __u32 __bitwise __wsum; #endif -#ifndef HAVE_BOOL_TYPE -typedef _Bool bool; -#endif /* !HAVE_BOOL_TYPE */ - #endif -- GitLab From eb540c0f5fc86d772521dcb05c104ff93f8049a0 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Tue, 14 Jan 2020 13:21:15 +0000 Subject: [PATCH 011/432] flow: Fix parsing l3_ofs with partial offloading l3_ofs should be set all Ethernet packets, not just IPv4/IPv6 ones. For example for ARP over VLAN tagged packets, it may cause wrong processing like in changing the VLAN ID action. Fix it. Fixes: aab96ec4d81e ("dpif-netdev: retrieve flow directly from the flow mark") Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Ben Pfaff --- lib/flow.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/flow.c b/lib/flow.c index 45bb96b54..5c32b4a01 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -1107,6 +1107,7 @@ parse_tcp_flags(struct dp_packet *packet) if (OVS_UNLIKELY(eth_type_mpls(dl_type))) { packet->l2_5_ofs = (char *)data - frame; } + packet->l3_ofs = (char *)data - frame; if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) { const struct ip_header *nh = data; int ip_len; @@ -1116,7 +1117,6 @@ parse_tcp_flags(struct dp_packet *packet) return 0; } dp_packet_set_l2_pad_size(packet, size - tot_len); - packet->l3_ofs = (uint16_t)((char *)nh - frame); nw_proto = nh->ip_proto; nw_frag = ipv4_get_nw_frag(nh); @@ -1129,7 +1129,6 @@ parse_tcp_flags(struct dp_packet *packet) if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) { return 0; } - packet->l3_ofs = (uint16_t)((char *)nh - frame); 
data_pull(&data, &size, sizeof *nh); plen = ntohs(nh->ip6_plen); /* Never pull padding. */ -- GitLab From 8e371aa497aa95e3562d53f566c2d634b4b0f589 Mon Sep 17 00:00:00 2001 From: "Kirill A. Kornilov" Date: Mon, 13 Jan 2020 12:29:10 +0300 Subject: [PATCH 012/432] vswitchd: Add serial number configuration. Signed-off-by: Kirill A. Kornilov Signed-off-by: Ben Pfaff --- NEWS | 3 +++ ofproto/ofproto.c | 7 +++++++ ofproto/ofproto.h | 1 + vswitchd/bridge.c | 9 +++++++++ vswitchd/vswitch.xml | 12 ++++++++++++ 5 files changed, 32 insertions(+) diff --git a/NEWS b/NEWS index c24ec536c..9bbe71d9e 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,9 @@ Post-v2.13.0 --------------------- - ovs-vswitchd no longer deletes datapath flows on exit by default. + - OpenFlow: + * The OpenFlow ofp_desc/serial_num may now be configured by setting the + value of other-config:dp-sn in the Bridge table. v2.13.0 - xx xxx xxxx diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 5d69a4332..e2591287d 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -815,6 +815,13 @@ ofproto_set_dp_desc(struct ofproto *p, const char *dp_desc) p->dp_desc = nullable_xstrdup(dp_desc); } +void +ofproto_set_serial_desc(struct ofproto *p, const char *serial_desc) +{ + free(p->serial_desc); + p->serial_desc = nullable_xstrdup(serial_desc); +} + int ofproto_set_snoops(struct ofproto *ofproto, const struct sset *snoops) { diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index bac4a1c21..2dd253167 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -351,6 +351,7 @@ void ofproto_set_threads(int n_handlers, int n_revalidators); void ofproto_type_set_config(const char *type, const struct smap *other_config); void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc); +void ofproto_set_serial_desc(struct ofproto *p, const char *serial_desc); int ofproto_set_snoops(struct ofproto *, const struct sset *snoops); int ofproto_set_netflow(struct ofproto *, const struct netflow_options *nf_options); diff --git 
a/vswitchd/bridge.c b/vswitchd/bridge.c index e591c26a6..fe73c38d4 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -288,6 +288,7 @@ static void bridge_configure_ipfix(struct bridge *); static void bridge_configure_spanning_tree(struct bridge *); static void bridge_configure_tables(struct bridge *); static void bridge_configure_dp_desc(struct bridge *); +static void bridge_configure_serial_desc(struct bridge *); static void bridge_configure_aa(struct bridge *); static void bridge_aa_refresh_queued(struct bridge *); static bool bridge_aa_need_refresh(struct bridge *); @@ -939,6 +940,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg) bridge_configure_spanning_tree(br); bridge_configure_tables(br); bridge_configure_dp_desc(br); + bridge_configure_serial_desc(br); bridge_configure_aa(br); } free(managers); @@ -4123,6 +4125,13 @@ bridge_configure_dp_desc(struct bridge *br) smap_get(&br->cfg->other_config, "dp-desc")); } +static void +bridge_configure_serial_desc(struct bridge *br) +{ + ofproto_set_serial_desc(br->ofproto, + smap_get(&br->cfg->other_config, "dp-sn")); +} + static struct aa_mapping * bridge_aa_mapping_find(struct bridge *br, const int64_t isid) { diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 3ddaaefda..4a74ed3ef 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1261,6 +1261,18 @@ Human readable description of datapath. It is a maximum 256 byte-long free-form string to describe the datapath for debugging purposes, e.g. switch3 in room 3120. + The value is returned by the switch as a part of reply to OFPMP_DESC + request (ofp_desc). The OpenFlow specification (e.g. 1.3.5) describes + the ofp_desc structure to contain "NULL terminated ASCII strings". + For compatibility reasons no more than 255 ASCII characters should be used. + + + + Serial number. It is a maximum 32 byte-long free-form string to + provide an additional switch identification.
The value is returned + by the switch as a part of reply to OFPMP_DESC request (ofp_desc). + Same as mentioned in the description of , + the string should be no more than 31 ASCII characters for the compatibility. Date: Tue, 4 Feb 2020 22:28:26 +0100 Subject: [PATCH 013/432] netdev-dpdk: Fix port init when lacking Tx offloads for TSO. The check on TSO capability did not ensure ip checksum, tcp checksum and TSO tx offloads were available which resulted in a port init failure (example below with a ena device): *2020-02-04T17:42:52.976Z|00084|dpdk|ERR|Ethdev port_id=0 requested Tx offloads 0x2a doesn't match Tx offloads capabilities 0xe in rte_eth_dev_configure()* Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Reported-by: Ravi Kerur Signed-off-by: David Marchand Acked-by: Kevin Traynor Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- lib/netdev-dpdk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index b108cbd6b..eb1a7af94 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -1132,7 +1132,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER; } - if (info.tx_offload_capa & tx_tso_offload_capa) { + if ((info.tx_offload_capa & tx_tso_offload_capa) == tx_tso_offload_capa) { dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; } else { dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; -- GitLab From 2297cbe6cc25b6b1862c499ce8f16f52f75d9e5f Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Mon, 3 Feb 2020 11:22:22 -0300 Subject: [PATCH 014/432] netdev-linux-private: fix max length to be 16 bits The dp_packet length is limited to 16 bits, so document that and fix the length value accordingly. 
Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Signed-off-by: Flavio Leitner Signed-off-by: Ben Pfaff --- lib/netdev-linux-private.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h index 143616ca8..be2d7b10b 100644 --- a/lib/netdev-linux-private.h +++ b/lib/netdev-linux-private.h @@ -38,7 +38,8 @@ struct netdev; -#define LINUX_RXQ_TSO_MAX_LEN 65536 +/* The maximum packet length is 16 bits */ +#define LINUX_RXQ_TSO_MAX_LEN 65535 struct netdev_rxq_linux { struct netdev_rxq up; -- GitLab From 73858f9dbe83daf8cc8d4b604acc23eb62cc3f52 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Mon, 3 Feb 2020 18:45:50 -0300 Subject: [PATCH 015/432] netdev-linux: Prepend the std packet in the TSO packet Usually TSO packets are close to 50k, 60k bytes long, so to copy fewer bytes when receiving a packet from the kernel, change the approach. Instead of extending the received MTU-sized packet and appending the remaining TSO data from the TSO buffer, allocate a TSO packet with enough headroom to prepend the std packet data. Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Suggested-by: Ben Pfaff Signed-off-by: Flavio Leitner Signed-off-by: Ben Pfaff --- lib/dp-packet.c | 8 +-- lib/dp-packet.h | 2 + lib/netdev-linux-private.h | 3 +- lib/netdev-linux.c | 117 ++++++++++++++++++++++--------------- 4 files changed, 78 insertions(+), 52 deletions(-) diff --git a/lib/dp-packet.c b/lib/dp-packet.c index 8dfedcb7c..cd2623500 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -243,8 +243,8 @@ dp_packet_copy__(struct dp_packet *b, uint8_t *new_base, /* Reallocates 'b' so that it has exactly 'new_headroom' and 'new_tailroom' * bytes of headroom and tailroom, respectively.
*/ -static void -dp_packet_resize__(struct dp_packet *b, size_t new_headroom, size_t new_tailroom) +void +dp_packet_resize(struct dp_packet *b, size_t new_headroom, size_t new_tailroom) { void *new_base, *new_data; size_t new_allocated; @@ -297,7 +297,7 @@ void dp_packet_prealloc_tailroom(struct dp_packet *b, size_t size) { if (size > dp_packet_tailroom(b)) { - dp_packet_resize__(b, dp_packet_headroom(b), MAX(size, 64)); + dp_packet_resize(b, dp_packet_headroom(b), MAX(size, 64)); } } @@ -308,7 +308,7 @@ void dp_packet_prealloc_headroom(struct dp_packet *b, size_t size) { if (size > dp_packet_headroom(b)) { - dp_packet_resize__(b, MAX(size, 64), dp_packet_tailroom(b)); + dp_packet_resize(b, MAX(size, 64), dp_packet_tailroom(b)); } } diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 69ae5dfac..9a9d35183 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -152,6 +152,8 @@ struct dp_packet *dp_packet_clone_with_headroom(const struct dp_packet *, struct dp_packet *dp_packet_clone_data(const void *, size_t); struct dp_packet *dp_packet_clone_data_with_headroom(const void *, size_t, size_t headroom); +void dp_packet_resize(struct dp_packet *b, size_t new_headroom, + size_t new_tailroom); static inline void dp_packet_delete(struct dp_packet *); static inline void *dp_packet_at(const struct dp_packet *, size_t offset, diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h index be2d7b10b..c7c515f70 100644 --- a/lib/netdev-linux-private.h +++ b/lib/netdev-linux-private.h @@ -45,7 +45,8 @@ struct netdev_rxq_linux { struct netdev_rxq up; bool is_tap; int fd; - char *aux_bufs[NETDEV_MAX_BURST]; /* Batch of preallocated TSO buffers. */ + struct dp_packet *aux_bufs[NETDEV_MAX_BURST]; /* Preallocated TSO + packets. 
*/ }; int netdev_linux_construct(struct netdev *); diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 6add3e2fc..c6f3d2740 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1052,15 +1052,6 @@ static struct netdev_rxq * netdev_linux_rxq_alloc(void) { struct netdev_rxq_linux *rx = xzalloc(sizeof *rx); - if (userspace_tso_enabled()) { - int i; - - /* Allocate auxiliay buffers to receive TSO packets. */ - for (i = 0; i < NETDEV_MAX_BURST; i++) { - rx->aux_bufs[i] = xmalloc(LINUX_RXQ_TSO_MAX_LEN); - } - } - return &rx->up; } @@ -1172,7 +1163,7 @@ netdev_linux_rxq_destruct(struct netdev_rxq *rxq_) } for (i = 0; i < NETDEV_MAX_BURST; i++) { - free(rx->aux_bufs[i]); + dp_packet_delete(rx->aux_bufs[i]); } } @@ -1238,13 +1229,18 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, virtio_net_hdr_size = 0; } - std_len = VLAN_ETH_HEADER_LEN + mtu + virtio_net_hdr_size; + /* The length here needs to be accounted in the same way when the + * aux_buf is allocated so that it can be prepended to TSO buffer. 
*/ + std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu; for (i = 0; i < NETDEV_MAX_BURST; i++) { buffers[i] = dp_packet_new_with_headroom(std_len, DP_NETDEV_HEADROOM); iovs[i][IOV_PACKET].iov_base = dp_packet_data(buffers[i]); iovs[i][IOV_PACKET].iov_len = std_len; - iovs[i][IOV_AUXBUF].iov_base = rx->aux_bufs[i]; - iovs[i][IOV_AUXBUF].iov_len = LINUX_RXQ_TSO_MAX_LEN; + if (iovlen == IOV_TSO_SIZE) { + iovs[i][IOV_AUXBUF].iov_base = dp_packet_data(rx->aux_bufs[i]); + iovs[i][IOV_AUXBUF].iov_len = dp_packet_tailroom(rx->aux_bufs[i]); + } + mmsgs[i].msg_hdr.msg_name = NULL; mmsgs[i].msg_hdr.msg_namelen = 0; mmsgs[i].msg_hdr.msg_iov = iovs[i]; @@ -1268,6 +1264,8 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, } for (i = 0; i < retval; i++) { + struct dp_packet *pkt; + if (mmsgs[i].msg_len < ETH_HEADER_LEN) { struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); struct netdev_linux *netdev = netdev_linux_cast(netdev_); @@ -1280,29 +1278,29 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, } if (mmsgs[i].msg_len > std_len) { - /* Build a single linear TSO packet by expanding the current packet - * to append the data received in the aux_buf. */ - size_t extra_len = mmsgs[i].msg_len - std_len; - - dp_packet_set_size(buffers[i], dp_packet_size(buffers[i]) - + std_len); - dp_packet_prealloc_tailroom(buffers[i], extra_len); - memcpy(dp_packet_tail(buffers[i]), rx->aux_bufs[i], extra_len); - dp_packet_set_size(buffers[i], dp_packet_size(buffers[i]) - + extra_len); - } else { - dp_packet_set_size(buffers[i], dp_packet_size(buffers[i]) - + mmsgs[i].msg_len); - } + /* Build a single linear TSO packet by prepending the data from + * std_len buffer to the aux_buf. */ + pkt = rx->aux_bufs[i]; + dp_packet_set_size(pkt, mmsgs[i].msg_len - std_len); + dp_packet_push(pkt, dp_packet_data(buffers[i]), std_len); + /* The headroom should be the same in buffers[i], pkt and + * DP_NETDEV_HEADROOM. 
*/ + dp_packet_resize(pkt, DP_NETDEV_HEADROOM, 0); + dp_packet_delete(buffers[i]); + rx->aux_bufs[i] = NULL; + } else { + dp_packet_set_size(buffers[i], mmsgs[i].msg_len); + pkt = buffers[i]; + } - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(buffers[i])) { + if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); struct netdev_linux *netdev = netdev_linux_cast(netdev_); /* Unexpected error situation: the virtio header is not present * or corrupted. Drop the packet but continue in case next ones * are correct. */ - dp_packet_delete(buffers[i]); + dp_packet_delete(pkt); netdev->rx_dropped += 1; VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header", netdev_get_name(netdev_)); @@ -1325,16 +1323,16 @@ netdev_linux_batch_rxq_recv_sock(struct netdev_rxq_linux *rx, int mtu, struct eth_header *eth; bool double_tagged; - eth = dp_packet_data(buffers[i]); + eth = dp_packet_data(pkt); double_tagged = eth->eth_type == htons(ETH_TYPE_VLAN_8021Q); - eth_push_vlan(buffers[i], + eth_push_vlan(pkt, auxdata_to_vlan_tpid(aux, double_tagged), htons(aux->tp_vlan_tci)); break; } } - dp_packet_batch_add(batch, buffers[i]); + dp_packet_batch_add(batch, pkt); } /* Delete unused buffers. */ @@ -1354,7 +1352,6 @@ static int netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, struct dp_packet_batch *batch) { - struct dp_packet *buffer; int virtio_net_hdr_size; ssize_t retval; size_t std_len; @@ -1372,16 +1369,22 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, virtio_net_hdr_size = 0; } - std_len = VLAN_ETH_HEADER_LEN + mtu + virtio_net_hdr_size; + /* The length here needs to be accounted in the same way when the + * aux_buf is allocated so that it can be prepended to TSO buffer. 
*/ + std_len = virtio_net_hdr_size + VLAN_ETH_HEADER_LEN + mtu; for (i = 0; i < NETDEV_MAX_BURST; i++) { + struct dp_packet *buffer; + struct dp_packet *pkt; struct iovec iov[IOV_TSO_SIZE]; /* Assume Ethernet port. No need to set packet_type. */ buffer = dp_packet_new_with_headroom(std_len, DP_NETDEV_HEADROOM); iov[IOV_PACKET].iov_base = dp_packet_data(buffer); iov[IOV_PACKET].iov_len = std_len; - iov[IOV_AUXBUF].iov_base = rx->aux_bufs[i]; - iov[IOV_AUXBUF].iov_len = LINUX_RXQ_TSO_MAX_LEN; + if (iovlen == IOV_TSO_SIZE) { + iov[IOV_AUXBUF].iov_base = dp_packet_data(rx->aux_bufs[i]); + iov[IOV_AUXBUF].iov_len = dp_packet_tailroom(rx->aux_bufs[i]); + } do { retval = readv(rx->fd, iov, iovlen); @@ -1393,33 +1396,36 @@ netdev_linux_batch_rxq_recv_tap(struct netdev_rxq_linux *rx, int mtu, } if (retval > std_len) { - /* Build a single linear TSO packet by expanding the current packet - * to append the data received in the aux_buf. */ - size_t extra_len = retval - std_len; - - dp_packet_set_size(buffer, dp_packet_size(buffer) + std_len); - dp_packet_prealloc_tailroom(buffer, extra_len); - memcpy(dp_packet_tail(buffer), rx->aux_bufs[i], extra_len); - dp_packet_set_size(buffer, dp_packet_size(buffer) + extra_len); + /* Build a single linear TSO packet by prepending the data from + * std_len buffer to the aux_buf. */ + pkt = rx->aux_bufs[i]; + dp_packet_set_size(pkt, retval - std_len); + dp_packet_push(pkt, dp_packet_data(buffer), std_len); + /* The headroom should be the same in buffers[i], pkt and + * DP_NETDEV_HEADROOM. 
*/ + dp_packet_resize(pkt, DP_NETDEV_HEADROOM, 0); + dp_packet_delete(buffer); + rx->aux_bufs[i] = NULL; } else { dp_packet_set_size(buffer, dp_packet_size(buffer) + retval); + pkt = buffer; } - if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(buffer)) { + if (virtio_net_hdr_size && netdev_linux_parse_vnet_hdr(pkt)) { struct netdev *netdev_ = netdev_rxq_get_netdev(&rx->up); struct netdev_linux *netdev = netdev_linux_cast(netdev_); /* Unexpected error situation: the virtio header is not present * or corrupted. Drop the packet but continue in case next ones * are correct. */ - dp_packet_delete(buffer); + dp_packet_delete(pkt); netdev->rx_dropped += 1; VLOG_WARN_RL(&rl, "%s: Dropped packet: Invalid virtio net header", netdev_get_name(netdev_)); continue; } - dp_packet_batch_add(batch, buffer); + dp_packet_batch_add(batch, pkt); } if ((i == 0) && (retval < 0)) { @@ -1442,6 +1448,23 @@ netdev_linux_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch, mtu = ETH_PAYLOAD_MAX; } + if (userspace_tso_enabled()) { + /* Allocate TSO packets. The packet has enough headroom to store + * a full non-TSO packet. When a TSO packet is received, the data + * from non-TSO buffer (std_len) is prepended to the TSO packet + * (aux_buf). */ + size_t std_len = sizeof(struct virtio_net_hdr) + VLAN_ETH_HEADER_LEN + + DP_NETDEV_HEADROOM + mtu; + size_t data_len = LINUX_RXQ_TSO_MAX_LEN - std_len; + for (int i = 0; i < NETDEV_MAX_BURST; i++) { + if (rx->aux_bufs[i]) { + continue; + } + + rx->aux_bufs[i] = dp_packet_new_with_headroom(data_len, std_len); + } + } + dp_packet_batch_init(batch); retval = (rx->is_tap ? netdev_linux_batch_rxq_recv_tap(rx, mtu, batch) -- GitLab From 1223cf123ed141c0a0110ebed17572bdb2e3d0f4 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 6 Feb 2020 14:24:23 +0100 Subject: [PATCH 016/432] netdev-dpdk: Don't enable offloading on HW device if not requested. DPDK drivers has different implementations of transmit functions. 
Enabled offloading may cause driver to choose slower variant significantly affecting performance if userspace TSO wasn't requested. Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Reported-by: David Marchand Acked-by: David Marchand Acked-by: Flavio Leitner Acked-by: Kevin Traynor Signed-off-by: Ilya Maximets --- lib/netdev-dpdk.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index eb1a7af94..6187129c0 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -1132,12 +1132,15 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) dev->hw_ol_features &= ~NETDEV_RX_HW_SCATTER; } - if ((info.tx_offload_capa & tx_tso_offload_capa) == tx_tso_offload_capa) { - dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; - } else { - dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; - VLOG_WARN("Tx TSO offload is not supported on %s port " - DPDK_PORT_ID_FMT, netdev_get_name(&dev->up), dev->port_id); + dev->hw_ol_features &= ~NETDEV_TX_TSO_OFFLOAD; + if (userspace_tso_enabled()) { + if ((info.tx_offload_capa & tx_tso_offload_capa) + == tx_tso_offload_capa) { + dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; + } else { + VLOG_WARN("%s: Tx TSO offload is not supported.", + netdev_get_name(&dev->up)); + } } n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq); -- GitLab From cb943a7730cc2b4d4c86e7f6efc9cec18e463b04 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 7 Feb 2020 10:43:33 -0800 Subject: [PATCH 017/432] AUTHORS: Add Martin Varghese. 
Signed-off-by: Ben Pfaff --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index fe3935fca..ea9d7097e 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -259,6 +259,7 @@ Markos Chandras mchandras@suse.de Martin Casado casado@cs.stanford.edu Martin Fong mwfong@csl.sri.com Martino Fornasa mf@fornasa.it +Martin Varghese martin.varghese@nokia.com Martin Xu martinxu9.ovs@gmail.com Martin Zhang martinbj2008@gmail.com Maryam Tahhan maryam.tahhan@intel.com -- GitLab From e14fbdbb5ba137884095caa849735a78ffef0da0 Mon Sep 17 00:00:00 2001 From: Amber Hu via dev Date: Tue, 4 Feb 2020 05:03:03 +0000 Subject: [PATCH 018/432] datapath-windows: Append tunnel info to upcall for correct template MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Formerly, there was no tunnel information appended in the upcall’s packet data, which is expected by IPFIX in userspace to calculate the template for exporting the sampled flow record on the egress tunnel port. To fix this, while performing OvsOutputUserspaceAction(), we check whether it is initiated by the sampling on the egress tunnel, which is indicated by the OVS_USERSPACE_ATTR_EGRESS_TUN_PORT attribute in the nested attribute list. If so, we append the tunKey in OvsForwardingContext, indexed by OVS_PACKET_ATTR_EGRESS_TUN_KEY, to the upcall. Besides, at this point, the source transport port and source ip address are not available in the structure, so we have to fill them in the same way the packet would be encapsulated while performing OvsEncapGeneve(), which unfortunately runs after OvsOutputUserspaceAction(). I have tested the IPFIX functionality with the change; the template is correct and the expected tunnel information is packed in the IPFIX packet. The traffic for the test is generated by the PING utility.
>From d727d051c9a44a4a93e5ee5f3da3ca9b125aad29 Mon Sep 17 00:00:00 2001 From: Amber Hu Date: Thu, 30 Jan 2020 18:01:32 -0800 Subject: [PATCH v3] datapath-windows: Append tunnel info to upcall for correct template Signed-off-by: Amber Hu Acked-by: Alin Gabriel Serdean Signed-off-by: Alin Gabriel Serdean --- datapath-windows/ovsext/Actions.c | 24 +++++++++++++++++++++--- datapath-windows/ovsext/Flow.c | 12 ++++++++++++ datapath-windows/ovsext/Tunnel.c | 2 +- datapath-windows/ovsext/User.c | 12 +++++++++--- datapath-windows/ovsext/User.h | 1 + 5 files changed, 44 insertions(+), 7 deletions(-) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 5c9b5c3a0..4a11cea5e 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -1815,10 +1815,12 @@ OvsOutputUserspaceAction(OvsForwardingContext *ovsFwdCtx, { NTSTATUS status = NDIS_STATUS_SUCCESS; PNL_ATTR userdataAttr; - PNL_ATTR queueAttr; + PNL_ATTR egrTunAttr = NULL; POVS_PACKET_QUEUE_ELEM elem; POVS_PACKET_HDR_INFO layers = &ovsFwdCtx->layers; BOOLEAN isRecv = FALSE; + OVS_FWD_INFO fwdInfo; + OvsIPv4TunnelKey tunKey; POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext, ovsFwdCtx->srcVportNo); @@ -1830,13 +1832,29 @@ OvsOutputUserspaceAction(OvsForwardingContext *ovsFwdCtx, } } - queueAttr = NlAttrFindNested(attr, OVS_USERSPACE_ATTR_PID); userdataAttr = NlAttrFindNested(attr, OVS_USERSPACE_ATTR_USERDATA); + /* Indicate the packet is from egress-tunnel direction */ + egrTunAttr = NlAttrFindNested(attr, OVS_USERSPACE_ATTR_EGRESS_TUN_PORT); + + /* Fill tunnel key to export to usersspace to calculate the template id */ + if (egrTunAttr) { + RtlZeroMemory(&tunKey, sizeof tunKey); + RtlCopyMemory(&tunKey, &ovsFwdCtx->tunKey, sizeof tunKey); + if (!tunKey.src) { + status = OvsLookupIPFwdInfo(tunKey.src, tunKey.dst, &fwdInfo); + if (status == NDIS_STATUS_SUCCESS && tunKey.dst == fwdInfo.dstIpAddr) { + tunKey.src = fwdInfo.srcIpAddr; + } + } + 
tunKey.flow_hash = tunKey.flow_hash ? tunKey.flow_hash : MAXINT16; + } elem = OvsCreateQueueNlPacket(NlAttrData(userdataAttr), NlAttrGetSize(userdataAttr), OVS_PACKET_CMD_ACTION, - vport, key, ovsFwdCtx->curNbl, + vport, key, + egrTunAttr ? &(tunKey) : NULL, + ovsFwdCtx->curNbl, NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl), isRecv, layers); diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c index fdb101051..ac0582c18 100644 --- a/datapath-windows/ovsext/Flow.c +++ b/datapath-windows/ovsext/Flow.c @@ -1094,6 +1094,18 @@ MapFlowTunKeyToNlKey(PNL_BUFFER nlBuf, goto done; } + if (!NlMsgPutTailU16(nlBuf, OVS_TUNNEL_KEY_ATTR_TP_SRC, + tunKey->flow_hash)) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } + + if (!NlMsgPutTailU16(nlBuf, OVS_TUNNEL_KEY_ATTR_TP_DST, + tunKey->dst_port)) { + rc = STATUS_UNSUCCESSFUL; + goto done; + } + done: NlMsgEndNested(nlBuf, offset); error_nested_start: diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c index ad2c254f5..5d1be80f4 100644 --- a/datapath-windows/ovsext/Tunnel.c +++ b/datapath-windows/ovsext/Tunnel.c @@ -308,7 +308,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl, datapath->misses++; elem = OvsCreateQueueNlPacket(NULL, 0, OVS_PACKET_CMD_MISS, - vport, &key, pNbl, curNb, + vport, &key, NULL, pNbl, curNb, TRUE, &layers); if (elem) { /* Complete the packet since it was copied to user buffer. 
*/ diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c index ed1fcbea8..ee0e38d99 100644 --- a/datapath-windows/ovsext/User.c +++ b/datapath-windows/ovsext/User.c @@ -830,7 +830,7 @@ OvsCreateAndAddPackets(PVOID userData, nb = NET_BUFFER_LIST_FIRST_NB(nbl); while (nb) { elem = OvsCreateQueueNlPacket(userData, userDataLen, - cmd, vport, key, nbl, nb, + cmd, vport, key, NULL, nbl, nb, isRecv, hdrInfo); if (elem) { InsertTailList(list, &elem->link); @@ -1013,6 +1013,7 @@ OvsCreateQueueNlPacket(PVOID userData, UINT32 cmd, POVS_VPORT_ENTRY vport, OvsFlowKey *key, + OvsIPv4TunnelKey *tunnelKey, PNET_BUFFER_LIST nbl, PNET_BUFFER nb, BOOLEAN isRecv, @@ -1025,7 +1026,6 @@ OvsCreateQueueNlPacket(PVOID userData, NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo; PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo = NULL; PVOID vlanTag; - OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey; UINT32 pid; UINT32 nlMsgSize; NL_BUFFER nlBuf; @@ -1127,7 +1127,13 @@ OvsCreateQueueNlPacket(PVOID userData, } } - /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */ + /* Set OVS_PACKET_ATTR_EGRESS_TUN_KEY attribute */ + if (tunnelKey) { + if (MapFlowTunKeyToNlKey(&nlBuf, tunnelKey, + OVS_PACKET_ATTR_EGRESS_TUN_KEY) != STATUS_SUCCESS) { + goto fail; + } + } if (userData){ if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA, userData, (UINT16)userDataLen)) { diff --git a/datapath-windows/ovsext/User.h b/datapath-windows/ovsext/User.h index 3a4288894..ccca0ba5f 100644 --- a/datapath-windows/ovsext/User.h +++ b/datapath-windows/ovsext/User.h @@ -75,6 +75,7 @@ POVS_PACKET_QUEUE_ELEM OvsCreateQueueNlPacket(PVOID userData, UINT32 cmd, POVS_VPORT_ENTRY vport, OvsFlowKey *key, + OvsIPv4TunnelKey *tunnelKey, PNET_BUFFER_LIST nbl, PNET_BUFFER nb, BOOLEAN isRecv, -- GitLab From c72401297078a305c85f2c78a3640ce84260bfae Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Fri, 7 Feb 2020 09:01:13 -0300 Subject: [PATCH 019/432] dp-packet: prefetch the next 
packet when cloning a batch. There is a cache miss when accessing mbuf->data_off while cloning a batch and using prefetch improved the throughput by ~2.3%. Before: 13709416.30 pps After: 14031475.80 pps Fixes: d48771848560 ("dp-packet: preserve headroom when cloning a pkt batch") Signed-off-by: Flavio Leitner Signed-off-by: Ben Pfaff --- lib/dp-packet.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 9a9d35183..9f8991faa 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -1048,6 +1048,10 @@ dp_packet_batch_clone(struct dp_packet_batch *dst, dp_packet_batch_init(dst); DP_PACKET_BATCH_FOR_EACH (i, packet, src) { + if (i + 1 < dp_packet_batch_size(src)) { + OVS_PREFETCH(src->packets[i + 1]); + } + uint32_t headroom = dp_packet_headroom(packet); struct dp_packet *pkt_clone; -- GitLab From 3865b07409c007d5d272aea30eb0718cfbf068ee Mon Sep 17 00:00:00 2001 From: Ihar Hrachyshka Date: Thu, 13 Feb 2020 12:27:27 -0500 Subject: [PATCH 020/432] docs: handle multi line headers for nroff Before the fix, headers split into multiple lines were producing bogus quote characters in nroff output and failed to indent headers properly. Specifically, it fixes a header and its indentation in ovn-architecture(7). 
Signed-off-by: Ihar Hrachyshka Signed-off-by: Ben Pfaff --- python/build/nroff.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/build/nroff.py b/python/build/nroff.py index a94907757..09795ab52 100644 --- a/python/build/nroff.py +++ b/python/build/nroff.py @@ -290,6 +290,11 @@ fillval = .2 \\}""" +def flatten_header(s): + s = s.strip() + return re.sub(r'\s+', ' ', s) + + def block_xml_to_nroff(nodes, para='.PP'): HEADER_TAGS = ('h1', 'h2', 'h3', 'h4') s = '' @@ -373,7 +378,9 @@ def block_xml_to_nroff(nodes, para='.PP'): to_upper = node.tagName == 'h1' s += ".%s \"" % nroffTag for child_node in node.childNodes: - s += inline_xml_to_nroff(child_node, font, to_upper) + s += flatten_header( + inline_xml_to_nroff(child_node, font, to_upper) + ) s += "\"\n" elif node.tagName == 'pre': fixed = node.getAttribute('fixed') -- GitLab From 19e99c83bb4da4617730f20392515d8aca5b61ba Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Wed, 12 Feb 2020 11:46:47 -0800 Subject: [PATCH 021/432] Documentation: Fix literal blocks formating Acked-by: Flavio Leitner Signed-off-by: Yi-Hung Wei Signed-off-by: Ben Pfaff --- Documentation/faq/openflow.rst | 2 +- Documentation/faq/qos.rst | 2 +- Documentation/howto/selinux.rst | 2 +- Documentation/howto/tunneling.rst | 2 +- Documentation/intro/install/rhel.rst | 2 +- Documentation/topics/userspace-tso.rst | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/faq/openflow.rst b/Documentation/faq/openflow.rst index 8c9489170..0111de78a 100644 --- a/Documentation/faq/openflow.rst +++ b/Documentation/faq/openflow.rst @@ -385,7 +385,7 @@ but OVS drops the packets instead. 
$ ovs-ofctl add-flow br0 actions=load:0->NXM_OF_IN_PORT[],2,3,4,5,6 If the input port is important, then one may save and restore it on the - stack: + stack:: $ ovs-ofctl add-flow br0 actions=push:NXM_OF_IN_PORT[],\ load:0->NXM_OF_IN_PORT[],\ diff --git a/Documentation/faq/qos.rst b/Documentation/faq/qos.rst index 53ad89809..33c319166 100644 --- a/Documentation/faq/qos.rst +++ b/Documentation/faq/qos.rst @@ -102,7 +102,7 @@ Q: How do I configure ingress policing? A: A policing policy can be configured on an interface to drop packets that arrive at a higher rate than the configured value. For example, the following commands will rate-limit traffic that vif1.0 may generate to - 10Mbps: + 10Mbps:: $ ovs-vsctl set interface vif1.0 ingress_policing_rate=10000 $ ovs-vsctl set interface vif1.0 ingress_policing_burst=8000 diff --git a/Documentation/howto/selinux.rst b/Documentation/howto/selinux.rst index 4809639bc..55c3e39ce 100644 --- a/Documentation/howto/selinux.rst +++ b/Documentation/howto/selinux.rst @@ -117,7 +117,7 @@ see in Open vSwitch log files "Permission Denied" errors:: However, not all "Permission denied" errors are caused by SELinux. So, before blaming too strict SELinux policy, make sure that indeed SELinux was the one -that denied OVS access to certain resources, for example, run: +that denied OVS access to certain resources, for example, run:: $ grep "openvswitch_t" /var/log/audit/audit.log | tail type=AVC msg=audit(1453235431.640:114671): avc: denied { getopt } for pid=4583 comm="ovs-vswitchd" scontext=system_u:system_r:openvswitch_t:s0 tcontext=system_u:system_r:openvswitch_t:s0 tclass=netlink_generic_socket permissive=0 diff --git a/Documentation/howto/tunneling.rst b/Documentation/howto/tunneling.rst index 2645b9043..2cbca977b 100644 --- a/Documentation/howto/tunneling.rst +++ b/Documentation/howto/tunneling.rst @@ -130,7 +130,7 @@ Create a mirrored configuration on `host2` using the same basic steps: $ ovs-vsctl add-port br0 tap1 #. 
Create the GRE tunnel on `host2`, this time using the IP address for - ``eth0`` on `host1` when specifying the ``remote_ip`` option: + ``eth0`` on `host1` when specifying the ``remote_ip`` option:: $ ovs-vsctl add-port br0 gre0 \ -- set interface gre0 type=gre options:remote_ip= diff --git a/Documentation/intro/install/rhel.rst b/Documentation/intro/install/rhel.rst index 31f0eec3a..b21b274b7 100644 --- a/Documentation/intro/install/rhel.rst +++ b/Documentation/intro/install/rhel.rst @@ -201,7 +201,7 @@ On RHEL 6, to build the Open vSwitch kernel module run:: $ rpmbuild -bb rhel/kmod-openvswitch-rhel6.spec -You might have to specify a kernel version and/or variants, e.g.: +You might have to specify a kernel version and/or variants, e.g.:: $ rpmbuild -bb \ -D "kversion 2.6.32-131.6.1.el6.x86_64" \ diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst index 94eddc0b2..9da5d7ef2 100644 --- a/Documentation/topics/userspace-tso.rst +++ b/Documentation/topics/userspace-tso.rst @@ -53,7 +53,7 @@ Enabling TSO The TSO support may be enabled via a global config value ``userspace-tso-enable``. Setting this to ``true`` enables TSO support for -all ports. +all ports.:: $ ovs-vsctl set Open_vSwitch . other_config:userspace-tso-enable=true -- GitLab From 9efbdaa201530ab7023a69176aba54c32c468efb Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 13 Feb 2020 16:27:01 -0800 Subject: [PATCH 022/432] Set release date for 2.13.0. The "Valentine's Day" release. Acked-by: Flavio Leitner Signed-off-by: Ben Pfaff --- NEWS | 2 +- debian/changelog | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 9bbe71d9e..f62ef1f47 100644 --- a/NEWS +++ b/NEWS @@ -6,7 +6,7 @@ Post-v2.13.0 value of other-config:dp-sn in the Bridge table. -v2.13.0 - xx xxx xxxx +v2.13.0 - 14 Feb 2020 --------------------- - OVN: * OVN has been removed from this repository. 
It now exists as a diff --git a/debian/changelog b/debian/changelog index 4ec058d99..d5c1db839 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,10 +5,10 @@ openvswitch (2.13.90-1) unstable; urgency=low -- Open vSwitch team Tue, 21 Jan 2020 12:44:30 -0700 openvswitch (2.13.0-1) unstable; urgency=low - + [ Open vSwitch team] * New upstream version - -- Open vSwitch team Tue, 21 Jan 2020 12:24:09 -0700 + -- Open vSwitch team Fri, 14 Feb 2020 12:00:00 -0700 openvswitch (2.12.0-1) unstable; urgency=low [ Open vSwitch team ] -- GitLab From 486139d9e4b81dae04b2bb7487d45366865ac0ad Mon Sep 17 00:00:00 2001 From: Tomasz Konieczny Date: Wed, 12 Feb 2020 14:15:56 +0100 Subject: [PATCH 023/432] docs: Update DPDK version table Signed-off-by: Tomasz Konieczny Acked-by: Flavio Leitner Acked-by: Kevin Traynor Signed-off-by: Ian Stokes --- Documentation/faq/releases.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index 6702c58a2..6ff47d788 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -185,8 +185,9 @@ Q: What DPDK version does each Open vSwitch release work with? 2.8.x 17.05.2 2.9.x 17.11.4 2.10.x 17.11.4 - 2.11.x 18.11.5 - 2.12.x 18.11.5 + 2.11.x 18.11.6 + 2.12.x 18.11.6 + 2.13.x 19.11.0 ============ ======= Q: Are all the DPDK releases that OVS versions work with maintained? -- GitLab From ac23d20fc90da3b1c9b2117d1e22102e99fba006 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Fri, 7 Feb 2020 14:55:06 -0800 Subject: [PATCH 024/432] conntrack: Fix TCP conntrack state If a TCP connection is in SYN_SENT state, receiving another SYN packet would just renew the timeout of that conntrack entry rather than create a new one. Thus, tcp_conn_update() should return CT_UPDATE_VALID_NEW. This also fixes regressions of a couple of OVN system tests. 
Fixes: a867c010ee91 ("conntrack: Fix conntrack new state") Reported-by: Dumitru Ceara Signed-off-by: Yi-Hung Wei Tested-by: Dumitru Ceara Signed-off-by: William Tu --- lib/conntrack-tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c index 416cb769d..47261c755 100644 --- a/lib/conntrack-tcp.c +++ b/lib/conntrack-tcp.c @@ -189,7 +189,7 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_, } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) { src->state = CT_DPIF_TCPS_SYN_SENT; conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIRST_PACKET, now); - return CT_UPDATE_NEW; + return CT_UPDATE_VALID_NEW; } } -- GitLab From f4f7498a9e17fd25eedcb95a127b2b1c63cda962 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Fri, 21 Feb 2020 14:50:38 -0800 Subject: [PATCH 025/432] docs: Update conntrack established state description Patch a867c010ee91 ("conntrack: Fix conntrack new state") fixes the userspace conntrack behavior. This patch updates the corresponding conntrack state description. Fixes: a867c010ee91 ("conntrack: Fix conntrack new state") Reported-by: Roni Bar Yanai Acked-by: Roni Bar Yanai Signed-off-by: Yi-Hung Wei Signed-off-by: William Tu --- lib/meta-flow.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml index 90b405c73..2f9c5ee16 100644 --- a/lib/meta-flow.xml +++ b/lib/meta-flow.xml @@ -2566,8 +2566,8 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123)
est (0x02)
- Part of an existing connection. Set to 1 if this is a committed - connection. + Part of an existing connection. Set to 1 if packets of a committed + connection have been seen by conntrack from both directions.
rel (0x04)
-- GitLab From 514950d37dabebbdfa40ddf87596a7293de2d87c Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Fri, 14 Feb 2020 10:03:34 -0300 Subject: [PATCH 026/432] netdev-dpdk: vhost: disable unsupported offload features. Disable ECN and UFO since this is not supported yet. Also, disable all other features when userspace_tso is not enabled. Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Signed-off-by: Flavio Leitner Signed-off-by: Ilya Maximets --- lib/netdev-dpdk.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 6187129c0..192c174f3 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -5186,6 +5186,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); int err; uint64_t vhost_flags = 0; + uint64_t vhost_unsup_flags; bool zc_enabled; ovs_mutex_lock(&dev->mutex); @@ -5252,16 +5253,21 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; + vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN + | 1ULL << VIRTIO_NET_F_HOST_UFO; } else { - err = rte_vhost_driver_disable_features(dev->vhost_id, - 1ULL << VIRTIO_NET_F_HOST_TSO4 - | 1ULL << VIRTIO_NET_F_HOST_TSO6 - | 1ULL << VIRTIO_NET_F_CSUM); - if (err) { - VLOG_ERR("rte_vhost_driver_disable_features failed for " - "vhost user client port: %s\n", dev->up.name); - goto unlock; - } + /* This disables checksum offloading and all the features + * that depends on it (TSO, UFO, ECN) according to virtio + * specification. 
*/ + vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM; + } + + err = rte_vhost_driver_disable_features(dev->vhost_id, + vhost_unsup_flags); + if (err) { + VLOG_ERR("rte_vhost_driver_disable_features failed for " + "vhost user client port: %s\n", dev->up.name); + goto unlock; } err = rte_vhost_driver_start(dev->vhost_id); -- GitLab From 8c5163fe81ea05313eaefcd61cf036dd3fd2ae07 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Fri, 14 Feb 2020 10:03:35 -0300 Subject: [PATCH 027/432] userspace TSO: Include UDP checksum offload. Virtio doesn't expose flags to control which protocols checksum offload needs to be enabled or disabled. This patch checks if the NIC supports UDP checksum offload and active it when TSO is enabled. Reported-by: Ilya Maximets Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Signed-off-by: Flavio Leitner Signed-off-by: Ilya Maximets --- lib/netdev-dpdk.c | 18 ++++++++++++------ lib/netdev-linux.c | 1 + lib/netdev-provider.h | 3 ++- lib/netdev.c | 24 ++++++++++++++++++++---- 4 files changed, 35 insertions(+), 11 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 192c174f3..ec8be64aa 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -152,6 +152,14 @@ typedef uint16_t dpdk_port_t; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) +/* List of required flags advertised by the hardware that will be + * used if TSO is enabled. 
*/ +#define DPDK_TX_TSO_OFFLOAD_FLAGS (DEV_TX_OFFLOAD_TCP_TSO \ + | DEV_TX_OFFLOAD_TCP_CKSUM \ + | DEV_TX_OFFLOAD_UDP_CKSUM \ + | DEV_TX_OFFLOAD_IPV4_CKSUM) + + static const struct rte_eth_conf port_conf = { .rxmode = { .mq_mode = ETH_MQ_RX_RSS, @@ -997,9 +1005,7 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) } if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { - conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO; - conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM; - conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM; + conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; } /* Limit configured rss hash functions to only those supported @@ -1100,12 +1106,10 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) struct rte_ether_addr eth_addr; int diag; int n_rxq, n_txq; + uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS; uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM | DEV_RX_OFFLOAD_IPV4_CKSUM; - uint32_t tx_tso_offload_capa = DEV_TX_OFFLOAD_TCP_TSO | - DEV_TX_OFFLOAD_TCP_CKSUM | - DEV_TX_OFFLOAD_IPV4_CKSUM; rte_eth_dev_info_get(dev->port_id, &info); @@ -5110,6 +5114,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev) if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; + netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; } @@ -5252,6 +5257,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) if (userspace_tso_enabled()) { netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; + netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN | 1ULL << VIRTIO_NET_F_HOST_UFO; diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index c6f3d2740..85f3a7367 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -923,6 +923,7 @@ 
netdev_linux_common_construct(struct netdev *netdev_) if (userspace_tso_enabled()) { netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; } diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 22f4cde33..00677dc9d 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -40,7 +40,8 @@ struct netdev_tnl_build_header_params; enum netdev_ol_flags { NETDEV_TX_OFFLOAD_IPV4_CKSUM = 1 << 0, NETDEV_TX_OFFLOAD_TCP_CKSUM = 1 << 1, - NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 2, + NETDEV_TX_OFFLOAD_UDP_CKSUM = 1 << 2, + NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 3, }; /* A network device (e.g. an Ethernet device). diff --git a/lib/netdev.c b/lib/netdev.c index f95b19af4..a55f77961 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -791,6 +791,8 @@ static bool netdev_send_prepare_packet(const uint64_t netdev_flags, struct dp_packet *packet, char **errormsg) { + uint64_t l4_mask; + if (dp_packet_hwol_is_tso(packet) && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) { /* Fall back to GSO in software. */ @@ -798,11 +800,25 @@ netdev_send_prepare_packet(const uint64_t netdev_flags, return false; } - if (dp_packet_hwol_l4_mask(packet) - && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { - /* Fall back to L4 csum in software. */ - VLOG_ERR_BUF(errormsg, "No L4 checksum support"); + l4_mask = dp_packet_hwol_l4_mask(packet); + if (l4_mask) { + if (dp_packet_hwol_l4_is_tcp(packet)) { + if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) { + /* Fall back to TCP csum in software. */ + VLOG_ERR_BUF(errormsg, "No TCP checksum support"); + return false; + } + } else if (dp_packet_hwol_l4_is_udp(packet)) { + if (!(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) { + /* Fall back to UDP csum in software. 
*/ + VLOG_ERR_BUF(errormsg, "No UDP checksum support"); + return false; + } + } else { + VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64, + l4_mask); return false; + } } return true; -- GitLab From 35b5586ba7ab2d7f53decb978df6bfea4600f6d4 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Fri, 14 Feb 2020 10:03:36 -0300 Subject: [PATCH 028/432] userspace TSO: SCTP checksum offload optional. Ideally SCTP checksum offload needs be advertised by the NIC when userspace TSO is enabled. However, very few drivers do that and it's not a widely used protocol. So, this patch enables SCTP checksum offload if available, otherwise userspace TSO can still be enabled but SCTP packets will be dropped on NICs without support. Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Signed-off-by: Flavio Leitner Signed-off-by: Ilya Maximets --- Documentation/topics/userspace-tso.rst | 7 +++++++ lib/netdev-dpdk.c | 21 +++++++++++++++++++-- lib/netdev-linux.c | 1 + lib/netdev-provider.h | 3 ++- lib/netdev.c | 6 ++++++ 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst index 9da5d7ef2..0fbac93a5 100644 --- a/Documentation/topics/userspace-tso.rst +++ b/Documentation/topics/userspace-tso.rst @@ -91,6 +91,13 @@ The current OvS userspace `TSO` implementation supports flat and VLAN networks only (i.e. no support for `TSO` over tunneled connection [VxLAN, GRE, IPinIP, etc.]). +The NIC driver must support and advertise checksum offload for TCP and UDP. +However, SCTP is not mandatory because very few drivers advertised support +and it wasn't a widely used protocol at the moment this feature was introduced +in Open vSwitch. Currently, if the NIC supports that, then the feature is +enabled, otherwise TSO can still be enabled but SCTP packets sent to the NIC +will be dropped. 
+ There is no software implementation of TSO, so all ports attached to the datapath must support TSO or packets using that feature will be dropped on ports without TSO support. That also means guests using vhost-user diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index ec8be64aa..7ab81864d 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -152,8 +152,10 @@ typedef uint16_t dpdk_port_t; #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) -/* List of required flags advertised by the hardware that will be - * used if TSO is enabled. */ +/* List of required flags advertised by the hardware that will be used + * if TSO is enabled. Ideally this should include DEV_TX_OFFLOAD_SCTP_CKSUM. + * However, very few drivers supports that the moment and SCTP is not a + * widely used protocol as TCP and UDP, so it's optional. */ #define DPDK_TX_TSO_OFFLOAD_FLAGS (DEV_TX_OFFLOAD_TCP_TSO \ | DEV_TX_OFFLOAD_TCP_CKSUM \ | DEV_TX_OFFLOAD_UDP_CKSUM \ @@ -423,6 +425,7 @@ enum dpdk_hw_ol_features { NETDEV_RX_HW_CRC_STRIP = 1 << 1, NETDEV_RX_HW_SCATTER = 1 << 2, NETDEV_TX_TSO_OFFLOAD = 1 << 3, + NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4, }; /* @@ -1006,6 +1009,9 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS; + if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { + conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM; + } } /* Limit configured rss hash functions to only those supported @@ -1141,6 +1147,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) if ((info.tx_offload_capa & tx_tso_offload_capa) == tx_tso_offload_capa) { dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD; + if (info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) { + dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD; + } else { + VLOG_WARN("%s: Tx SCTP checksum offload is not supported, " + "SCTP packets sent to this device will be dropped", + netdev_get_name(&dev->up)); 
+ } } else { VLOG_WARN("%s: Tx TSO offload is not supported.", netdev_get_name(&dev->up)); @@ -5116,6 +5129,9 @@ netdev_dpdk_reconfigure(struct netdev *netdev) netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; + if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) { + netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; + } } dev->tx_q = netdev_dpdk_alloc_txq(netdev->n_txq); @@ -5258,6 +5274,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; + netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN | 1ULL << VIRTIO_NET_F_HOST_UFO; diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 85f3a7367..432645601 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -924,6 +924,7 @@ netdev_linux_common_construct(struct netdev *netdev_) netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM; netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM; + netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM; netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM; } diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 00677dc9d..6f509424b 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -41,7 +41,8 @@ enum netdev_ol_flags { NETDEV_TX_OFFLOAD_IPV4_CKSUM = 1 << 0, NETDEV_TX_OFFLOAD_TCP_CKSUM = 1 << 1, NETDEV_TX_OFFLOAD_UDP_CKSUM = 1 << 2, - NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 3, + NETDEV_TX_OFFLOAD_SCTP_CKSUM = 1 << 3, + NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 4, }; /* A network device (e.g. an Ethernet device). 
diff --git a/lib/netdev.c b/lib/netdev.c index a55f77961..8c44eee8e 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -814,6 +814,12 @@ netdev_send_prepare_packet(const uint64_t netdev_flags, VLOG_ERR_BUF(errormsg, "No UDP checksum support"); return false; } + } else if (dp_packet_hwol_l4_is_sctp(packet)) { + if (!(netdev_flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)) { + /* Fall back to SCTP csum in software. */ + VLOG_ERR_BUF(errormsg, "No SCTP checksum support"); + return false; + } } else { VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64, l4_mask); -- GitLab From f7995da00b25a584e21af37d0d37fb819e3c3490 Mon Sep 17 00:00:00 2001 From: Yanqin Wei Date: Wed, 26 Feb 2020 12:46:36 +0800 Subject: [PATCH 029/432] dpif-netdev.at: Fix partial offloading test cases failure. Some partial offloading test cases are failing inconsistently. The root cause is that dummy netdev is assigned with "linux_tc" offloading API. dpif-netdev - partial hw offload - dummy dpif-netdev - partial hw offload - dummy-pmd dpif-netdev - partial hw offload with packet modifications - dummy dpif-netdev - partial hw offload with packet modifications - dummy-pmd This patch fixes this issue by changing 'options:ifindex=1' to some big value. It is a workaround to make "linux_tc" init flow api failure. All above cases can pass consistently after applying this patch. 
Suggested-by: Ilya Maximets Reviewed-by: Gavin Hu Reviewed-by: Lijian Zhang Signed-off-by: Yanqin Wei Signed-off-by: Ilya Maximets --- tests/dpif-netdev.at | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index 0aeb4e788..12e468744 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -371,7 +371,7 @@ m4_define([DPIF_NETDEV_FLOW_HW_OFFLOAD], [AT_SETUP([dpif-netdev - partial hw offload - $1]) OVS_VSWITCHD_START( [add-port br0 p1 -- \ - set interface p1 type=$1 ofport_request=1 options:pstream=punix:$OVS_RUNDIR/p1.sock options:ifindex=1 -- \ + set interface p1 type=$1 ofport_request=1 options:pstream=punix:$OVS_RUNDIR/p1.sock options:ifindex=1100 -- \ set bridge br0 datapath-type=dummy \ other-config:datapath-id=1234 fail-mode=secure], [], [], [m4_if([$1], [dummy-pmd], [--dummy-numa="0,0,0,0,1,1,1,1"], [])]) @@ -434,7 +434,7 @@ m4_define([DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS], [AT_SETUP([dpif-netdev - partial hw offload with packet modifications - $1]) OVS_VSWITCHD_START( [add-port br0 p1 -- \ - set interface p1 type=$1 ofport_request=1 options:pcap=p1.pcap options:ifindex=1 -- \ + set interface p1 type=$1 ofport_request=1 options:pcap=p1.pcap options:ifindex=1101 -- \ set bridge br0 datapath-type=dummy \ other-config:datapath-id=1234 fail-mode=secure], [], [], [m4_if([$1], [dummy-pmd], [--dummy-numa="0,0,0,0,1,1,1,1"], [])]) -- GitLab From 1baa102abb4be3d32e36ee9ea7f8aaae94b8e562 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Tue, 18 Feb 2020 13:49:12 +0000 Subject: [PATCH 030/432] dpif-netdev.at: VLAN id modification test for ARP partial HW offloading. Follow up to commit eb540c0f5fc8 ("flow: Fix parsing l3_ofs with partial offloading") that fixed the issue, add a unit-test for it. 
Signed-off-by: Eli Britstein Reviewed-by: Roi Dayan Signed-off-by: Ilya Maximets --- tests/dpif-netdev.at | 77 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index 12e468744..9c0a42d00 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -506,3 +506,80 @@ udp,in_port=ANY,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09: DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS([dummy]) DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS([dummy-pmd]) + +m4_define([DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP], + [AT_SETUP([dpif-netdev - partial hw offload with arp vlan id packet modifications - $1]) + OVS_VSWITCHD_START( + [add-port br0 p1 -- \ + set interface p1 type=$1 ofport_request=1 options:pcap=p1.pcap options:ifindex=1102 -- \ + set bridge br0 datapath-type=dummy \ + other-config:datapath-id=1234 fail-mode=secure], [], [], + [m4_if([$1], [dummy-pmd], [--dummy-numa="0,0,0,0,1,1,1,1"], [])]) + AT_CHECK([ovs-appctl vlog/set dpif:file:dbg dpif_netdev:file:dbg netdev_dummy:file:dbg]) + + AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-offload=true]) + OVS_WAIT_UNTIL([grep "netdev: Flow API Enabled" ovs-vswitchd.log]) + + AT_CHECK([ovs-ofctl del-flows br0]) + + # Setting flow to modify vlan id with arp packet to be sure that + # offloaded packets has correctly initialized l3 offset. 
+ AT_CHECK([ovs-ofctl add-flow br0 in_port=1,arp,dl_vlan=99,actions=mod_vlan_vid=11,output:IN_PORT]) + + packet="packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0806),arp(sip=127.0.0.1,tip=127.0.0.1,op=1,sha=00:0b:0c:0d:0e:0f,tha=00:00:00:00:00:00))" + AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet --len 64], [0]) + + OVS_WAIT_UNTIL([grep "miss upcall" ovs-vswitchd.log]) + AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl +skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),dnl +packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0806),arp(sip=127.0.0.1,tip=127.0.0.1,op=1,sha=00:0b:0c:0d:0e:0f,tha=00:00:00:00:00:00)) +]) + # Check that flow successfully offloaded. + OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log]) + AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl +p1: flow put[[create]]: flow match: recirc_id=0,eth,arp,in_port=1,dl_vlan=99,dl_vlan_pcp=7, mark: 0 +]) + # Check that datapath flow installed successfully. + AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0806)), actions: +]) + # Inject the same packet again. + AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet --len 64], [0]) + + # Check for succesfull packet matching with installed offloaded flow. 
+ AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl +p1: packet: arp,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00 dnl +matches with flow: recirc_id=0,eth,arp,dl_vlan=99,dl_vlan_pcp=7 with mark: 0 +]) + + ovs-appctl revalidator/wait + # Dump the datapath flow to see that actions was executed for a packet. + AT_CHECK([ovs-appctl dpif/dump-flows br0 | strip_timers], [0], [dnl +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0806)), dnl +packets:1, bytes:64, used:0.0s, actions:pop_vlan,push_vlan(vid=11,pcp=7),1 +]) + + # Wait for datapath flow expiration. + ovs-appctl time/stop + ovs-appctl time/warp 15000 + ovs-appctl revalidator/wait + + # Check that flow successfully deleted from HW. + OVS_WAIT_UNTIL([grep "succeed to delete netdev flow" ovs-vswitchd.log]) + AT_CHECK([filter_hw_flow_del < ovs-vswitchd.log | strip_xout], [0], [dnl +p1: flow del: mark: 0 +]) + + # Check that VLAN ID was correctly modified in output packets. 
+ AT_CHECK([ovs-ofctl parse-pcap p1.pcap], [0], [dnl +arp,in_port=ANY,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00 +arp,in_port=ANY,dl_vlan=11,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00 +arp,in_port=ANY,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00 +arp,in_port=ANY,dl_vlan=11,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00 +]) + + OVS_VSWITCHD_STOP + AT_CLEANUP]) + +DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP([dummy]) +DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP([dummy-pmd]) -- GitLab From bae24b4ecf4a8d85aa09245fea283a04134165fc Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 28 Feb 2020 10:54:02 -0800 Subject: [PATCH 031/432] ovs-vswitchd: Fix typo in manpage. Reviewed-by: Greg Rose Signed-off-by: Ben Pfaff --- vswitchd/ovs-vswitchd.8.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in index ac66ed7bb..0ad8bd2bc 100644 --- a/vswitchd/ovs-vswitchd.8.in +++ b/vswitchd/ovs-vswitchd.8.in @@ -107,7 +107,7 @@ how to configure Open vSwitch. .SS "GENERAL COMMANDS" .IP "\fBexit\fR \fI--cleanup\fR" Causes \fBovs\-vswitchd\fR to gracefully terminate. If \fI--cleanup\fR -is specified, deletes flows from datpaths and releases other datapath +is specified, deletes flows from datapaths and releases other datapath resources configured by \fBovs\-vswitchd\fR. Otherwise, datapath flows and other resources remains undeleted. 
Resources of datapaths that are integrated into \fBovs\-vswitchd\fR (e.g. the \fBnetdev\fR -- GitLab From 7cc77b301f80a63cd4893198d82be0eef303f731 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 28 Feb 2020 09:15:32 -0800 Subject: [PATCH 032/432] ofproto-dpif: Only delete tunnel backer ports along with the dpif. The admin can choose whether or not to delete flows from datapaths when they stop ovs-vswitchd. The goal of not deleting flows is to allow existing traffic to continue being forwarded until ovs-vswitchd is restarted. Until now, regardless of this choice, ovs-vswitchd has always deleted tunnel ports from the datapath. When flows are not deleted, this nevertheless prevents tunnel traffic from being forwarded. With this patch, ovs-vswitchd no longer deletes tunnel ports in the case where it does not delete flows, allowing tunnel traffic to continue being forwarded. Reported-by: Antonin Bas Tested-by: Antonin Bas Tested-by: txfh2007 Signed-off-by: Ben Pfaff --- ofproto/ofproto-dpif.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 0222ec82f..d56cece95 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -698,8 +698,10 @@ close_dpif_backer(struct dpif_backer *backer, bool del) udpif_destroy(backer->udpif); - SIMAP_FOR_EACH (node, &backer->tnl_backers) { - dpif_port_del(backer->dpif, u32_to_odp(node->data), false); + if (del) { + SIMAP_FOR_EACH (node, &backer->tnl_backers) { + dpif_port_del(backer->dpif, u32_to_odp(node->data), false); + } } simap_destroy(&backer->tnl_backers); ovs_rwlock_destroy(&backer->odp_to_ofport_lock); -- GitLab From 6211ad57089e16fe0c84cf5ba0f6a03b4df3ceb8 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Sat, 29 Feb 2020 20:29:35 -0300 Subject: [PATCH 033/432] netdev-linux: Enable TSO in the TAP device.
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ioctl TUNSETOFFLOAD if kernel supports to enable TSO offloading in the tap device. Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Reported-by: "Yi Yang (杨燚)-云服务集团" Tested-by: William Tu Signed-off-by: Flavio Leitner Signed-off-by: William Tu --- lib/netdev-linux.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 432645601..c6e46f188 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -1012,6 +1012,23 @@ netdev_linux_construct_tap(struct netdev *netdev_) goto error_close; } + if (userspace_tso_enabled()) { + /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is + * available, it will return EINVAL when a flag is unknown. + * Therefore, try enabling offload with no flags to check + * if TUNSETOFFLOAD support is available or not. */ + if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) { + unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6; + + if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) { + VLOG_WARN("%s: enabling tap offloading failed: %s", name, + ovs_strerror(errno)); + error = errno; + goto error_close; + } + } + } + netdev->present = true; return 0; -- GitLab From ffbe63cd4702ffa9fc7cd97f69ba6fbba308b0b3 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 3 Mar 2020 10:37:16 -0800 Subject: [PATCH 034/432] packets: Fix typo in comment.
Acked-by: Han Zhou Reported-by: Toms Atteka Signed-off-by: Ben Pfaff --- lib/packets.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/packets.h b/lib/packets.h index 5d7f82c45..4c1e91dee 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -963,7 +963,7 @@ union ovs_16aligned_in6_addr { ovs_16aligned_be32 be32[4]; }; -/* Like struct in6_hdr, but whereas that struct requires 32-bit alignment, this +/* Like struct ip6_hdr, but whereas that struct requires 32-bit alignment, this * one only requires 16-bit alignment. */ struct ovs_16aligned_ip6_hdr { union { -- GitLab From 704ae35726cbc6e4bd1d3c68b0fe30aa5af45fb8 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Mon, 2 Mar 2020 11:05:06 -0500 Subject: [PATCH 035/432] ovs-dpctl-top: python3 compatibility During the transition to python3 support, some syntax errors weren't adequately cleaned. This addresses the various errors, plus one minor issue with string type conversion. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1809184 Tested-by: Flavio Leitner Acked-by: Flavio Leitner Signed-off-by: Aaron Conole Signed-off-by: Ben Pfaff --- utilities/ovs-dpctl-top.in | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/utilities/ovs-dpctl-top.in b/utilities/ovs-dpctl-top.in index f2cc3f7f2..011cc64b7 100755 --- a/utilities/ovs-dpctl-top.in +++ b/utilities/ovs-dpctl-top.in @@ -592,7 +592,7 @@ def flows_read(ihdl, flow_db): try: flow_db.flow_line_add(line) - except ValueError, arg: + except ValueError as arg: logging.error(arg) return flow_db @@ -958,6 +958,9 @@ class FlowDB: change order of fields of the same flow. 
""" + if not isinstance(line, str): + line = str(line) + line = line.rstrip("\n") (fields, stats, _) = flow_line_split(line) @@ -988,7 +991,7 @@ class FlowDB: self.flow_event(fields_dict, stats_old_dict, stats_dict) - except ValueError, arg: + except ValueError as arg: logging.error(arg) self._error_count += 1 raise @@ -1192,7 +1195,7 @@ def flows_top(args): flows_read(ihdl, flow_db) finally: ihdl.close() - except OSError, arg: + except OSError as arg: logging.critical(arg) break @@ -1220,7 +1223,7 @@ def flows_top(args): # repeat output for (count, line) in lines: - print line + print(line) def flows_script(args): @@ -1249,7 +1252,7 @@ def flows_script(args): render = Render(console_width, Render.FIELD_SELECT_SCRIPT) for line in render.format(flow_db): - print line + print(line) def main(): -- GitLab From 4cf89cb0742419cda7b290816fbf3b9f6edfa1fc Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 28 Feb 2020 17:33:07 +0100 Subject: [PATCH 036/432] dpdk: Remove deprecated pdump support. DPDK pdump was deprecated in 2.13 release and didn't actually work since 2.11. Removing it. More details in commit 4ae8c4617fd3 ("dpdk: Deprecate pdump support.") Acked-by: Aaron Conole Acked-by: David Marchand Acked-by: Ian Stokes Signed-off-by: Ilya Maximets --- .travis/linux-build.sh | 6 --- Documentation/automake.mk | 1 - Documentation/topics/dpdk/index.rst | 1 - Documentation/topics/dpdk/pdump.rst | 74 ----------------------------- NEWS | 2 + acinclude.m4 | 19 -------- lib/dpdk.c | 12 ----- 7 files changed, 2 insertions(+), 113 deletions(-) delete mode 100644 Documentation/topics/dpdk/pdump.rst diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index bb47b3ee1..359f7773b 100755 --- a/.travis/linux-build.sh +++ b/.travis/linux-build.sh @@ -124,10 +124,6 @@ function install_dpdk() sed -i '/CONFIG_RTE_EAL_IGB_UIO=y/s/=y/=n/' build/.config sed -i '/CONFIG_RTE_KNI_KMOD=y/s/=y/=n/' build/.config - # Enable pdump support in DPDK. 
- sed -i '/CONFIG_RTE_LIBRTE_PMD_PCAP=n/s/=n/=y/' build/.config - sed -i '/CONFIG_RTE_LIBRTE_PDUMP=n/s/=n/=y/' build/.config - make -j4 CC=gcc EXTRA_CFLAGS='-fPIC' EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=$(pwd)/build" echo "Installed DPDK source in $(pwd)" @@ -168,8 +164,6 @@ if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then DPDK_VER="19.11" fi install_dpdk $DPDK_VER - # Enable pdump support in OVS. - EXTRA_OPTS="${EXTRA_OPTS} --enable-dpdk-pdump" if [ "$CC" = "clang" ]; then # Disregard cast alignment errors until DPDK is fixed CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} -Wno-cast-align" diff --git a/Documentation/automake.mk b/Documentation/automake.mk index 22976a3cd..691f345ec 100644 --- a/Documentation/automake.mk +++ b/Documentation/automake.mk @@ -36,7 +36,6 @@ DOC_SOURCE = \ Documentation/topics/dpdk/bridge.rst \ Documentation/topics/dpdk/jumbo-frames.rst \ Documentation/topics/dpdk/memory.rst \ - Documentation/topics/dpdk/pdump.rst \ Documentation/topics/dpdk/phy.rst \ Documentation/topics/dpdk/pmd.rst \ Documentation/topics/dpdk/qos.rst \ diff --git a/Documentation/topics/dpdk/index.rst b/Documentation/topics/dpdk/index.rst index f2862ea70..336dcc56b 100644 --- a/Documentation/topics/dpdk/index.rst +++ b/Documentation/topics/dpdk/index.rst @@ -38,6 +38,5 @@ DPDK Support /topics/dpdk/vdev /topics/dpdk/pmd /topics/dpdk/qos - /topics/dpdk/pdump /topics/dpdk/jumbo-frames /topics/dpdk/memory diff --git a/Documentation/topics/dpdk/pdump.rst b/Documentation/topics/dpdk/pdump.rst deleted file mode 100644 index ce03b327a..000000000 --- a/Documentation/topics/dpdk/pdump.rst +++ /dev/null @@ -1,74 +0,0 @@ -.. - Licensed under the Apache License, Version 2.0 (the "License"); you may - not use this file except in compliance with the License. 
You may obtain - a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - License for the specific language governing permissions and limitations - under the License. - - Convention for heading levels in Open vSwitch documentation: - - ======= Heading 0 (reserved for the title in a document) - ------- Heading 1 - ~~~~~~~ Heading 2 - +++++++ Heading 3 - ''''''' Heading 4 - - Avoid deeper levels because they do not render well. - -===== -pdump -===== - -.. versionadded:: 2.6.0 - -.. warning:: - - DPDK pdump support is deprecated in OVS and will be removed in next - releases. - -pdump allows you to listen on DPDK ports and view the traffic that is passing -on them. To use this utility, one must have libpcap installed on the system. -Furthermore, DPDK must be built with ``CONFIG_RTE_LIBRTE_PDUMP=y`` and -``CONFIG_RTE_LIBRTE_PMD_PCAP=y``. OVS should be built with -``--enable-dpdk-pdump`` configuration option. - -.. warning:: - - A performance decrease is expected when using a monitoring application like - the DPDK pdump app. - -To use pdump, simply launch OVS as usual, then navigate to the ``app/pdump`` -directory in DPDK, ``make`` the application and run like so:: - - $ sudo ./build/app/dpdk-pdump -- \ - --pdump port=0,queue=0,rx-dev=/tmp/pkts.pcap - -The above command captures traffic received on queue 0 of port 0 and stores it -in ``/tmp/pkts.pcap``. Other combinations of port numbers, queues numbers and -pcap locations are of course also available to use. For example, to capture all -packets that traverse port 0 in a single pcap file:: - - $ sudo ./build/app/dpdk-pdump -- \ - --pdump 'port=0,queue=*,rx-dev=/tmp/pkts.pcap,tx-dev=/tmp/pkts.pcap' - -.. 
note:: - - ``XDG_RUNTIME_DIR`` environment variable might need to be adjusted to - OVS runtime directory (``/var/run/openvswitch`` in most cases) for - ``dpdk-pdump`` utility if OVS started by non-root user. - -Many tools are available to view the contents of the pcap file. Once example is -tcpdump. Issue the following command to view the contents of ``pkts.pcap``:: - - $ tcpdump -r pkts.pcap - -More information on the pdump app and its usage can be found in the `DPDK -documentation`__. - -__ http://dpdk.org/doc/guides/tools/pdump.html diff --git a/NEWS b/NEWS index f62ef1f47..c58a9014e 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,8 @@ Post-v2.13.0 - OpenFlow: * The OpenFlow ofp_desc/serial_num may now be configured by setting the value of other-config:dp-sn in the Bridge table. + - DPDK: + * Deprecated DPDK pdump packet capture support removed. v2.13.0 - 14 Feb 2020 diff --git a/acinclude.m4 b/acinclude.m4 index 1212a463e..9338af947 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -359,25 +359,6 @@ AC_DEFUN([OVS_CHECK_DPDK], [ AC_DEFINE([VHOST_NUMA], [1], [NUMA Aware vHost support detected in DPDK.]) ], [], [[#include ]]) - AC_MSG_CHECKING([whether DPDK pdump support is enabled]) - AC_ARG_ENABLE( - [dpdk-pdump], - [AC_HELP_STRING([--enable-dpdk-pdump], - [Enable DPDK pdump packet capture support])], - [AC_MSG_RESULT([yes]) - AC_MSG_WARN([DPDK pdump is deprecated, consider using ovs-tcpdump instead]) - AC_CHECK_DECL([RTE_LIBRTE_PMD_PCAP], [ - OVS_FIND_DEPENDENCY([pcap_dump], [pcap], [libpcap]) - AC_CHECK_DECL([RTE_LIBRTE_PDUMP], [ - AC_DEFINE([DPDK_PDUMP], [1], [DPDK pdump enabled in OVS.]) - ], [ - AC_MSG_ERROR([RTE_LIBRTE_PDUMP is not defined in rte_config.h]) - ], [[#include ]]) - ], [ - AC_MSG_ERROR([RTE_LIBRTE_PMD_PCAP is not defined in rte_config.h]) - ], [[#include ]])], - [AC_MSG_RESULT([no])]) - AC_CHECK_DECL([RTE_LIBRTE_MLX5_PMD], [dnl found OVS_FIND_DEPENDENCY([mnl_attr_put], [mnl], [libmnl]) AC_CHECK_DECL([RTE_IBVERBS_LINK_DLOPEN], [], [dnl not found diff 
--git a/lib/dpdk.c b/lib/dpdk.c index 37ea2973c..31450d470 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -26,9 +26,6 @@ #include #include #include -#ifdef DPDK_PDUMP -#include -#endif #include "dirs.h" #include "fatal-signal.h" @@ -431,15 +428,6 @@ dpdk_init__(const struct smap *ovs_other_config) /* We are called from the main thread here */ RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; -#ifdef DPDK_PDUMP - VLOG_WARN("DPDK pdump support is deprecated and " - "will be removed in next OVS releases."); - err = rte_pdump_init(); - if (err) { - VLOG_INFO("Error initialising DPDK pdump"); - } -#endif - /* Finally, register the dpdk classes */ netdev_dpdk_register(); netdev_register_flow_api_provider(&netdev_offload_dpdk); -- GitLab From 82c9d9993d47f8990af23721c049f65369177dfa Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 28 Feb 2020 17:56:46 +0100 Subject: [PATCH 037/432] netdev-dpdk: Remove deprecated ring port type. 'dpdkr' ring ports were deprecated in the 2.13 release and have not actually been used for a long time. Remove support now. 
More details in commit b4c5f00c339b ("netdev-dpdk: Deprecate ring ports.") Acked-by: Aaron Conole Acked-by: David Marchand Acked-by: Ian Stokes Signed-off-by: Ilya Maximets --- Documentation/automake.mk | 1 - Documentation/topics/dpdk/index.rst | 1 - Documentation/topics/dpdk/ring.rst | 92 -------- NEWS | 1 + lib/netdev-dpdk.c | 189 ----------------- rhel/README.RHEL.rst | 3 - rhel/etc_sysconfig_network-scripts_ifdown-ovs | 2 +- rhel/etc_sysconfig_network-scripts_ifup-ovs | 7 - tests/.gitignore | 1 - tests/automake.mk | 7 - tests/dpdk/ring_client.c | 200 ------------------ 11 files changed, 2 insertions(+), 502 deletions(-) delete mode 100644 Documentation/topics/dpdk/ring.rst delete mode 100644 tests/dpdk/ring_client.c diff --git a/Documentation/automake.mk b/Documentation/automake.mk index 691f345ec..f85c4320e 100644 --- a/Documentation/automake.mk +++ b/Documentation/automake.mk @@ -39,7 +39,6 @@ DOC_SOURCE = \ Documentation/topics/dpdk/phy.rst \ Documentation/topics/dpdk/pmd.rst \ Documentation/topics/dpdk/qos.rst \ - Documentation/topics/dpdk/ring.rst \ Documentation/topics/dpdk/vdev.rst \ Documentation/topics/dpdk/vhost-user.rst \ Documentation/topics/fuzzing/index.rst \ diff --git a/Documentation/topics/dpdk/index.rst b/Documentation/topics/dpdk/index.rst index 336dcc56b..a5be5e344 100644 --- a/Documentation/topics/dpdk/index.rst +++ b/Documentation/topics/dpdk/index.rst @@ -34,7 +34,6 @@ DPDK Support /topics/dpdk/bridge /topics/dpdk/phy /topics/dpdk/vhost-user - /topics/dpdk/ring /topics/dpdk/vdev /topics/dpdk/pmd /topics/dpdk/qos diff --git a/Documentation/topics/dpdk/ring.rst b/Documentation/topics/dpdk/ring.rst deleted file mode 100644 index 9d91498c7..000000000 --- a/Documentation/topics/dpdk/ring.rst +++ /dev/null @@ -1,92 +0,0 @@ -.. - Licensed under the Apache License, Version 2.0 (the "License"); you may - not use this file except in compliance with the License. 
You may obtain - a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - License for the specific language governing permissions and limitations - under the License. - - Convention for heading levels in Open vSwitch documentation: - - ======= Heading 0 (reserved for the title in a document) - ------- Heading 1 - ~~~~~~~ Heading 2 - +++++++ Heading 3 - ''''''' Heading 4 - - Avoid deeper levels because they do not render well. - -=============== -DPDK Ring Ports -=============== - -.. warning:: - - DPDK ring ports are considered *deprecated*. Please migrate to - virtio-based interfaces, e.g. :doc:`vhost-user ` ports, - ``net_virtio_user`` :doc:`DPDK vdev `. - -.. warning:: - - DPDK ring interfaces cannot be used for guest communication and are retained - mainly for backwards compatibility purposes. In nearly all cases, - :doc:`vhost-user ports ` are a better choice and should be used - instead. - -OVS userspace switching supports ring ports implemented using DPDK's -``librte_ring`` library. For more information on this library, refer -to the `DPDK documentation`_. - -.. important:: - - To use any DPDK-backed interface, you must ensure your bridge is configured - correctly. For more information, refer to :doc:`bridge`. - -Quick Example -------------- - -This example demonstrates how to add a ``dpdkr`` port to an existing bridge -called ``br0``:: - - $ ovs-vsctl add-port br0 dpdkr0 -- set Interface dpdkr0 type=dpdkr - -dpdkr ------ - -To use ring ports, you must first add said ports to the switch. Unlike -:doc:`vhost-user ports `, ring port names must take a specific -format, ``dpdkrNN``, where ``NN`` is the port ID. 
For example:: - - $ ovs-vsctl add-port br0 dpdkr0 -- set Interface dpdkr0 type=dpdkr - -Once the port has been added to the switch, they can be used by host processes. -A sample loopback application - ``test-dpdkr`` - is included with Open vSwitch. -To use this, run the following:: - - $ ./tests/test-dpdkr -c 1 -n 4 --proc-type=secondary -- -n 0 - -Further functionality would require developing your own application. Refer to -the `DPDK documentation`_ for more information on how to do this. - -Adding dpdkr ports to the guest -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -It is **not** recommended to use ring ports from guests. Historically, this was -possible using a patched version of QEMU and the IVSHMEM feature provided with -DPDK. However, this functionality was removed because: - -- The IVSHMEM library was removed from DPDK in DPDK 16.11 - -- Support for IVSHMEM was never upstreamed to QEMU and has been publicly - rejected by the QEMU community - -- :doc:`vhost-user interfaces ` are the de facto DPDK-based path to - guests - -.. _DPDK documentation: - https://doc.dpdk.org/guides-19.11/prog_guide/ring_lib.html diff --git a/NEWS b/NEWS index c58a9014e..8710a0233 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,7 @@ Post-v2.13.0 value of other-config:dp-sn in the Bridge table. - DPDK: * Deprecated DPDK pdump packet capture support removed. + * Deprecated DPDK ring ports (dpdkr) are no longer supported. 
v2.13.0 - 14 Feb 2020 diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 7ab81864d..44ebf96da 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include @@ -216,10 +215,6 @@ struct netdev_dpdk_sw_stats { uint64_t tx_invalid_hwol_drops; }; -enum { DPDK_RING_SIZE = 256 }; -BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE)); -enum { DRAIN_TSC = 200000ULL }; - enum dpdk_dev_type { DPDK_DEV_ETH = 0, DPDK_DEV_VHOST = 1, @@ -397,22 +392,6 @@ struct dpdk_tx_queue { ); }; -/* dpdk has no way to remove dpdk ring ethernet devices - so we have to keep them around once they've been created -*/ - -static struct ovs_list dpdk_ring_list OVS_GUARDED_BY(dpdk_mutex) - = OVS_LIST_INITIALIZER(&dpdk_ring_list); - -struct dpdk_ring { - /* For the client rings */ - struct rte_ring *cring_tx; - struct rte_ring *cring_rx; - unsigned int user_port_id; /* User given port no, parsed from port name */ - dpdk_port_t eth_port_id; /* ethernet device port id */ - struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex); -}; - struct ingress_policer { struct rte_meter_srtcm_params app_srtcm_params; struct rte_meter_srtcm in_policer; @@ -1299,27 +1278,6 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, return 0; } -/* dev_name must be the prefix followed by a positive decimal number. - * (no leading + or - signs are allowed) */ -static int -dpdk_dev_parse_name(const char dev_name[], const char prefix[], - unsigned int *port_no) -{ - const char *cport; - - if (strncmp(dev_name, prefix, strlen(prefix))) { - return ENODEV; - } - - cport = dev_name + strlen(prefix); - - if (str_to_uint(cport, 10, port_no)) { - return 0; - } else { - return ENODEV; - } -} - /* Get the number of OVS interfaces which have the same DPDK * rte device (e.g. same pci bus address). * FIXME: avoid direct access to DPDK internal array rte_eth_devices. 
@@ -2059,19 +2017,6 @@ out: return err; } -static int -netdev_dpdk_ring_set_config(struct netdev *netdev, const struct smap *args, - char **errp OVS_UNUSED) -{ - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - - ovs_mutex_lock(&dev->mutex); - dpdk_set_rxq_config(dev, args); - ovs_mutex_unlock(&dev->mutex); - - return 0; -} - static int netdev_dpdk_vhost_client_set_config(struct netdev *netdev, const struct smap *args, @@ -4257,131 +4202,6 @@ netdev_dpdk_class_init(void) return 0; } -/* Client Rings */ - -static int -dpdk_ring_create(const char dev_name[], unsigned int port_no, - dpdk_port_t *eth_port_id) -{ - struct dpdk_ring *ring_pair; - char *ring_name; - int port_id; - - ring_pair = dpdk_rte_mzalloc(sizeof *ring_pair); - if (!ring_pair) { - return ENOMEM; - } - - /* XXX: Add support for multiquque ring. */ - ring_name = xasprintf("%s_tx", dev_name); - - /* Create single producer tx ring, netdev does explicit locking. */ - ring_pair->cring_tx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0, - RING_F_SP_ENQ); - free(ring_name); - if (ring_pair->cring_tx == NULL) { - rte_free(ring_pair); - return ENOMEM; - } - - ring_name = xasprintf("%s_rx", dev_name); - - /* Create single consumer rx ring, netdev does explicit locking. 
*/ - ring_pair->cring_rx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0, - RING_F_SC_DEQ); - free(ring_name); - if (ring_pair->cring_rx == NULL) { - rte_free(ring_pair); - return ENOMEM; - } - - port_id = rte_eth_from_rings(dev_name, &ring_pair->cring_rx, 1, - &ring_pair->cring_tx, 1, SOCKET0); - - if (port_id < 0) { - rte_free(ring_pair); - return ENODEV; - } - - ring_pair->user_port_id = port_no; - ring_pair->eth_port_id = port_id; - *eth_port_id = port_id; - - ovs_list_push_back(&dpdk_ring_list, &ring_pair->list_node); - - return 0; -} - -static int -dpdk_ring_open(const char dev_name[], dpdk_port_t *eth_port_id) - OVS_REQUIRES(dpdk_mutex) -{ - struct dpdk_ring *ring_pair; - unsigned int port_no; - int err = 0; - - /* Names always start with "dpdkr" */ - err = dpdk_dev_parse_name(dev_name, "dpdkr", &port_no); - if (err) { - return err; - } - - /* Look through our list to find the device */ - LIST_FOR_EACH (ring_pair, list_node, &dpdk_ring_list) { - if (ring_pair->user_port_id == port_no) { - VLOG_INFO("Found dpdk ring device %s:", dev_name); - /* Really all that is needed */ - *eth_port_id = ring_pair->eth_port_id; - return 0; - } - } - /* Need to create the device rings */ - return dpdk_ring_create(dev_name, port_no, eth_port_id); -} - -static int -netdev_dpdk_ring_send(struct netdev *netdev, int qid, - struct dp_packet_batch *batch, bool concurrent_txq) -{ - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); - struct dp_packet *packet; - - /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that - * the offload fields are clear. This is because the same mbuf may be - * modified by the consumer of the ring and return into the datapath - * without recalculating the RSS hash or revalidating the checksums. 
*/ - DP_PACKET_BATCH_FOR_EACH (i, packet, batch) { - dp_packet_reset_offload(packet); - } - - netdev_dpdk_send__(dev, qid, batch, concurrent_txq); - return 0; -} - -static int -netdev_dpdk_ring_construct(struct netdev *netdev) -{ - dpdk_port_t port_no = 0; - int err = 0; - - VLOG_WARN_ONCE("dpdkr a.k.a. ring ports are considered deprecated. " - "Please migrate to virtio-based interfaces, e.g. " - "dpdkvhostuserclient ports, net_virtio_user DPDK vdev."); - - ovs_mutex_lock(&dpdk_mutex); - - err = dpdk_ring_open(netdev->name, &port_no); - if (err) { - goto unlock_dpdk; - } - - err = common_construct(netdev, port_no, DPDK_DEV_ETH, - rte_eth_dev_socket_id(port_no)); -unlock_dpdk: - ovs_mutex_unlock(&dpdk_mutex); - return err; -} - /* QoS Functions */ /* @@ -5459,14 +5279,6 @@ static const struct netdev_class dpdk_class = { .send = netdev_dpdk_eth_send, }; -static const struct netdev_class dpdk_ring_class = { - .type = "dpdkr", - NETDEV_DPDK_CLASS_BASE, - .construct = netdev_dpdk_ring_construct, - .set_config = netdev_dpdk_ring_set_config, - .send = netdev_dpdk_ring_send, -}; - static const struct netdev_class dpdk_vhost_class = { .type = "dpdkvhostuser", NETDEV_DPDK_CLASS_COMMON, @@ -5502,7 +5314,6 @@ void netdev_dpdk_register(void) { netdev_register_provider(&dpdk_class); - netdev_register_provider(&dpdk_ring_class); netdev_register_provider(&dpdk_vhost_class); netdev_register_provider(&dpdk_vhost_client_class); } diff --git a/rhel/README.RHEL.rst b/rhel/README.RHEL.rst index 1cd2065ef..98175dfd3 100644 --- a/rhel/README.RHEL.rst +++ b/rhel/README.RHEL.rst @@ -36,9 +36,6 @@ TYPE * ``OVSDPDKPort``, if ```` is a physical DPDK NIC port (name must start with ``dpdk`` and end with portid, eg ``dpdk0``) - * ``OVSDPDKRPort``, if ```` is a DPDK ring port (name must start with - ``dpdkr`` and end with portid, e.g. ``dpdkr0``) - * ``OVSDPDKVhostUserPort`` if ```` is a DPDK vhost-user port * ``OVSDPDKBond`` if ```` is an OVS DPDK bond. 
diff --git a/rhel/etc_sysconfig_network-scripts_ifdown-ovs b/rhel/etc_sysconfig_network-scripts_ifdown-ovs index 63d048b22..343ac0945 100755 --- a/rhel/etc_sysconfig_network-scripts_ifdown-ovs +++ b/rhel/etc_sysconfig_network-scripts_ifdown-ovs @@ -59,7 +59,7 @@ case "$TYPE" in OVSPatchPort|OVSTunnel) ovs-vsctl -t ${TIMEOUT} -- --if-exists del-port "$OVS_BRIDGE" "$DEVICE" ;; - OVSDPDKPort|OVSDPDKRPort|OVSDPDKVhostUserPort|OVSDPDKBond) + OVSDPDKPort|OVSDPDKVhostUserPort|OVSDPDKBond) ovs-vsctl -t ${TIMEOUT} -- --if-exists del-port "$OVS_BRIDGE" "$DEVICE" ;; *) diff --git a/rhel/etc_sysconfig_network-scripts_ifup-ovs b/rhel/etc_sysconfig_network-scripts_ifup-ovs index b01461cc4..0955c0e1f 100755 --- a/rhel/etc_sysconfig_network-scripts_ifup-ovs +++ b/rhel/etc_sysconfig_network-scripts_ifup-ovs @@ -180,13 +180,6 @@ case "$TYPE" in ${OTHERSCRIPT} "$OVS_BRIDGE" fi ;; - OVSDPDKRPort) - ifup_ovs_bridge - ovs-vsctl -t ${TIMEOUT} \ - -- --if-exists del-port "$OVS_BRIDGE" "$DEVICE" \ - -- add-port "$OVS_BRIDGE" "$DEVICE" $OVS_OPTIONS \ - -- set Interface "$DEVICE" type=dpdkr ${OVS_EXTRA+-- $OVS_EXTRA} - ;; OVSDPDKVhostUserPort) ifup_ovs_bridge PORT_TYPE="dpdkvhostuser" diff --git a/tests/.gitignore b/tests/.gitignore index c5abb32d0..99fdf70d5 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -7,7 +7,6 @@ /idltest.h /idltest.ovsidl /ovstest -/test-dpdkr /ovs-pki.log /ovsdb-cluster-testsuite /ovsdb-cluster-testsuite.dir/ diff --git a/tests/automake.mk b/tests/automake.mk index 9c7ebdce9..81eb2a9b8 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -414,13 +414,6 @@ tests/idltest.ovsidl: $(IDLTEST_IDL_FILES) tests/idltest.c: tests/idltest.h -if DPDK_NETDEV -noinst_PROGRAMS += tests/test-dpdkr -tests_test_dpdkr_SOURCES = \ - tests/dpdk/ring_client.c -tests_test_dpdkr_LDADD = lib/libopenvswitch.la $(LIBS) -endif - noinst_PROGRAMS += tests/ovstest tests_ovstest_SOURCES = \ tests/ovstest.c \ diff --git a/tests/dpdk/ring_client.c b/tests/dpdk/ring_client.c deleted file 
mode 100644 index 8cc3fb533..000000000 --- a/tests/dpdk/ring_client.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * BSD LICENSE - * - * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "util.h" - -/* Number of packets to attempt to read from queue. 
*/ -#define PKT_READ_SIZE ((uint16_t)32) - -/* Define common names for structures shared between ovs_dpdk and client. */ -#define MP_CLIENT_RXQ_NAME "dpdkr%u_tx" -#define MP_CLIENT_TXQ_NAME "dpdkr%u_rx" - -#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1 - -/* Our client id number - tells us which rx queue to read, and tx - * queue to write to. - */ -static unsigned int client_id; - -/* - * Given the rx queue name template above, get the queue name. - */ -static inline const char * -get_rx_queue_name(unsigned int id) -{ - /* Buffer for return value. */ - static char buffer[RTE_RING_NAMESIZE]; - - snprintf(buffer, sizeof(buffer), MP_CLIENT_RXQ_NAME, id); - return buffer; -} - -/* - * Given the tx queue name template above, get the queue name. - */ -static inline const char * -get_tx_queue_name(unsigned int id) -{ - /* Buffer for return value. */ - static char buffer[RTE_RING_NAMESIZE]; - - snprintf(buffer, sizeof(buffer), MP_CLIENT_TXQ_NAME, id); - return buffer; -} - -/* - * Print a usage message. - */ -static void -usage(const char *progname) -{ - printf("\nUsage: %s [EAL args] -- -n \n", progname); -} - -/* - * Convert the client id number from a string to an usigned int. - */ -static int -parse_client_num(const char *client) -{ - if (str_to_uint(client, 10, &client_id)) { - return 0; - } else { - return -1; - } -} - -/* - * Parse the application arguments to the client app. - */ -static int -parse_app_args(int argc, char *argv[]) -{ - int option_index = 0, opt = 0; - char **argvopt = argv; - const char *progname = NULL; - static struct option lgopts[] = { - {NULL, 0, NULL, 0 } - }; - progname = argv[0]; - - while ((opt = getopt_long(argc, argvopt, "n:", lgopts, - &option_index)) != EOF) { - switch (opt) { - case 'n': - if (parse_client_num(optarg) != 0) { - usage(progname); - return -1; - } - break; - default: - usage(progname); - return -1; - } - } - - return 0; -} - -/* - * Application main function - loops through - * receiving and processing packets. 
Never returns - */ -int -main(int argc, char *argv[]) -{ - struct rte_ring *rx_ring = NULL; - struct rte_ring *tx_ring = NULL; - int retval = 0; - void *pkts[PKT_READ_SIZE]; - int rslt = 0; - - if ((retval = rte_eal_init(argc, argv)) < 0) { - return -1; - } - - argc -= retval; - argv += retval; - - if (parse_app_args(argc, argv) < 0) { - rte_exit(EXIT_FAILURE, "Invalid command-line arguments\n"); - } - - rx_ring = rte_ring_lookup(get_rx_queue_name(client_id)); - if (rx_ring == NULL) { - rte_exit(EXIT_FAILURE, - "Cannot get RX ring - is server process running?\n"); - } - - tx_ring = rte_ring_lookup(get_tx_queue_name(client_id)); - if (tx_ring == NULL) { - rte_exit(EXIT_FAILURE, - "Cannot get TX ring - is server process running?\n"); - } - - RTE_LOG(INFO, APP, "Finished Process Init.\n"); - - printf("\nClient process %u handling packets\n", client_id); - printf("[Press Ctrl-C to quit ...]\n"); - - for (;;) { - unsigned rx_pkts = PKT_READ_SIZE; - - /* Try dequeuing max possible packets first, if that fails, get the - * most we can. Loop body should only execute once, maximum. - */ - while (unlikely(rte_ring_dequeue_bulk(rx_ring, pkts, - rx_pkts, NULL) != 0) && rx_pkts > 0) { - rx_pkts = (uint16_t)RTE_MIN(rte_ring_count(rx_ring), PKT_READ_SIZE); - } - - if (rx_pkts > 0) { - /* blocking enqueue */ - do { - rslt = rte_ring_enqueue_bulk(tx_ring, pkts, rx_pkts, NULL); - } while (rslt == -ENOBUFS); - } - } -} -- GitLab From 492600a21fcfd43d275bd7261de451c98ca80f21 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 14 Feb 2020 16:18:50 -0800 Subject: [PATCH 038/432] release-process: Describe how to branch and how to make a release. 
Acked-by: Justin Pettit Signed-off-by: Ben Pfaff --- Documentation/internals/release-process.rst | 57 +++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/Documentation/internals/release-process.rst b/Documentation/internals/release-process.rst index 89c117724..acbfc0665 100644 --- a/Documentation/internals/release-process.rst +++ b/Documentation/internals/release-process.rst @@ -122,6 +122,63 @@ approximate: | T + 5.5 | Aug 15, Feb 15 | Release version x.y.0 | +---------------+----------------+--------------------------------------+ +How to Branch +------------- + +To branch "master" for the eventual release of OVS version x.y.0, +prepare two patches against master: + +1. "Prepare for x.y.0." following the model of commit 836d1973c56e + ("Prepare for 2.11.0."). + +2. "Prepare for post-x.y.0 (x.y.90)." following the model of commit + fe2870c574db ("Prepare for post-2.11.0 (2.11.90).") + +Post both patches to ovs-dev. Get them reviewed in the usual way. + +Apply both patches to master, and create branch-x.y by pushing only +the first patch. The following command illustrates how to do both of +these at once assuming the local repository HEAD points to the +"Prepare for post-x.y.0" commit: + + git push origin HEAD:master HEAD^:refs/heads/branch-x.y + +Branching should be announced on ovs-dev. + +How to Release +-------------- + +Follow these steps to release version x.y.z of OVS from branch-x.y. + +1. Prepare two patches against branch-x.y: + + a. "Set release date for x.y.z". For z = 0, follow the model of + commit d11f4cbbfe05 ("Set release date for 2.12.0."); for z > 0, + follow the model of commit 53d5c18118b0 ("Set release date for + 2.11.3."). + + b. "Prepare for x.y.(z+1)." following the model of commit + db02dd23e48a ("Prepare for 2.11.1."). + +3. Post the patches to ovs-dev. Get them reviewed in the usual way. + +4. Apply the patches to branch-x.y. + +5. If z = 0, apply the first patch (only) to master. + +6. 
Sign a tag vx.y.z "Open vSwitch version x.y.z" and push it to the + repo. + +7. Update http://www.openvswitch.org/download/. See commit + 31eaa72cafac ("Add 2.12.0 and older release announcements.") in the + website repo (https://github.com/openvswitch/openvswitch.github.io) + for an example. + +8. Consider updating the Wikipedia page for Open vSwitch at + https://en.wikipedia.org/wiki/Open_vSwitch + +9. Tweet. + Contact ------- -- GitLab From d57b89f3268dd918cf5800f2887f6e4dd6e3b967 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 6 Mar 2020 13:25:12 -0800 Subject: [PATCH 039/432] release-process: Fix indentation. Signed-off-by: Ben Pfaff --- Documentation/internals/release-process.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/internals/release-process.rst b/Documentation/internals/release-process.rst index acbfc0665..63080caab 100644 --- a/Documentation/internals/release-process.rst +++ b/Documentation/internals/release-process.rst @@ -141,7 +141,7 @@ the first patch. The following command illustrates how to do both of these at once assuming the local repository HEAD points to the "Prepare for post-x.y.0" commit: - git push origin HEAD:master HEAD^:refs/heads/branch-x.y + git push origin HEAD:master HEAD^:refs/heads/branch-x.y Branching should be announced on ovs-dev. -- GitLab From 44810e6d411e36efc56e949fd28d804e9750633e Mon Sep 17 00:00:00 2001 From: Vishal Deep Ajmera Date: Mon, 3 Feb 2020 11:32:46 +0100 Subject: [PATCH 040/432] ofproto: Add support to watch controller port liveness in fast-failover group Currently fast-failover group does not support checking liveness of controller port (OFPP_CONTROLLER). However, this feature can be useful for selecting an alternate pipeline when the controller connection itself is down, e.g. by using a local DHCP server to reply to any DHCP request originating from VMs. This patch adds support for watching controller port liveness in a fast-failover group. 
Controller port is considered live when atleast one of-connection is alive. Example usage: ovs-ofctl add-group br-int 'group_id=1234,type=ff, bucket=watch_port:CONTROLLER,actions:, bucket=watch_port:1,actions: Signed-off-by: Vishal Deep Ajmera Signed-off-by: Ben Pfaff --- NEWS | 1 + lib/ofp-group.c | 3 ++- ofproto/ofproto-dpif-xlate.c | 5 ++++- ofproto/ofproto-dpif.c | 10 ++++++++++ ofproto/ofproto-dpif.h | 3 +++ ofproto/ofproto.c | 3 ++- 6 files changed, 22 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 8710a0233..872e1863c 100644 --- a/NEWS +++ b/NEWS @@ -4,6 +4,7 @@ Post-v2.13.0 - OpenFlow: * The OpenFlow ofp_desc/serial_num may now be configured by setting the value of other-config:dp-sn in the Bridge table. + * Added support to watch CONTROLLER port status in fast failover group. - DPDK: * Deprecated DPDK pdump packet capture support removed. * Deprecated DPDK ring ports (dpdkr) are no longer supported. diff --git a/lib/ofp-group.c b/lib/ofp-group.c index b675e802c..bf0f8af54 100644 --- a/lib/ofp-group.c +++ b/lib/ofp-group.c @@ -660,7 +660,8 @@ parse_bucket_str(struct ofputil_bucket *bucket, char *str_, } else if (!strcasecmp(key, "watch_port")) { if (!ofputil_port_from_string(value, port_map, &bucket->watch_port) || (ofp_to_u16(bucket->watch_port) >= ofp_to_u16(OFPP_MAX) - && bucket->watch_port != OFPP_ANY)) { + && bucket->watch_port != OFPP_ANY + && bucket->watch_port != OFPP_CONTROLLER)) { error = xasprintf("%s: invalid watch_port", value); } } else if (!strcasecmp(key, "watch_group")) { diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 0b45ecf3d..adf57a5e8 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -1888,9 +1888,12 @@ bucket_is_alive(const struct xlate_ctx *ctx, return (!ofputil_bucket_has_liveness(bucket) || (bucket->watch_port != OFPP_ANY + && bucket->watch_port != OFPP_CONTROLLER && odp_port_is_alive(ctx, bucket->watch_port)) || (bucket->watch_group != OFPG_ANY - && 
group_is_alive(ctx, bucket->watch_group, depth + 1))); + && group_is_alive(ctx, bucket->watch_group, depth + 1)) + || (bucket->watch_port == OFPP_CONTROLLER + && ofproto_is_alive(&ctx->xbridge->ofproto->up))); } static void diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index d56cece95..d21874b46 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -1800,6 +1800,7 @@ run(struct ofproto *ofproto_) { struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_); uint64_t new_seq, new_dump_seq; + bool is_connected; if (mbridge_need_revalidate(ofproto->mbridge)) { ofproto->backer->need_revalidate = REV_RECONFIGURE; @@ -1868,6 +1869,15 @@ run(struct ofproto *ofproto_) ofproto->backer->need_revalidate = REV_MCAST_SNOOPING; } + /* Check if controller connection is toggled. */ + is_connected = ofproto_is_alive(&ofproto->up); + if (ofproto->is_controller_connected != is_connected) { + ofproto->is_controller_connected = is_connected; + /* Trigger revalidation as fast failover group monitoring + * controller port may need to check liveness again. */ + ofproto->backer->need_revalidate = REV_RECONFIGURE; + } + new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif)); if (ofproto->dump_seq != new_dump_seq) { struct rule *rule, *next_rule; diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index c9d5df34b..aee61d61d 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -342,6 +342,9 @@ struct ofproto_dpif { struct guarded_list ams; /* Contains "struct ofproto_async_msgs"s. */ struct seq *ams_seq; /* For notifying 'ams' reception. */ uint64_t ams_seqno; + + bool is_controller_connected; /* True if any controller admitted this + * switch connection. 
*/ }; struct ofproto_dpif *ofproto_dpif_lookup_by_name(const char *name); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index e2591287d..0fbd6c380 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -1906,7 +1906,8 @@ ofproto_wait(struct ofproto *p) bool ofproto_is_alive(const struct ofproto *p) { - return connmgr_has_controllers(p->connmgr); + return (connmgr_has_controllers(p->connmgr) + && connmgr_is_any_controller_admitted(p->connmgr)); } /* Adds some memory usage statistics for 'ofproto' into 'usage', for use with -- GitLab From 692a09cb5e2a1ba8aaddd3340d80ae47fcda3ae2 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Fri, 28 Feb 2020 18:07:04 -0800 Subject: [PATCH 041/432] raft-rpc: Fix message format. Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- ovsdb/raft-rpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ovsdb/raft-rpc.c b/ovsdb/raft-rpc.c index 18c83fe9c..dd14d8109 100644 --- a/ovsdb/raft-rpc.c +++ b/ovsdb/raft-rpc.c @@ -544,8 +544,8 @@ raft_format_install_snapshot_request( ds_put_format(s, " last_index=%"PRIu64, rq->last_index); ds_put_format(s, " last_term=%"PRIu64, rq->last_term); ds_put_format(s, " last_eid="UUID_FMT, UUID_ARGS(&rq->last_eid)); - ds_put_cstr(s, " last_servers="); ds_put_format(s, " election_timer=%"PRIu64, rq->election_timer); + ds_put_cstr(s, " last_servers="); struct hmap servers; struct ovsdb_error *error = -- GitLab From bda1f6b60588a45b71fa812f260921793df39aef Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Fri, 28 Feb 2020 18:07:05 -0800 Subject: [PATCH 042/432] ovsdb-server: Don't disconnect clients after raft install_snapshot. When "schema" field is found in read_db(), there can be two cases: 1. There is a schema change in clustered DB and the "schema" is the new one. 2. There is a install_snapshot RPC happened, which caused log compaction on the server and the next log is just the snapshot, which always constains "schema" field, even though the schema hasn't been changed. 
The current implementation doesn't handle case 2), and always assumes the schema is changed, hence disconnecting all clients of the server. It can cause stability problems when a large number of clients are connected when this happens in a large-scale environment. Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- ovsdb/ovsdb-server.c | 3 ++- tests/ovsdb-cluster.at | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index b6957d730..d416f1b60 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -543,7 +543,8 @@ parse_txn(struct server_config *config, struct db *db, struct ovsdb_schema *schema, const struct json *txn_json, const struct uuid *txnid) { - if (schema) { + if (schema && (!db->db->schema || strcmp(schema->version, + db->db->schema->version))) { /* We're replacing the schema (and the data). Destroy the database * (first grabbing its storage), then replace it with the new schema. * The transaction must also include the replacement data. 
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at index 3a0bd4579..5b6188b96 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -273,6 +273,62 @@ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Ele AT_CLEANUP + +AT_BANNER([OVSDB cluster install snapshot RPC]) + +AT_SETUP([OVSDB cluster - install snapshot RPC]) +AT_KEYWORDS([ovsdb server positive unix cluster snapshot]) + +n=3 +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +ordinal_schema > schema +AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) +cid=`ovsdb-tool db-cid s1.db` +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +for i in `seq 2 $n`; do + AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) +done + +on_exit 'kill `cat *.pid`' +for i in `seq $n`; do + AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) +done +for i in `seq $n`; do + AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) +done + +# Kill one follower (s2) and write some data to cluster, so that the follower is falling behind +printf "\ns2: stopping\n" +OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s2], [s2.pid]) + +AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", + {"op": "insert", + "table": "simple", + "row": {"i": 1}}]]'], [0], [ignore], [ignore]) + +# Compact leader online to generate snapshot +AT_CHECK([ovs-appctl -t "`pwd`"/s1 ovsdb-server/compact]) + +# Start the follower s2 again. +AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s2.log --pidfile=s2.pid --unixctl=s2 --remote=punix:s2.ovsdb s2.db]) +AT_CHECK([ovsdb_client_wait unix:s2.ovsdb $schema_name connected]) + +# A client transaction through s2. 
During this transaction, there will be a +# install_snapshot RPC because s2 detects it is behind and s1 doesn't have the +# pre_log_index requested by s2 because it is already compacted. +# After the install_snapshot RPC process, the transaction through s2 should +# succeed. +AT_CHECK([ovsdb-client transact unix:s2.ovsdb '[["idltest", + {"op": "insert", + "table": "simple", + "row": {"i": 1}}]]'], [0], [ignore], [ignore]) + +for i in `seq $n`; do + OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) +done + +AT_CLEANUP + OVS_START_SHELL_HELPERS -- GitLab From 2833885f7ab565ce07f40de2ab8d415dc0390329 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Fri, 28 Feb 2020 18:07:06 -0800 Subject: [PATCH 043/432] raft: Fix raft_is_connected() when there is no leader yet. If there is never a leader known by the current server, it's status should be "disconnected" to the cluster. Without this patch, when a server in cluster is restarted, before it successfully connecting back to the cluster it will appear as connected, which is wrong. Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- ovsdb/raft.c | 10 ++++++++-- tests/ovsdb-cluster.at | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 4789bc4f2..6cd7b0041 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -298,6 +298,11 @@ struct raft { bool had_leader; /* There has been leader elected since last election initiated. This is to help setting candidate_retrying. */ + + /* For all. */ + bool ever_had_leader; /* There has been leader elected since the raft + is initialized, meaning it is ever + connected. */ }; /* All Raft structures. */ @@ -1024,7 +1029,8 @@ raft_is_connected(const struct raft *raft) && !raft->joining && !raft->leaving && !raft->left - && !raft->failed); + && !raft->failed + && raft->ever_had_leader); VLOG_DBG("raft_is_connected: %s\n", ret? 
"true": "false"); return ret; } @@ -2519,7 +2525,7 @@ static void raft_set_leader(struct raft *raft, const struct uuid *sid) { raft->leader_sid = *sid; - raft->had_leader = true; + raft->ever_had_leader = raft->had_leader = true; raft->candidate_retrying = false; } diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at index 5b6188b96..0aa456448 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -179,6 +179,41 @@ AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) ovsdb_test_cluster_disconnect 5 leader yes AT_CLEANUP +AT_SETUP([OVSDB cluster - initial status should be disconnected]) +AT_KEYWORDS([ovsdb server negative unix cluster disconnect]) + +n=3 +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +ordinal_schema > schema +AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) +cid=`ovsdb-tool db-cid s1.db` +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +for i in `seq 2 $n`; do + AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) +done + +on_exit 'kill `cat *.pid`' +for i in `seq $n`; do + AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) +done +for i in `seq $n`; do + AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) +done + +# Stop all servers, and start the s1 only, to test initial connection status +# when there is no leader yet. +for i in `seq 1 $n`; do + OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) +done +i=1 +AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) + +# The initial status should be disconnected. So wait should fail. 
+AT_CHECK([ovsdb_client_wait --timeout=1 unix:s$i.ovsdb $schema_name connected], [142], [ignore], [ignore]) +OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) + +AT_CLEANUP + AT_BANNER([OVSDB cluster election timer change]) -- GitLab From bb66a0a6eb7971556504a294f5cf796d1d72db25 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Fri, 28 Feb 2020 18:07:07 -0800 Subject: [PATCH 044/432] raft: Avoid busy loop during leader election. When a server doesn't see a leader yet, e.g. during leader re-election, if a transaction comes from a client, it will cause a 100% CPU busy loop. With debug log enabled it is like: 2020-02-28T04:04:35.631Z|00059|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 2020-02-28T04:04:35.631Z|00062|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 2020-02-28T04:04:35.631Z|00065|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 2020-02-28T04:04:35.631Z|00068|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 2020-02-28T04:04:35.631Z|00071|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 2020-02-28T04:04:35.631Z|00074|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 2020-02-28T04:04:35.631Z|00077|poll_loop|DBG|wakeup due to 0-ms timeout at ../ovsdb/trigger.c:164 ... The problem is that in ovsdb_trigger_try(), all cluster errors are treated as temporary errors and retried immediately. This patch fixes it by introducing 'run_triggers_now', which tells if a retry is needed immediately. When the cluster error is with detail 'not leader', we don't immediately retry, but will wait for the next poll event to trigger the retry. When 'not leader' status changes, there must be an event, i.e. a raft RPC that changes the status, so the trigger is guaranteed to be triggered, without a busy loop. 
Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- ovsdb/ovsdb.c | 2 +- ovsdb/ovsdb.h | 1 + ovsdb/transaction.c | 2 +- ovsdb/trigger.c | 11 +++++++++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c index cfc96b32f..7e683e681 100644 --- a/ovsdb/ovsdb.c +++ b/ovsdb/ovsdb.c @@ -414,7 +414,7 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage) db->storage = storage; ovs_list_init(&db->monitors); ovs_list_init(&db->triggers); - db->run_triggers = false; + db->run_triggers_now = db->run_triggers = false; shash_init(&db->tables); if (schema) { diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h index 32e533316..5c30a83d9 100644 --- a/ovsdb/ovsdb.h +++ b/ovsdb/ovsdb.h @@ -83,6 +83,7 @@ struct ovsdb { /* Triggers. */ struct ovs_list triggers; /* Contains "struct ovsdb_trigger"s. */ bool run_triggers; + bool run_triggers_now; struct ovsdb_table *rbac_role; diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c index 369436bff..8ffefcf7c 100644 --- a/ovsdb/transaction.c +++ b/ovsdb/transaction.c @@ -967,7 +967,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn) { if (!ovsdb_txn_is_empty(txn)) { - txn->db->run_triggers = true; + txn->db->run_triggers_now = txn->db->run_triggers = true; ovsdb_monitors_commit(txn->db, txn); ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs)); ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit)); diff --git a/ovsdb/trigger.c b/ovsdb/trigger.c index 7e62e90ae..0372302af 100644 --- a/ovsdb/trigger.c +++ b/ovsdb/trigger.c @@ -141,7 +141,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now) struct ovsdb_trigger *t, *next; bool run_triggers = db->run_triggers; - db->run_triggers = false; + db->run_triggers_now = db->run_triggers = false; bool disconnect_all = false; @@ -160,7 +160,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now) void ovsdb_trigger_wait(struct ovsdb *db, long long int now) { - if (db->run_triggers) { + if (db->run_triggers_now) { 
poll_immediate_wake(); } else { long long int deadline = LLONG_MAX; @@ -319,9 +319,16 @@ ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now) if (!strcmp(ovsdb_error_get_tag(error), "cluster error")) { /* Temporary error. Transition back to "initialized" state to * try again. */ + char *err_s = ovsdb_error_to_string(error); + VLOG_DBG("cluster error %s", err_s); + jsonrpc_msg_destroy(t->reply); t->reply = NULL; t->db->run_triggers = true; + if (!strstr(err_s, "not leader")) { + t->db->run_triggers_now = true; + } + free(err_s); ovsdb_error_destroy(error); } else { /* Permanent error. Transition to "completed" state to report -- GitLab From b5e8810443a552b0adc5ff05b483c30de63f5ab9 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Fri, 28 Feb 2020 18:07:08 -0800 Subject: [PATCH 045/432] raft: Avoid sending unnecessary heartbeat when becoming leader. When a node becomes leader, it sends out heartbeat to all followers and then sends out another append-request for a no-op command execution to all followers again immediately. This causes 2 continously append-requests sent out to each followers, and the first heartbeat append-request is unnecessary. This patch removes the heartbeat. Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- ovsdb/raft.c | 1 - 1 file changed, 1 deletion(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 6cd7b0041..0eb8644f3 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -2553,7 +2553,6 @@ raft_become_leader(struct raft *raft) raft->election_timer_new = 0; raft_update_our_match_index(raft, raft->log_end - 1); - raft_send_heartbeats(raft); /* Write the fact that we are leader to the log. This is not used by the * algorithm (although it could be, for quick restart), but it is used for -- GitLab From 99c2dc8d04b3b697edfa02b06e127edad6ad5b28 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Fri, 28 Feb 2020 18:07:09 -0800 Subject: [PATCH 046/432] raft: Send all missing logs in one single append_request. 
When a follower needs to "catch up", leader can send N entries in a single append_request instead of only one entry by each message. The function raft_send_append_request() already supports this, so this patch just calculates the correct "n" and uses it. Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- ovsdb/raft.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 0eb8644f3..c5c1d49a8 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -3344,7 +3344,7 @@ raft_handle_append_reply(struct raft *raft, raft_send_install_snapshot_request(raft, s, NULL); } else if (s->next_index < raft->log_end) { /* Case 2. */ - raft_send_append_request(raft, s, 1, NULL); + raft_send_append_request(raft, s, raft->log_end - s->next_index, NULL); } else { /* Case 3. */ if (s->phase == RAFT_PHASE_CATCHUP) { -- GitLab From 315e88cb4dd9c524ac111323f9d064678cf06a5e Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Fri, 28 Feb 2020 18:07:10 -0800 Subject: [PATCH 047/432] raft: Fix next_index in install_snapshot reply handling. When a leader handles install_snapshot reply, the next_index for the follower should be log_start instead of log_end, because there can be new entries added in leader's log after initiating the install_snapshot procedure. Also, it should send all the accumulated entries to follower in the following append-request message, instead of sending 0 entries, to speed up convergence. Without this fix, there is no functional problem, but it takes unnecessary extra rounds of append-requests responded to with "inconsistency" by the follower, although it will eventually converge. 
Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- ovsdb/raft.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index c5c1d49a8..161273487 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -3997,8 +3997,9 @@ raft_handle_install_snapshot_reply( VLOG_INFO_RL(&rl, "cluster "CID_FMT": installed snapshot on server %s " " up to %"PRIu64":%"PRIu64, CID_ARGS(&raft->cid), s->nickname, rpy->last_term, rpy->last_index); - s->next_index = raft->log_end; - raft_send_append_request(raft, s, 0, "snapshot installed"); + s->next_index = raft->log_start; + raft_send_append_request(raft, s, raft->log_end - s->next_index, + "snapshot installed"); } /* Returns true if 'raft' has grown enough since the last snapshot that -- GitLab From 93ee420935547da00de54d70efe4f4f02a36e8b3 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Thu, 5 Mar 2020 23:48:45 -0800 Subject: [PATCH 048/432] raft: Fix the problem of stuck in candidate role forever. Sometimes a server can stay in candidate role forever, even if the server already sees the new leader and handles append-requests normally. However, because of the wrong role, it appears as disconnected from cluster and so the clients are disconnected. This problem happens when 2 servers become candidates in the same term, and one of them is elected as leader in that term. It can be reproduced by the test cases added in this patch. The root cause is that the current implementation only changes role to follower when a bigger term is observed (in raft_receive_term__()). According to the RAFT paper, if another candidate becomes leader with the same term, the candidate should change to follower. This patch fixes it by changing the role to follower when the leader is being updated in raft_update_leader(). 
Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- ovsdb/raft.c | 19 +++++++++++++-- tests/ovsdb-cluster.at | 55 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 161273487..3dfef8245 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -73,7 +73,8 @@ enum raft_failure_test { FT_CRASH_BEFORE_SEND_EXEC_REQ, FT_CRASH_AFTER_SEND_EXEC_REQ, FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE, - FT_DELAY_ELECTION + FT_DELAY_ELECTION, + FT_DONT_SEND_VOTE_REQUEST }; static enum raft_failure_test failure_test; @@ -1647,6 +1648,7 @@ raft_start_election(struct raft *raft, bool leadership_transfer) } ovs_assert(raft->role != RAFT_LEADER); + raft->role = RAFT_CANDIDATE; /* If there was no leader elected since last election, we know we are * retrying now. */ @@ -1690,7 +1692,9 @@ raft_start_election(struct raft *raft, bool leadership_transfer) .leadership_transfer = leadership_transfer, }, }; - raft_send(raft, &rq); + if (failure_test != FT_DONT_SEND_VOTE_REQUEST) { + raft_send(raft, &rq); + } } /* Vote for ourselves. */ @@ -2965,6 +2969,15 @@ raft_update_leader(struct raft *raft, const struct uuid *sid) }; ignore(ovsdb_log_write_and_free(raft->log, raft_record_to_json(&r))); } + if (raft->role == RAFT_CANDIDATE) { + /* Section 3.4: While waiting for votes, a candidate may + * receive an AppendEntries RPC from another server claiming to + * be leader. If the leader’s term (included in its RPC) is at + * least as large as the candidate’s current term, then the + * candidate recognizes the leader as legitimate and returns to + * follower state. 
*/ + raft->role = RAFT_FOLLOWER; + } return true; } @@ -4673,6 +4686,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, raft_reset_election_timer(raft); } } + } else if (!strcmp(test, "dont-send-vote-request")) { + failure_test = FT_DONT_SEND_VOTE_REQUEST; } else if (!strcmp(test, "clear")) { failure_test = FT_NO_TEST; unixctl_command_reply(conn, "test dismissed"); diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at index 0aa456448..971454515 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -527,6 +527,61 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn]) ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update AT_CLEANUP + +AT_SETUP([OVSDB cluster - competing candidates]) +AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates]) + +n=3 +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +ordinal_schema > schema +AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr]) +cid=`ovsdb-tool db-cid s1.db` +schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` +for i in `seq 2 $n`; do + AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft]) +done + +on_exit 'kill `cat *.pid`' +for i in `seq $n`; do + AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db]) +done +for i in `seq $n`; do + AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) +done + +# We need to simulate the situation when 2 candidates starts election with same +# term. +# +# Before triggering leader election, tell follower s2 don't send vote request (simulating +# vote-request lost or not handled in time), and tell follower s3 to delay +# election timer to make sure s3 doesn't send vote-request before s2 enters +# term 2. 
+AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/failure-test dont-send-vote-request], [0], [ignore]) +AT_CHECK([ovs-appctl -t "`pwd`"/s3 cluster/failure-test delay-election], [0], [ignore]) + +# Restart leader, which will become follower, and both old followers will start +# election as candidate. The new follower (old leader) will vote one of them, +# and the other candidate should step back as follower as again. +kill -9 `cat s1.pid` +AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s1.log --pidfile=s1.pid --unixctl=s1 --remote=punix:s1.ovsdb s1.db]) + +# Tell s1 to delay election timer so that it won't start election before s3 +# becomes candidate. +AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/failure-test delay-election], [0], [ignore]) + +OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Term: 2"]) + +for i in `seq $n`; do + OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "candidate"]) + AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) +done + +for i in `seq $n`; do + OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) +done + +AT_CLEANUP + AT_BANNER([OVSDB - cluster tests]) -- GitLab From cdae6100f89d04c5c29dc86a490b936a204622b7 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Thu, 5 Mar 2020 23:48:46 -0800 Subject: [PATCH 049/432] raft: Unset leader when starting election. During election, there shouldn't be any leader. This change makes sure that a server in candidate role always report leader as "unknown". 
Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- ovsdb/raft.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 3dfef8245..6391eeb13 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -1649,6 +1649,7 @@ raft_start_election(struct raft *raft, bool leadership_transfer) ovs_assert(raft->role != RAFT_LEADER); + raft->leader_sid = UUID_ZERO; raft->role = RAFT_CANDIDATE; /* If there was no leader elected since last election, we know we are * retrying now. */ -- GitLab From ea58abe51557ea7004db4267fcf36f588e6f17c3 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 6 Mar 2020 14:37:13 -0800 Subject: [PATCH 050/432] acinclude: Enable Linux kernel 5.5 Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- acinclude.m4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 9338af947..03d3484c9 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -151,10 +151,10 @@ AC_DEFUN([OVS_CHECK_LINUX], [ AC_MSG_RESULT([$kversion]) if test "$version" -ge 5; then - if test "$version" = 5 && test "$patchlevel" -le 0; then + if test "$version" = 5 && test "$patchlevel" -le 5; then : # Linux 5.x else - AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 5.0.x is not supported (please refer to the FAQ for advice)]) + AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 5.5.x is not supported (please refer to the FAQ for advice)]) fi elif test "$version" = 4; then : # Linux 4.x -- GitLab From f77d85b109f049a45c73fd2bbafe4d4220ca7c78 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 6 Mar 2020 14:37:14 -0800 Subject: [PATCH 051/432] compat: Fix up changes to inet frags in 5.1+ Since Linux kernel release 5.1 the fragments field of the inet_frag_queue structure is removed and now only the rb_fragments structure with an rb_node pointer is used for both ipv4 and ipv6. 
In addition, the atomic_sub and atomic_add functions are replaced with their equivalent long counterparts. Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- acinclude.m4 | 2 ++ datapath/linux/compat/include/net/inet_frag.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/acinclude.m4 b/acinclude.m4 index 03d3484c9..3a555ad93 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -1048,6 +1048,8 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ [OVS_DEFINE([HAVE_RBTREE_RB_LINK_NODE_RCU])]) OVS_GREP_IFELSE([$KSRC/include/net/dst_ops.h], [bool confirm_neigh], [OVS_DEFINE([HAVE_DST_OPS_CONFIRM_NEIGH])]) + OVS_GREP_IFELSE([$KSRC/include/net/inet_frag.h], [fqdir], + [OVS_DEFINE([HAVE_INET_FRAG_FQDIR])]) if cmp -s datapath/linux/kcompat.h.new \ datapath/linux/kcompat.h >/dev/null 2>&1; then diff --git a/datapath/linux/compat/include/net/inet_frag.h b/datapath/linux/compat/include/net/inet_frag.h index 124c8bea7..00784da2b 100644 --- a/datapath/linux/compat/include/net/inet_frag.h +++ b/datapath/linux/compat/include/net/inet_frag.h @@ -12,6 +12,7 @@ #define qp_flags(qp) (qp->q.flags) #endif +#ifndef HAVE_CORRECT_MRU_HANDLING #ifndef HAVE_INET_FRAG_EVICTING static inline bool inet_frag_evicting(struct inet_frag_queue *q) { @@ -22,6 +23,7 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q) #endif /* HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR */ } #endif /* HAVE_INET_FRAG_EVICTING */ +#endif /* HAVE_CORRECT_MRU_HANDLING */ /* Upstream commit 3fd588eb90bf ("inet: frag: remove lru list") dropped this * function, but we call it from our compat code. Provide a noop version. 
*/ @@ -29,6 +31,10 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q) #define inet_frag_lru_move(q) #endif +#ifdef HAVE_INET_FRAG_FQDIR +#define netns_frags fqdir +#endif + #ifndef HAVE_SUB_FRAG_MEM_LIMIT_ARG_STRUCT_NETNS_FRAGS #ifdef HAVE_FRAG_PERCPU_COUNTER_BATCH static inline void rpl_sub_frag_mem_limit(struct netns_frags *nf, int i) @@ -45,13 +51,21 @@ static inline void rpl_add_frag_mem_limit(struct netns_frags *nf, int i) #else /* !frag_percpu_counter_batch */ static inline void rpl_sub_frag_mem_limit(struct netns_frags *nf, int i) { +#ifdef HAVE_INET_FRAG_FQDIR + atomic_long_sub(i, &nf->mem); +#else atomic_sub(i, &nf->mem); +#endif } #define sub_frag_mem_limit rpl_sub_frag_mem_limit static inline void rpl_add_frag_mem_limit(struct netns_frags *nf, int i) { +#ifdef HAVE_INET_FRAG_FQDIR + atomic_long_add(i, &nf->mem); +#else atomic_add(i, &nf->mem); +#endif } #define add_frag_mem_limit rpl_add_frag_mem_limit #endif /* frag_percpu_counter_batch */ -- GitLab From 2ef0f1c23cfaf26294288c45102ec79bb8f3071c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Mar 2020 14:37:15 -0800 Subject: [PATCH 052/432] compat: Move genl_ops policy to genl_family Upstream commit: commit 3b0f31f2b8c9fb348e4530b88f6b64f9621f83d6 Author: Johannes Berg Date: Thu Mar 21 22:51:02 2019 +0100 genetlink: make policy common to family Since maxattr is common, the policy can't really differ sanely, so make it common as well. The only user that did in fact manage to make a non-common policy is taskstats, which has to be really careful about it (since it's still using a common maxattr!). This is no longer supported, but we can fake it using pre_doit. This reduces the size of e.g. 
nl80211.o (which has lots of commands): text data bss dec hex filename 398745 14323 2240 415308 6564c net/wireless/nl80211.o (before) 397913 14331 2240 414484 65314 net/wireless/nl80211.o (after) -------------------------------- -832 +8 0 -824 Which is obviously just 8 bytes for each command, and an added 8 bytes for the new policy pointer. I'm not sure why the ops list is counted as .text though. Most of the code transformations were done using the following spatch: @ops@ identifier OPS; expression POLICY; @@ struct genl_ops OPS[] = { ..., { - .policy = POLICY, }, ... }; @@ identifier ops.OPS; expression ops.POLICY; identifier fam; expression M; @@ struct genl_family fam = { .ops = OPS, .maxattr = M, + .policy = POLICY, ... }; This also gets rid of devlink_nl_cmd_region_read_dumpit() accessing the cb->data as ops, which we want to change in a later genl patch. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller Since commit 3b0f31f2b8c9f ("genetlink: make policy common to family") the policy field of the genl_ops structure has been moved into the genl_family structure. Add necessary compat layer infrastructure to still support older kernels. 
Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- acinclude.m4 | 3 +++ datapath/conntrack.c | 9 +++++++++ datapath/datapath.c | 38 ++++++++++++++++++++++++++++++++++++++ datapath/meter.c | 11 +++++++++++ 4 files changed, 61 insertions(+) diff --git a/acinclude.m4 b/acinclude.m4 index 3a555ad93..729d2c65d 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -1050,6 +1050,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ [OVS_DEFINE([HAVE_DST_OPS_CONFIRM_NEIGH])]) OVS_GREP_IFELSE([$KSRC/include/net/inet_frag.h], [fqdir], [OVS_DEFINE([HAVE_INET_FRAG_FQDIR])]) + OVS_FIND_FIELD_IFELSE([$KSRC/include/net/genetlink.h], [genl_ops], + [policy], + [OVS_DEFINE([HAVE_GENL_OPS_POLICY])]) if cmp -s datapath/linux/kcompat.h.new \ datapath/linux/kcompat.h >/dev/null 2>&1; then diff --git a/datapath/conntrack.c b/datapath/conntrack.c index 838cf63c9..b7eb53f93 100644 --- a/datapath/conntrack.c +++ b/datapath/conntrack.c @@ -2312,7 +2312,9 @@ static struct genl_ops ct_limit_genl_ops[] = { #endif .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = ct_limit_policy, +#endif .doit = ovs_ct_limit_cmd_set, }, { .cmd = OVS_CT_LIMIT_CMD_DEL, @@ -2321,7 +2323,9 @@ static struct genl_ops ct_limit_genl_ops[] = { #endif .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = ct_limit_policy, +#endif .doit = ovs_ct_limit_cmd_del, }, { .cmd = OVS_CT_LIMIT_CMD_GET, @@ -2329,7 +2333,9 @@ static struct genl_ops ct_limit_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = 0, /* OK for unprivileged users. 
*/ +#ifdef HAVE_GENL_OPS_POLICY .policy = ct_limit_policy, +#endif .doit = ovs_ct_limit_cmd_get, }, }; @@ -2343,6 +2349,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = { .name = OVS_CT_LIMIT_FAMILY, .version = OVS_CT_LIMIT_VERSION, .maxattr = OVS_CT_LIMIT_ATTR_MAX, +#ifndef HAVE_GENL_OPS_POLICY + .policy = ct_limit_policy, +#endif .netnsok = true, .parallel_ops = true, .ops = ct_limit_genl_ops, diff --git a/datapath/datapath.c b/datapath/datapath.c index 853bfb5af..6f74c8feb 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -657,7 +657,9 @@ static struct genl_ops dp_packet_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = packet_policy, +#endif .doit = ovs_packet_cmd_execute } }; @@ -667,6 +669,9 @@ static struct genl_family dp_packet_genl_family __ro_after_init = { .name = OVS_PACKET_FAMILY, .version = OVS_PACKET_VERSION, .maxattr = OVS_PACKET_ATTR_MAX, +#ifndef HAVE_GENL_OPS_POLICY + .policy = packet_policy, +#endif .netnsok = true, .parallel_ops = true, .ops = dp_packet_genl_ops, @@ -1449,7 +1454,9 @@ static const struct genl_ops dp_flow_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = flow_policy, +#endif .doit = ovs_flow_cmd_new }, { .cmd = OVS_FLOW_CMD_DEL, @@ -1457,7 +1464,9 @@ static const struct genl_ops dp_flow_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ +#ifdef HAVE_GENL_OPS_POLICY .policy = flow_policy, +#endif .doit = ovs_flow_cmd_del }, { .cmd = OVS_FLOW_CMD_GET, @@ -1465,7 +1474,9 @@ static const struct genl_ops dp_flow_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = 0, /* OK for unprivileged users. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = flow_policy, +#endif .doit = ovs_flow_cmd_get, .dumpit = ovs_flow_cmd_dump }, @@ -1474,7 +1485,9 @@ static const struct genl_ops dp_flow_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = flow_policy, +#endif .doit = ovs_flow_cmd_set, }, }; @@ -1484,6 +1497,9 @@ static struct genl_family dp_flow_genl_family __ro_after_init = { .name = OVS_FLOW_FAMILY, .version = OVS_FLOW_VERSION, .maxattr = OVS_FLOW_ATTR_MAX, +#ifndef HAVE_GENL_OPS_POLICY + .policy = flow_policy, +#endif .netnsok = true, .parallel_ops = true, .ops = dp_flow_genl_ops, @@ -1853,7 +1869,9 @@ static const struct genl_ops dp_datapath_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = datapath_policy, +#endif .doit = ovs_dp_cmd_new }, { .cmd = OVS_DP_CMD_DEL, @@ -1861,7 +1879,9 @@ static const struct genl_ops dp_datapath_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = datapath_policy, +#endif .doit = ovs_dp_cmd_del }, { .cmd = OVS_DP_CMD_GET, @@ -1869,7 +1889,9 @@ static const struct genl_ops dp_datapath_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = 0, /* OK for unprivileged users. 
*/ +#ifdef HAVE_GENL_OPS_POLICY .policy = datapath_policy, +#endif .doit = ovs_dp_cmd_get, .dumpit = ovs_dp_cmd_dump }, @@ -1878,7 +1900,9 @@ static const struct genl_ops dp_datapath_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = datapath_policy, +#endif .doit = ovs_dp_cmd_set, }, }; @@ -1888,6 +1912,9 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { .name = OVS_DATAPATH_FAMILY, .version = OVS_DATAPATH_VERSION, .maxattr = OVS_DP_ATTR_MAX, +#ifndef HAVE_GENL_OPS_POLICY + .policy = datapath_policy, +#endif .netnsok = true, .parallel_ops = true, .ops = dp_datapath_genl_ops, @@ -2310,7 +2337,9 @@ static const struct genl_ops dp_vport_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = vport_policy, +#endif .doit = ovs_vport_cmd_new }, { .cmd = OVS_VPORT_CMD_DEL, @@ -2318,7 +2347,9 @@ static const struct genl_ops dp_vport_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = vport_policy, +#endif .doit = ovs_vport_cmd_del }, { .cmd = OVS_VPORT_CMD_GET, @@ -2326,7 +2357,9 @@ static const struct genl_ops dp_vport_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = 0, /* OK for unprivileged users. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = vport_policy, +#endif .doit = ovs_vport_cmd_get, .dumpit = ovs_vport_cmd_dump }, @@ -2335,7 +2368,9 @@ static const struct genl_ops dp_vport_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. 
*/ +#ifdef HAVE_GENL_OPS_POLICY .policy = vport_policy, +#endif .doit = ovs_vport_cmd_set, }, }; @@ -2345,6 +2380,9 @@ struct genl_family dp_vport_genl_family __ro_after_init = { .name = OVS_VPORT_FAMILY, .version = OVS_VPORT_VERSION, .maxattr = OVS_VPORT_ATTR_MAX, +#ifndef HAVE_GENL_OPS_POLICY + .policy = vport_policy, +#endif .netnsok = true, .parallel_ops = true, .ops = dp_vport_genl_ops, diff --git a/datapath/meter.c b/datapath/meter.c index 7d8f51a8f..8cecd5a34 100644 --- a/datapath/meter.c +++ b/datapath/meter.c @@ -542,7 +542,9 @@ static struct genl_ops dp_meter_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = 0, /* OK for unprivileged users. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = meter_policy, +#endif .doit = ovs_meter_cmd_features }, { .cmd = OVS_METER_CMD_SET, @@ -552,7 +554,9 @@ static struct genl_ops dp_meter_genl_ops[] = { .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = meter_policy, +#endif .doit = ovs_meter_cmd_set, }, { .cmd = OVS_METER_CMD_GET, @@ -560,7 +564,9 @@ static struct genl_ops dp_meter_genl_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, #endif .flags = 0, /* OK for unprivileged users. */ +#ifdef HAVE_GENL_OPS_POLICY .policy = meter_policy, +#endif .doit = ovs_meter_cmd_get, }, { .cmd = OVS_METER_CMD_DEL, @@ -570,7 +576,9 @@ static struct genl_ops dp_meter_genl_ops[] = { .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN * privilege. 
*/ +#ifdef HAVE_GENL_OPS_POLICY .policy = meter_policy, +#endif .doit = ovs_meter_cmd_del }, }; @@ -584,6 +592,9 @@ struct genl_family dp_meter_genl_family __ro_after_init = { .name = OVS_METER_FAMILY, .version = OVS_METER_VERSION, .maxattr = OVS_METER_ATTR_MAX, +#ifndef HAVE_GENL_OPS_POLICY + .policy = meter_policy, +#endif .netnsok = true, .parallel_ops = true, .ops = dp_meter_genl_ops, -- GitLab From 384868caafc3b188319a3a9b9b3ede4ef910273d Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 6 Mar 2020 14:37:16 -0800 Subject: [PATCH 053/432] compat: Remove flex_array code Flex array support is removed since kernel 5.1. Convert to use kvmalloc_array instead. Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- datapath/linux/compat/stt.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c index 21fef09f4..7b46d1a20 100644 --- a/datapath/linux/compat/stt.c +++ b/datapath/linux/compat/stt.c @@ -13,7 +13,6 @@ #include #include -#include #include #include #include @@ -136,7 +135,7 @@ struct pkt_frag { }; struct stt_percpu { - struct flex_array *frag_hash; + struct pkt_frag *frag_hash; struct list_head frag_lru; unsigned int frag_mem_used; @@ -1079,8 +1078,7 @@ static struct pkt_frag *lookup_frag(struct net *net, int i; for (i = 0; i < FRAG_HASH_SEGS; i++) { - frag = flex_array_get(stt_percpu->frag_hash, - hash & (FRAG_HASH_ENTRIES - 1)); + frag = &stt_percpu->frag_hash[hash & (FRAG_HASH_ENTRIES - 1)]; if (frag->skbs && time_before(jiffies, frag->timestamp + FRAG_EXP_TIME) && @@ -1533,7 +1531,7 @@ static void clean_percpu(struct work_struct *work) for (j = 0; j < FRAG_HASH_ENTRIES; j++) { struct pkt_frag *frag; - frag = flex_array_get(stt_percpu->frag_hash, j); + frag = &stt_percpu->frag_hash[j]; if (!frag->skbs || time_before(jiffies, frag->timestamp + FRAG_EXP_TIME)) continue; @@ -1631,26 +1629,20 @@ static int stt_start(struct net *net) 
for_each_possible_cpu(i) { struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i); - struct flex_array *frag_hash; + struct pkt_frag *frag_hash; spin_lock_init(&stt_percpu->lock); INIT_LIST_HEAD(&stt_percpu->frag_lru); get_random_bytes(&per_cpu(pkt_seq_counter, i), sizeof(u32)); - frag_hash = flex_array_alloc(sizeof(struct pkt_frag), - FRAG_HASH_ENTRIES, - GFP_KERNEL | __GFP_ZERO); + frag_hash = kvmalloc_array(sizeof(struct pkt_frag), + FRAG_HASH_ENTRIES, + GFP_KERNEL | __GFP_ZERO); if (!frag_hash) { err = -ENOMEM; goto free_percpu; } stt_percpu->frag_hash = frag_hash; - - err = flex_array_prealloc(stt_percpu->frag_hash, 0, - FRAG_HASH_ENTRIES, - GFP_KERNEL | __GFP_ZERO); - if (err) - goto free_percpu; } schedule_clean_percpu(); n_tunnels++; @@ -1691,7 +1683,7 @@ free_percpu: struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i); if (stt_percpu->frag_hash) - flex_array_free(stt_percpu->frag_hash); + kvfree(stt_percpu->frag_hash); } free_percpu(stt_percpu_data); @@ -1718,11 +1710,11 @@ static void stt_cleanup(struct net *net) for (j = 0; j < FRAG_HASH_ENTRIES; j++) { struct pkt_frag *frag; - frag = flex_array_get(stt_percpu->frag_hash, j); + frag = &stt_percpu->frag_hash[j]; kfree_skb_list(frag->skbs); } - flex_array_free(stt_percpu->frag_hash); + kvfree(stt_percpu->frag_hash); } free_percpu(stt_percpu_data); -- GitLab From 9cfa471811be29e88ca89dc698690611cea1707a Mon Sep 17 00:00:00 2001 From: Pankaj Bharadiya Date: Fri, 6 Mar 2020 14:37:17 -0800 Subject: [PATCH 054/432] datapath: Use sizeof_field macro Upstream commit: commit c593642c8be046915ca3a4a300243a68077cd207 Author: Pankaj Bharadiya Date: Mon Dec 9 10:31:43 2019 -0800 treewide: Use sizeof_field() macro Replace all the occurrences of FIELD_SIZEOF() with sizeof_field() except at places where these are defined. Later patches will remove the unused definition of FIELD_SIZEOF(). 
This patch is generated using following script: EXCLUDE_FILES="include/linux/stddef.h|include/linux/kernel.h" git grep -l -e "\bFIELD_SIZEOF\b" | while read file; do if [[ "$file" =~ $EXCLUDE_FILES ]]; then continue fi sed -i -e 's/\bFIELD_SIZEOF\b/sizeof_field/g' $file; done Signed-off-by: Pankaj Bharadiya Link: https://lore.kernel.org/r/20190924105839.110713-3-pankaj.laxminarayan.bharadiya@intel.com Co-developed-by: Kees Cook Signed-off-by: Kees Cook Acked-by: David Miller # for net Also added a compatibility layer macro for older kernels that still use FIELD_SIZEOF Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- datapath/datapath.c | 2 +- datapath/flow.h | 4 ++-- datapath/linux/compat/geneve.c | 2 +- datapath/linux/compat/gso.c | 2 +- datapath/linux/compat/include/linux/kernel.h | 4 ++++ datapath/linux/compat/include/net/ip_tunnels.h | 4 ++-- datapath/linux/compat/ip6_gre.c | 4 ++-- datapath/linux/compat/ip_gre.c | 4 ++-- datapath/linux/compat/vxlan.c | 4 ++-- 9 files changed, 17 insertions(+), 13 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 6f74c8feb..f0c345723 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -2515,7 +2515,7 @@ static int __init dp_init(void) { int err; - BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_field(struct sk_buff, cb)); pr_info("Open vSwitch switching datapath %s\n", VERSION); diff --git a/datapath/flow.h b/datapath/flow.h index 4ad5363e3..584d9f565 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -50,7 +50,7 @@ enum sw_flow_mac_proto { * matching for small options. 
*/ #define TUN_METADATA_OFFSET(opt_len) \ - (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len) + (sizeof_field(struct sw_flow_key, tun_opts) - opt_len) #define TUN_METADATA_OPTS(flow_key, opt_len) \ ((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len))) @@ -65,7 +65,7 @@ struct vlan_head { #define OVS_SW_FLOW_KEY_METADATA_SIZE \ (offsetof(struct sw_flow_key, recirc_id) + \ - FIELD_SIZEOF(struct sw_flow_key, recirc_id)) + sizeof_field(struct sw_flow_key, recirc_id)) struct ovs_key_nsh { struct ovs_nsh_key_base base; diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c index c044b1489..5b183963d 100644 --- a/datapath/linux/compat/geneve.c +++ b/datapath/linux/compat/geneve.c @@ -1407,7 +1407,7 @@ static void geneve_setup(struct net_device *dev) static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_ID] = { .type = NLA_U32 }, - [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, [IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) }, [IFLA_GENEVE_TTL] = { .type = NLA_U8 }, [IFLA_GENEVE_TOS] = { .type = NLA_U8 }, diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c index 48a56b9f5..65da5d876 100644 --- a/datapath/linux/compat/gso.c +++ b/datapath/linux/compat/gso.c @@ -171,7 +171,7 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb, __be16 proto = skb->protocol; char cb[sizeof(skb->cb)]; - BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > FIELD_SIZEOF(struct sk_buff, cb)); + BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > sizeof_field(struct sk_buff, cb)); OVS_GSO_CB(skb)->ipv6 = (sa_family == AF_INET6); /* setup whole inner packet to get protocol. 
*/ __skb_pull(skb, mac_offset); diff --git a/datapath/linux/compat/include/linux/kernel.h b/datapath/linux/compat/include/linux/kernel.h index 2e81abc2f..106b5940a 100644 --- a/datapath/linux/compat/include/linux/kernel.h +++ b/datapath/linux/compat/include/linux/kernel.h @@ -32,4 +32,8 @@ #define U32_MAX ((u32)~0U) #endif +#ifndef sizeof_field +#define sizeof_field(t, f) (sizeof(((t*)0)->f)) +#endif + #endif /* linux/kernel.h */ diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h index da64a94ad..617a753c7 100644 --- a/datapath/linux/compat/include/net/ip_tunnels.h +++ b/datapath/linux/compat/include/net/ip_tunnels.h @@ -139,8 +139,8 @@ struct tnl_ptk_info { /* Used to memset ipv4 address padding. */ #define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst) #define IP_TUNNEL_KEY_IPV4_PAD_LEN \ - (FIELD_SIZEOF(struct ip_tunnel_key, u) - \ - FIELD_SIZEOF(struct ip_tunnel_key, u.ipv4)) + (sizeof_field(struct ip_tunnel_key, u) - \ + sizeof_field(struct ip_tunnel_key, u.ipv4)) struct ip_tunnel_key { __be64 tun_id; diff --git a/datapath/linux/compat/ip6_gre.c b/datapath/linux/compat/ip6_gre.c index 7fd345309..da0fa432b 100644 --- a/datapath/linux/compat/ip6_gre.c +++ b/datapath/linux/compat/ip6_gre.c @@ -2311,8 +2311,8 @@ static const struct nla_policy ip6gre_policy[RPL_IFLA_GRE_MAX + 1] = { [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, [IFLA_GRE_IKEY] = { .type = NLA_U32 }, [IFLA_GRE_OKEY] = { .type = NLA_U32 }, - [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) }, - [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) }, + [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct ipv6hdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct ipv6hdr, daddr) }, [IFLA_GRE_TTL] = { .type = NLA_U8 }, [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, diff --git a/datapath/linux/compat/ip_gre.c b/datapath/linux/compat/ip_gre.c index 
04f994f97..41379b19a 100644 --- a/datapath/linux/compat/ip_gre.c +++ b/datapath/linux/compat/ip_gre.c @@ -1096,8 +1096,8 @@ static const struct nla_policy ipgre_policy[RPL_IFLA_GRE_MAX + 1] = { [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, [IFLA_GRE_IKEY] = { .type = NLA_U32 }, [IFLA_GRE_OKEY] = { .type = NLA_U32 }, - [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, - [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) }, [IFLA_GRE_TTL] = { .type = NLA_U8 }, [IFLA_GRE_TOS] = { .type = NLA_U8 }, [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index 23118e8b6..6090f4290 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -1680,10 +1680,10 @@ static void vxlan_raw_setup(struct net_device *dev) static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_ID] = { .type = NLA_U32 }, - [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_VXLAN_GROUP] = { .len = sizeof_field(struct iphdr, daddr) }, [IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) }, [IFLA_VXLAN_LINK] = { .type = NLA_U32 }, - [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, + [IFLA_VXLAN_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) }, [IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) }, [IFLA_VXLAN_TOS] = { .type = NLA_U8 }, [IFLA_VXLAN_TTL] = { .type = NLA_U8 }, -- GitLab From 0017700103d99823ceb003e81f629f05b9a5752a Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 6 Mar 2020 14:37:18 -0800 Subject: [PATCH 055/432] datapath: Kbuild: Add kcompat.h header to front of NOSTDINC Since this commit in the Linux upstream kernel: 'commit 9b9a3f20cbe0 ("kbuild: split final module linking out into Makefile.modfinal")' The openvswitch kernel module fails to build against the upstream 
Linux kernel. The cause of the build failure is that the include of the KBUILD_EXTMOD variable was dropped in Makefile.modfinal when it was split out from Makefile.modpost. Our Kbuild was setting the ccflags-y variable to include our kcompat.h header as the first header file. The Linux kernel maintainer has said that it is incorrect to rely on the ccflags-y variable for the modfinal phase of the build so that is why KBUILD_EXTMOD is not included. We fix this by breaking a different Linux kernel make rule. We add '-include $(builddir)/kcompat.h' to the front of the NOSTDINC variable setting in our Kbuild makefile. As noted already in the comment for the NOSTDINC setting: \# These include directories have to go before -I$(KSRC)/include. \# NOSTDINC_FLAGS just happens to be a variable that goes in the \# right place, even though it's conceptually incorrect. So we continue the misuse of the NOSTDINC variable to fix this issue as well. The assumption of the Linux kernel maintainers is that any local, out-of-tree build include files can be added to the end of the command line. In our case that is wrong of course, but there is nothing we can do about it that I know of other than using some utility like unifdef to strip out offending chunks of our compatibility layer code before invocation of Makefile.modfinal. That is a big change that would take a lot of work to implement. We could ask the Linux kernel maintainers to provide some way for out-of-tree kernel modules to include their own header files first in a proper manner. I consider that to be a very low probability of success but something we could ask about. For now we cheat and take the easy way out. 
Reported-by: David Ahern Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- datapath/linux/Kbuild.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datapath/linux/Kbuild.in b/datapath/linux/Kbuild.in index 9e3259f19..395b0cbc0 100644 --- a/datapath/linux/Kbuild.in +++ b/datapath/linux/Kbuild.in @@ -16,7 +16,7 @@ ccflags-y += -include $(builddir)/kcompat.h # These include directories have to go before -I$(KSRC)/include. # NOSTDINC_FLAGS just happens to be a variable that goes in the # right place, even though it's conceptually incorrect. -NOSTDINC_FLAGS += -I$(top_srcdir)/include -I$(srcdir)/compat -I$(srcdir)/compat/include +NOSTDINC_FLAGS += -include $(builddir)/kcompat.h -I$(top_srcdir)/include -I$(srcdir)/compat -I$(srcdir)/compat/include obj-m := $(subst _,-,$(patsubst %,%.o,$(build_modules))) -- GitLab From 6db0f72df38df9e722556ed8943a70e52f4f163a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Mar 2020 14:37:19 -0800 Subject: [PATCH 056/432] compat: Use nla_parse deprecated functions Upstream commit: commit 8cb081746c031fb164089322e2336a0bf5b3070c Author: Johannes Berg Date: Fri Apr 26 14:07:28 2019 +0200 netlink: make validation more configurable for future strictness We currently have two levels of strict validation: 1) liberal (default) - undefined (type >= max) & NLA_UNSPEC attributes accepted - attribute length >= expected accepted - garbage at end of message accepted 2) strict (opt-in) - NLA_UNSPEC attributes accepted - attribute length >= expected accepted Split out parsing strictness into four different options: * TRAILING - check that there's no trailing data after parsing attributes (in message or nested) * MAXTYPE - reject attrs > max known type * UNSPEC - reject attributes with NLA_UNSPEC policy entries * STRICT_ATTRS - strictly validate attribute size The default for future things should be *everything*. 
The current *_strict() is a combination of TRAILING and MAXTYPE, and is renamed to _deprecated_strict(). The current regular parsing has none of this, and is renamed to *_parse_deprecated(). Additionally it allows us to selectively set one of the new flags even on old policies. Notably, the UNSPEC flag could be useful in this case, since it can be arranged (by filling in the policy) to not be an incompatible userspace ABI change, but would then going forward prevent forgetting attribute entries. Similar can apply to the POLICY flag. We end up with the following renames: * nla_parse -> nla_parse_deprecated * nla_parse_strict -> nla_parse_deprecated_strict * nlmsg_parse -> nlmsg_parse_deprecated * nlmsg_parse_strict -> nlmsg_parse_deprecated_strict * nla_parse_nested -> nla_parse_nested_deprecated * nla_validate_nested -> nla_validate_nested_deprecated Using spatch, of course: @@ expression TB, MAX, HEAD, LEN, POL, EXT; @@ -nla_parse(TB, MAX, HEAD, LEN, POL, EXT) +nla_parse_deprecated(TB, MAX, HEAD, LEN, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression NLH, HDRLEN, TB, MAX, POL, EXT; @@ -nlmsg_parse_strict(NLH, HDRLEN, TB, MAX, POL, EXT) +nlmsg_parse_deprecated_strict(NLH, HDRLEN, TB, MAX, POL, EXT) @@ expression TB, MAX, NLA, POL, EXT; @@ -nla_parse_nested(TB, MAX, NLA, POL, EXT) +nla_parse_nested_deprecated(TB, MAX, NLA, POL, EXT) @@ expression START, MAX, POL, EXT; @@ -nla_validate_nested(START, MAX, POL, EXT) +nla_validate_nested_deprecated(START, MAX, POL, EXT) @@ expression NLH, HDRLEN, MAX, POL, EXT; @@ -nlmsg_validate(NLH, HDRLEN, MAX, POL, EXT) +nlmsg_validate_deprecated(NLH, HDRLEN, MAX, POL, EXT) For this patch, don't actually add the strict, non-renamed versions yet so that it breaks compile if I get it wrong. 
Also, while at it, make nla_validate and nla_parse go down to a common __nla_validate_parse() function to avoid code duplication. Ultimately, this allows us to have very strict validation for every new caller of nla_parse()/nlmsg_parse() etc as re-introduced in the next patch, while existing things will continue to work as is. In effect then, this adds fully strict validation for any new command. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller Backported the portions of this commit applicable to openvswitch and added the necessary compatibility layer changes to support older kernels. Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- acinclude.m4 | 3 +++ datapath/datapath.c | 4 ++-- datapath/flow_netlink.c | 9 +++++---- datapath/linux/compat/include/net/netlink.h | 12 ++++++++++-- datapath/meter.c | 8 +++++--- datapath/vport-vxlan.c | 4 ++-- 6 files changed, 27 insertions(+), 13 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 729d2c65d..02efea6de 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -1053,6 +1053,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_FIND_FIELD_IFELSE([$KSRC/include/net/genetlink.h], [genl_ops], [policy], [OVS_DEFINE([HAVE_GENL_OPS_POLICY])]) + OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], + [nla_parse_deprecated_strict], + [OVS_DEFINE([HAVE_NLA_PARSE_DEPRECATED_STRICT])]) if cmp -s datapath/linux/kcompat.h.new \ datapath/linux/kcompat.h >/dev/null 2>&1; then diff --git a/datapath/datapath.c b/datapath/datapath.c index f0c345723..a7af7849a 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1401,8 +1401,8 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) u32 ufid_flags; int err; - err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, - OVS_FLOW_ATTR_MAX, flow_policy, NULL); + err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy, NULL); if (err) return err; ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index 9fc1a1922..d3fd77106 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -2859,8 +2859,8 @@ static int validate_userspace(const struct nlattr *attr) struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1]; int error; - error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr, - userspace_policy, NULL); + error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr, + userspace_policy, NULL); if (error) return error; @@ -2891,8 +2891,9 @@ static int validate_and_copy_check_pkt_len(struct net *net, int nested_acts_start; int start, err; - err = nla_parse_nested(a, OVS_CHECK_PKT_LEN_ATTR_MAX, attr, - cpl_policy, NULL); + err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX, + nla_data(attr), nla_len(attr), + cpl_policy, NULL); if (err) return err; diff --git a/datapath/linux/compat/include/net/netlink.h b/datapath/linux/compat/include/net/netlink.h index 34fc3460d..84e073974 100644 --- a/datapath/linux/compat/include/net/netlink.h +++ b/datapath/linux/compat/include/net/netlink.h @@ -143,6 +143,11 @@ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value, #endif +#ifndef HAVE_NLA_PARSE_DEPRECATED_STRICT +#define nla_parse_nested_deprecated nla_parse_nested +#define nla_parse_deprecated_strict nla_parse +#define genlmsg_parse_deprecated genlmsg_parse + #ifndef HAVE_NETLINK_EXT_ACK struct netlink_ext_ack; @@ -153,7 +158,8 @@ static inline int rpl_nla_parse_nested(struct nlattr *tb[], int maxtype, { return nla_parse_nested(tb, maxtype, nla, policy); } -#define nla_parse_nested rpl_nla_parse_nested +#undef nla_parse_nested_deprecated +#define nla_parse_nested_deprecated rpl_nla_parse_nested static inline int rpl_nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, @@ -162,8 +168,10 @@ static inline int rpl_nla_parse(struct nlattr **tb, int maxtype, { return nla_parse(tb, maxtype, head, len, policy); } -#define nla_parse 
rpl_nla_parse +#undef nla_parse_deprecated_strict +#define nla_parse_deprecated_strict rpl_nla_parse #endif +#endif /* HAVE_NLA_PARSE_DEPRECATED_STRICT */ #ifndef HAVE_NLA_NEST_START_NOFLAG static inline struct nlattr *rpl_nla_nest_start_noflag(struct sk_buff *skb, diff --git a/datapath/meter.c b/datapath/meter.c index 8cecd5a34..92c9c3671 100644 --- a/datapath/meter.c +++ b/datapath/meter.c @@ -239,9 +239,11 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; u32 band_max_delta_t; - err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX, - nla_data(nla), nla_len(nla), band_policy, - NULL); + err = nla_parse_deprecated_strict((struct nlattr **)&attr, + OVS_BAND_ATTR_MAX, + nla_data(nla), + nla_len(nla), + band_policy, NULL); if (err) goto exit_free_meter; diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c index 70ed376e3..79331c968 100644 --- a/datapath/vport-vxlan.c +++ b/datapath/vport-vxlan.c @@ -99,8 +99,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr, if (nla_len(attr) < sizeof(struct nlattr)) return -EINVAL; - err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy, - NULL); + err = nla_parse_nested_deprecated(exts, OVS_VXLAN_EXT_MAX, attr, + exts_policy, NULL); if (err < 0) return err; -- GitLab From 73a6e112a5395c28b22126a205ce3f9ed17739ca Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 6 Mar 2020 14:37:20 -0800 Subject: [PATCH 057/432] datapath: conntrack: mark expected switch fall-through Upstream commit: commit 279badc2a85be83e0187b8c566e3b476b76a87a2 Author: Gustavo A. R. Silva Date: Thu Oct 19 12:55:03 2017 -0500 openvswitch: conntrack: mark expected switch fall-through In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. Notice that in this particular case I placed a "fall through" comment on its own line, which is what GCC is expecting to find. Signed-off-by: Gustavo A. R. 
Silva Signed-off-by: David S. Miller Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- datapath/conntrack.c | 1 + 1 file changed, 1 insertion(+) diff --git a/datapath/conntrack.c b/datapath/conntrack.c index b7eb53f93..5b4d6cce0 100644 --- a/datapath/conntrack.c +++ b/datapath/conntrack.c @@ -844,6 +844,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, } } /* Non-ICMP, fall thru to initialize if needed. */ + /* fall through */ case IP_CT_NEW: /* Seen it before? This can happen for loopback, retrans, * or local packets. -- GitLab From 59e994426645358a271a0c9f485e9defafffd474 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 6 Mar 2020 14:37:21 -0800 Subject: [PATCH 058/432] datapath: Update kernel test list, news and FAQ We are adding support for Linux kernels up to 5.5 so update the Travis test list, NEWS and FAQ. Signed-off-by: Greg Rose Signed-off-by: Ben Pfaff --- .travis.yml | 2 +- Documentation/faq/releases.rst | 1 + NEWS | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index abd2a9117..ef9f86755 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,7 +38,7 @@ env: - TESTSUITE=1 OPTS="--enable-shared" - TESTSUITE=1 DPDK=1 - TESTSUITE=1 LIBS=-ljemalloc - - KERNEL_LIST="5.0 4.20 4.19 4.18 4.17 4.16" + - KERNEL_LIST="5.5 4.20 4.19 4.18 4.17 4.16" - KERNEL_LIST="4.15 4.14 4.9 4.4 3.19 3.16" - AFXDP=1 KERNEL=5.3 - M32=1 OPTS="--disable-ssl" diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index 6ff47d788..748540b91 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -70,6 +70,7 @@ Q: What Linux kernel versions does each Open vSwitch release work with? 
2.10.x 3.10 to 4.17 2.11.x 3.10 to 4.18 2.12.x 3.10 to 5.0 + 2.14.x 3.10 to 5.5 ============ ============== Open vSwitch userspace should also work with the Linux kernel module built diff --git a/NEWS b/NEWS index 872e1863c..32ca2e0c6 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,8 @@ Post-v2.13.0 - DPDK: * Deprecated DPDK pdump packet capture support removed. * Deprecated DPDK ring ports (dpdkr) are no longer supported. + - Linux datapath: + * Support for kernel versions up to 5.5.x. v2.13.0 - 14 Feb 2020 -- GitLab From 451dba559e03fdb9ca72f7a055362423447a5303 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 12 Mar 2020 10:57:44 +0100 Subject: [PATCH 059/432] travis: Disable sindex build in sparse. Sparse introduced a new utility 'sindex' for semantic search, but unfortunately it fails to build in Travis environment. Disabling it explicitly as we don't need it anyway. Acked-by: Numan Siddique Signed-off-by: Ilya Maximets --- .travis/linux-prepare.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh index fda13e7d2..8cbbd5623 100755 --- a/.travis/linux-prepare.sh +++ b/.travis/linux-prepare.sh @@ -7,9 +7,11 @@ set -ev # Explicitly disable sparse support for llvm because some travis # environments claim to have LLVM (llvm-config exists and works) but # linking against it fails. +# Disabling sqlite support because sindex build fails and we don't +# really need this utility being installed. git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git cd sparse -make -j4 HAVE_LLVM= install +make -j4 HAVE_LLVM= HAVE_SQLITE= install cd .. 
pip3 install --disable-pip-version-check --user flake8 hacking -- GitLab From 6f37078481bc31b53090aa85bc45ce10206c3569 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Sun, 8 Mar 2020 14:50:23 +0200 Subject: [PATCH 060/432] tc: Fix nat port range when offloading ct action Port range struct is currently union so the last min/max port assignment wins, and kernel doesn't receive the range. Change it to struct type. Fixes: 2bf6ffb76ac6 ("netdev-offload-tc: Add conntrack nat support") Signed-off-by: Paul Blakey Signed-off-by: Ben Pfaff --- lib/tc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tc.h b/lib/tc.h index d31c0953e..24a4994fd 100644 --- a/lib/tc.h +++ b/lib/tc.h @@ -235,7 +235,7 @@ struct tc_action { } ipv6; }; - union { + struct { ovs_be16 min; ovs_be16 max; } port; -- GitLab From 4e1ce6f66e0a97781284e3ea41a3ad3e9a0f83a3 Mon Sep 17 00:00:00 2001 From: Yanqin Wei Date: Fri, 28 Feb 2020 00:12:21 +0800 Subject: [PATCH 061/432] pvector: Use acquire-release semantics for size. Read/write concurrency of pvector library is implemented by a temp vector and RCU protection. Considering performance reason, insertion does not follow this scheme. In insertion function, a thread fence ensures size increment is done after new entry is stored. But there is no barrier in the iteration function (pvector_cursor_init). Entry point access may be reordered before loading vector size, so the invalid entry point may be loaded during vector iteration. This patch fixes it by acquire-release pair. It can guarantee new size is observed by reader after new entry stored by writer. And this is implemented by one-way barrier instead of two-way memory fence.
Fixes: fe7cfa5c3f19 ("lib/pvector: Non-intrusive RCU priority vector.") Reviewed-by: Gavin Hu Reviewed-by: Lijian Zhang Signed-off-by: Yanqin Wei Signed-off-by: Ilya Maximets --- lib/pvector.c | 18 +++++++++++------- lib/pvector.h | 13 +++++++++---- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/lib/pvector.c b/lib/pvector.c index aaeee9214..cc527fdc4 100644 --- a/lib/pvector.c +++ b/lib/pvector.c @@ -33,7 +33,7 @@ pvector_impl_alloc(size_t size) struct pvector_impl *impl; impl = xmalloc(sizeof *impl + size * sizeof impl->vector[0]); - impl->size = 0; + atomic_init(&impl->size, 0); impl->allocated = size; return impl; @@ -117,18 +117,22 @@ pvector_insert(struct pvector *pvec, void *ptr, int priority) { struct pvector_impl *temp = pvec->temp; struct pvector_impl *old = pvector_impl_get(pvec); + size_t size; ovs_assert(ptr != NULL); + /* There is no possible concurrent writer. Insertions must be protected + * by mutex or be always excuted from the same thread. */ + atomic_read_relaxed(&old->size, &size); + /* Check if can add to the end without reallocation. */ - if (!temp && old->allocated > old->size && - (!old->size || priority <= old->vector[old->size - 1].priority)) { - old->vector[old->size].ptr = ptr; - old->vector[old->size].priority = priority; + if (!temp && old->allocated > size && + (!size || priority <= old->vector[size - 1].priority)) { + old->vector[size].ptr = ptr; + old->vector[size].priority = priority; /* Size increment must not be visible to the readers before the new * entry is stored. */ - atomic_thread_fence(memory_order_release); - ++old->size; + atomic_store_explicit(&old->size, size + 1, memory_order_release); } else { if (!temp) { temp = pvector_impl_dup(old); diff --git a/lib/pvector.h b/lib/pvector.h index b990ed9d5..0d3290dc3 100644 --- a/lib/pvector.h +++ b/lib/pvector.h @@ -69,8 +69,8 @@ struct pvector_entry { }; struct pvector_impl { - size_t size; /* Number of entries in the vector. 
*/ - size_t allocated; /* Number of allocated entries. */ + atomic_size_t size; /* Number of entries in the vector. */ + size_t allocated; /* Number of allocated entries. */ struct pvector_entry vector[]; }; @@ -181,12 +181,17 @@ pvector_cursor_init(const struct pvector *pvec, { const struct pvector_impl *impl; struct pvector_cursor cursor; + size_t size; impl = ovsrcu_get(struct pvector_impl *, &pvec->impl); - ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0]); + /* Use memory_order_acquire to ensure entry access can not be + * reordered to happen before size read. */ + atomic_read_explicit(&CONST_CAST(struct pvector_impl *, impl)->size, + &size, memory_order_acquire); + ovs_prefetch_range(impl->vector, size * sizeof impl->vector[0]); - cursor.size = impl->size; + cursor.size = size; cursor.vector = impl->vector; cursor.entry_idx = -1; -- GitLab From ef32a1a334b14b20f7ad7abc5ea13bb59e9d1ac4 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 21 Feb 2020 15:41:50 +0100 Subject: [PATCH 062/432] dpif-netdev: Enter quiescent state after each offloading operation. If the offloading queue is big and filled continuously, offloading thread may have no chance to quiesce blocking rcu callbacks and other threads waiting for synchronization. Fix that by entering momentary quiescent state after each operation since we're not holding any rcu-protected memory here. Fixes: 02bb2824e51d ("dpif-netdev: do hw flow offload in a thread") Reported-by: Eli Britstein Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-February/049768.html Acked-by: Eli Britstein Signed-off-by: Ilya Maximets --- lib/dpif-netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index d393aab5e..a798db45d 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2512,6 +2512,7 @@ dp_netdev_flow_offload_main(void *data OVS_UNUSED) VLOG_DBG("%s to %s netdev flow\n", ret == 0 ? 
"succeed" : "failed", op); dp_netdev_free_flow_offload(offload); + ovsrcu_quiesce(); } return NULL; -- GitLab From 65b84d4a32bdcb6ed8605988f4cedb58a753e184 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 11 Mar 2020 13:39:34 +0800 Subject: [PATCH 063/432] dpif-netlink: avoid netlink modify flow put op failed after tc modify flow put op failed. The tc modify flow put always delete the original flow first and then add the new flow. If the modfiy flow put operation failed, the flow put operation will change from modify to create if success to delete the original flow in tc (which will be always failed with ENOENT, the flow is already be deleted before add the new flow in tc). Finally, the modify flow put will failed to add in kernel datapath. Signed-off-by: wenxu Acked-by: Roi Dayan Signed-off-by: Simon Horman --- lib/dpif-netlink.c | 7 ++++++- lib/netdev-offload-tc.c | 2 +- lib/netdev-offload.h | 3 +++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 5b5c96d72..6d9427b57 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2091,6 +2091,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) info.tunnel_csum_on = csum_on; info.recirc_id_shared_with_tc = (dpif->user_features & OVS_DP_F_TC_RECIRC_SHARING); + info.tc_modify_flow_deleted = false; err = netdev_flow_put(dev, &match, CONST_CAST(struct nlattr *, put->actions), put->actions_len, @@ -2141,7 +2142,11 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) out: if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) { /* Modified rule can't be offloaded, try and delete from HW */ - int del_err = netdev_flow_del(dev, put->ufid, put->stats); + int del_err = 0; + + if (!info.tc_modify_flow_deleted) { + del_err = netdev_flow_del(dev, put->ufid, put->stats); + } if (!del_err) { /* Delete from hw success, so old flow was offloaded. 
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 550e440b3..5e7b873c8 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1727,7 +1727,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, if (get_ufid_tc_mapping(ufid, &id) == 0) { VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", id.handle, id.prio); - del_filter_and_ufid_mapping(&id, ufid); + info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, ufid); } prio = get_prio_for_tc_flower(&flower); diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h index cd6dfdfff..b4b882a56 100644 --- a/lib/netdev-offload.h +++ b/lib/netdev-offload.h @@ -74,6 +74,9 @@ struct offload_info { * it will be in the pkt meta data. */ uint32_t flow_mark; + + bool tc_modify_flow_deleted; /* Indicate the tc modify flow put success + * to delete the original flow. */ }; int netdev_flow_flush(struct netdev *); -- GitLab From 8c544570cb3a1e09009e2d637cbd7075afc7ed03 Mon Sep 17 00:00:00 2001 From: Usman Ansari Date: Thu, 19 Mar 2020 14:47:17 -0700 Subject: [PATCH 064/432] hmap: Fix Coverity false positive Coverity reports a false positive below: Incorrect expression, Assign_where_compare_meant: use of "=" where "==" may have been intended. Fixed it by rewriting '(NODE = NULL)' as '((NODE = NULL), false)'. 
"make check" passes for this change Coverity reports over 500 errors resolved Suggested-by: Ben Pfaff Signed-off-by: Usman Ansari Signed-off-by: Ben Pfaff --- include/openvswitch/hmap.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/include/openvswitch/hmap.h b/include/openvswitch/hmap.h index 8aea9c22d..4e001cc69 100644 --- a/include/openvswitch/hmap.h +++ b/include/openvswitch/hmap.h @@ -136,12 +136,14 @@ struct hmap_node *hmap_random_node(const struct hmap *); */ #define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \ for (INIT_CONTAINER(NODE, hmap_first_with_hash(HMAP, HASH), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL); \ + (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ + || ((NODE = NULL), false); \ ASSIGN_CONTAINER(NODE, hmap_next_with_hash(&(NODE)->MEMBER), \ MEMBER)) #define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \ for (INIT_CONTAINER(NODE, hmap_first_in_bucket(HMAP, HASH), MEMBER); \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL); \ + (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ + || ((NODE = NULL), false); \ ASSIGN_CONTAINER(NODE, hmap_next_in_bucket(&(NODE)->MEMBER), MEMBER)) static inline struct hmap_node *hmap_first_with_hash(const struct hmap *, @@ -170,7 +172,8 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *); HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, (void) 0) #define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) 
\ for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL); \ + (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ + || ((NODE = NULL), false); \ ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) /* Safe when NODE may be freed (not needed when NODE may be removed from the @@ -179,7 +182,8 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *); HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, (void) 0) #define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \ for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \ - ((NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL) \ + ((NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ + || ((NODE = NULL), false) \ ? INIT_CONTAINER(NEXT, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), 1 \ : 0); \ (NODE) = (NEXT)) @@ -190,7 +194,8 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *); #define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) 
\ for (ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), \ __VA_ARGS__; \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL); \ + (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ + || ((NODE = NULL), false); \ ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER)) static inline struct hmap_node * @@ -211,7 +216,8 @@ hmap_pop_helper__(struct hmap *hmap, size_t *bucket) { #define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \ for (size_t bucket__ = 0; \ INIT_CONTAINER(NODE, hmap_pop_helper__(HMAP, &bucket__), MEMBER), \ - (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL);) + (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \ + || ((NODE = NULL), false);) static inline struct hmap_node *hmap_first(const struct hmap *); static inline struct hmap_node *hmap_next(const struct hmap *, -- GitLab From 075e1c4967e5872a7c58aebeca34df1dd78b5e23 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 11 Mar 2020 10:49:17 -0700 Subject: [PATCH 065/432] Documentation: Add note about iproute2 requirements for check-kmod On many systems the check-kmod and check-kernel test suites have many failures due to the lack of feature support in the older iproute2 utility packages shipped with those systems. Add a note indicating that it might be necessary to update the iproute2 utility package in order to fix those errors. Signed-off-by: Greg Rose Signed-off-by: William Tu --- Documentation/topics/testing.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/topics/testing.rst b/Documentation/topics/testing.rst index 161e9d442..b9fa94dda 100644 --- a/Documentation/topics/testing.rst +++ b/Documentation/topics/testing.rst @@ -381,6 +381,17 @@ The results of the testsuite are in ``tests/system-kmod-testsuite.dir``. All the features documented under `Unit Tests`_ are available for the kernel datapath testsuite. +.. 
note:: + Many of the kernel tests are dependent on the utilities present in the + iproute2 package, especially the 'ip' command. If there are many + otherwise unexplained errors it may be necessary to update the iproute2 + package utilities on the system. It is beyond the scope of this + documentation to explain all that is necessary to build and install + an updated iproute2 utilities package. The package is available from + the Linux kernel organization open source git repositories. + + https://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git + .. _testing-static-analysis: Static Code Analysis -- GitLab From 9a8a18f9fa27f72cfdb9da75f10bffc9ab621e33 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 17 Mar 2020 16:12:21 -0700 Subject: [PATCH 066/432] conntrack: Fix NULL pointer dereference. Coverity CID 279957 reports NULL pointer dereference when 'conn' is NULL and calling ct_print_conn_info. Cc: Usman Ansari Signed-off-by: William Tu Acked-by: Dumitru Ceara --- lib/conntrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/conntrack.c b/lib/conntrack.c index ff5a89457..001a37ff6 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -1302,7 +1302,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, if (!conn) { pkt->md.ct_state |= CS_TRACKED | CS_INVALID; char *log_msg = xasprintf("Missing master conn %p", rev_conn); - ct_print_conn_info(conn, log_msg, VLL_INFO, true, true); + ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true); free(log_msg); return; } -- GitLab From 323ae1e808e6ac503f5c7ddd50a79d908fdd0e41 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 19 Mar 2020 17:53:10 -0700 Subject: [PATCH 067/432] ofproto-dpif-xlate: Fix recirculation when in_port is OFPP_CONTROLLER. Recirculation usually requires finding the pre-recirculation input port. Packets sent by the controller, with in_port of OFPP_CONTROLLER or OFPP_NONE, do not have a real input port data structure, only a port number.
The code in xlate_lookup_ofproto_() mishandled this case, failing to return the ofproto data structure. This commit fixes the problem and adds a test to guard against regression. Reported-by: Numan Siddique Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-March/368642.html Tested-by: Numan Siddique Acked-by: Numan Siddique Signed-off-by: Ben Pfaff --- ofproto/ofproto-dpif-xlate.c | 25 +++++++++++++++++++++---- tests/ofproto-dpif.at | 30 ++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 4 deletions(-) diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index adf57a5e8..28dcc67dd 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -1520,15 +1520,32 @@ xlate_lookup_ofproto_(const struct dpif_backer *backer, return NULL; } - /* If recirculation was initiated due to bond (in_port = OFPP_NONE) - * then frozen state is static and xport_uuid is not defined, so xport - * cannot be restored from frozen state. */ - if (recirc_id_node->state.metadata.in_port != OFPP_NONE) { + ofp_port_t in_port = recirc_id_node->state.metadata.in_port; + if (in_port != OFPP_NONE && in_port != OFPP_CONTROLLER) { struct uuid xport_uuid = recirc_id_node->state.xport_uuid; xport = xport_lookup_by_uuid(xcfg, &xport_uuid); if (xport && xport->xbridge && xport->xbridge->ofproto) { goto out; } + } else { + /* OFPP_NONE and OFPP_CONTROLLER are not real ports. They indicate + * that the packet originated from the controller via an OpenFlow + * "packet-out". The right thing to do is to find just the + * ofproto. There is no xport, which is OK. + * + * OFPP_NONE can also indicate that a bond caused recirculation. 
*/ + struct uuid uuid = recirc_id_node->state.ofproto_uuid; + const struct xbridge *bridge = xbridge_lookup_by_uuid(xcfg, &uuid); + if (bridge && bridge->ofproto) { + if (errorp) { + *errorp = NULL; + } + *xportp = NULL; + if (ofp_in_port) { + *ofp_in_port = in_port; + } + return bridge->ofproto; + } } } diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index ff1cc9370..d444cf084 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -5171,6 +5171,36 @@ AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2 OVS_VSWITCHD_STOP AT_CLEANUP +# Checks for regression against a bug in which OVS dropped packets +# with in_port=CONTROLLER when they were recirculated (because +# CONTROLLER isn't a real port and could not be looked up). +AT_SETUP([ofproto-dpif - packet-out recirculation]) +OVS_VSWITCHD_START +add_of_ports br0 1 2 + +AT_DATA([flows.txt], [dnl +table=0 ip actions=mod_dl_dst:83:83:83:83:83:83,ct(table=1) +table=1 ip actions=ct(commit),output:2 +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000 +AT_CHECK([ovs-ofctl packet-out br0 "in_port=controller packet=$packet actions=table"]) + +# Dumps out the flow table, extracts the number of packets that have gone +# through the (single) flow in table 1, and returns success if it's exactly 1. +# +# If this remains 0, then the recirculation isn't working properly since the +# packet never goes through flow in table 1. +check_flows () { + n=$(ovs-ofctl dump-flows br0 table=1 | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p') + echo "n_packets=$n" + test "$n" = 1 +} +OVS_WAIT_UNTIL([check_flows], [ovs dump-flows br0]) + +OVS_VSWITCHD_STOP +AT_CLEANUP AT_SETUP([ofproto-dpif - debug_slow action]) OVS_VSWITCHD_START -- GitLab From 047b920ea66d128575211e79f9f91be89bdce270 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 17 Mar 2020 16:31:55 -0700 Subject: [PATCH 068/432] ofp-actions: Fix memory leak. 
Coverity CID 279274 reports leaking previously allocated 'error' buffer when 'return xasprintf("input too big");'. Cc: Usman Ansari Signed-off-by: William Tu Reviewed-by: Greg Rose --- lib/ofp-actions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index ddef3b0c8..ef8b2b452 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -6657,6 +6657,7 @@ parse_CT(char *arg, const struct ofpact_parse_params *pp) } if (ofpbuf_oversized(pp->ofpacts)) { + free(error); return xasprintf("input too big"); } -- GitLab From 9435b0b8e6b89ddaec7f0a23ce613f4ae5a1f70b Mon Sep 17 00:00:00 2001 From: Terry Wilson Date: Fri, 20 Mar 2020 15:22:38 +0000 Subject: [PATCH 069/432] Handle refTable values with setkey() For columns like QoS.queues where we have a map containing refTable values, assigning w/ __setattr__ e.g. qos.queues={1: $queue_row} works, but using qos.setkey('queues', 1, $queue_row) results in an Exception. The opdat argument can essentially just be the JSON representation of the map column instead of trying to build it.
Signed-off-by: Terry Wilson Signed-off-by: Ben Pfaff --- python/ovs/db/idl.py | 3 +-- tests/idltest.ovsschema | 15 +++++++++++++++ tests/ovsdb-idl.at | 13 +++++++++++++ tests/test-ovsdb.py | 23 ++++++++++++++++++++++- 4 files changed, 51 insertions(+), 3 deletions(-) diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py index 020291d48..5850ac7ab 100644 --- a/python/ovs/db/idl.py +++ b/python/ovs/db/idl.py @@ -1567,10 +1567,9 @@ class Transaction(object): for col, val in row._mutations['_inserts'].items(): column = row._table.columns[col] if column.type.is_map(): - opdat = ["map"] datum = data.Datum.from_python(column.type, val, _row_to_uuid) - opdat.append(datum.as_list()) + opdat = self._substitute_uuids(datum.to_json()) else: opdat = ["set"] inner_opdat = [] diff --git a/tests/idltest.ovsschema b/tests/idltest.ovsschema index bee79fc50..e02b975bc 100644 --- a/tests/idltest.ovsschema +++ b/tests/idltest.ovsschema @@ -171,6 +171,21 @@ }, "isRoot" : false }, + "simple5": { + "columns" : { + "name": {"type": "string"}, + "irefmap": { + "type": { + "key": {"type": "integer"}, + "value": {"type": "uuid", + "refTable": "simple3"}, + "min": 0, + "max": "unlimited" + } + } + }, + "isRoot": true + }, "singleton" : { "columns" : { "name" : { diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index cc38d69c1..564ef4c78 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -955,6 +955,7 @@ AT_CHECK([sort stdout | uuidfilt], [0], # Check that ovsdb-idl figured out that table link2 and column l2 are missing. AT_CHECK([grep ovsdb_idl stderr | sort], [0], [dnl test-ovsdb|ovsdb_idl|idltest database lacks link2 table (database needs upgrade?) +test-ovsdb|ovsdb_idl|idltest database lacks simple5 table (database needs upgrade?) test-ovsdb|ovsdb_idl|idltest database lacks singleton table (database needs upgrade?) test-ovsdb|ovsdb_idl|link1 table in idltest database lacks l2 column (database needs upgrade?) 
]) @@ -1288,6 +1289,18 @@ OVSDB_CHECK_IDL_PY([partial-map idl], 009: done ]]) +OVSDB_CHECK_IDL_PY([partial-map update set refmap idl], +[['["idltest", {"op":"insert", "table":"simple3", "row":{"name":"myString1"}}, + {"op":"insert", "table":"simple5", "row":{"name":"myString2"}}]']], +['partialmapmutateirefmap'], +[[000: name=myString1 uset=[] +000: name=myString2 irefmap=[] +001: commit, status=success +002: name=myString1 uset=[] +002: name=myString2 irefmap=[(1 <0>)] +003: done +]]) + m4_define([OVSDB_CHECK_IDL_PARTIAL_UPDATE_SET_COLUMN], [AT_SETUP([$1 - C]) AT_KEYWORDS([ovsdb server idl partial update set column positive $5]) diff --git a/tests/test-ovsdb.py b/tests/test-ovsdb.py index 1b94b79a0..a19680274 100644 --- a/tests/test-ovsdb.py +++ b/tests/test-ovsdb.py @@ -28,6 +28,7 @@ import ovs.util import ovs.vlog from ovs.db import data from ovs.db import error +from ovs.db.idl import _row_to_uuid as row_to_uuid from ovs.fatal_signal import signal_alarm vlog = ovs.vlog.Vlog("test-ovsdb") @@ -159,7 +160,8 @@ def get_simple_printable_row_string(row, columns): is ovs.db.data.Atom): value = getattr(row, column) if isinstance(value, dict): - value = sorted(value.items()) + value = sorted((row_to_uuid(k), row_to_uuid(v)) + for k, v in value.items()) s += "%s=%s " % (column, value) s = s.strip() s = re.sub('""|,|u?\'', "", s) @@ -212,6 +214,14 @@ def print_idl(idl, step): print(s) n += 1 + if "simple5" in idl.tables: + simple5 = idl.tables["simple5"].rows + for row in simple5.values(): + s = "%03d: " % step + s += get_simple_printable_row_string(row, ["name", "irefmap"]) + print(s) + n += 1 + if "link1" in idl.tables: l1 = idl.tables["link1"].rows for row in l1.values(): @@ -303,6 +313,11 @@ def idltest_find_simple3(idl, i): return next(idl.index_equal("simple3", "simple3_by_name", i), None) +def idltest_find(idl, table, col, match): + return next((r for r in idl.tables[table].rows.values() if + getattr(r, col) == match), None) + + def idl_set(idl, commands, step): 
txn = ovs.db.idl.Transaction(idl) increment = False @@ -524,6 +539,12 @@ def idl_set(idl, commands, step): setattr(new_row3, 'name', 'String3') new_row3.addvalue('uset', new_row41.uuid) assert len(getattr(new_row3, 'uset', [])) == 1 + elif name == 'partialmapmutateirefmap': + row3 = idltest_find_simple3(idl, "myString1") + row5 = idltest_find(idl, "simple5", "name", "myString2") + row5.setkey('irefmap', 1, row3.uuid) + maplen = len(row5.irefmap) + assert maplen == 1, "expected 1, got %d" % maplen else: sys.stderr.write("unknown command %s\n" % name) sys.exit(1) -- GitLab From 21005175b68b64b6b7f205ee34d2d5cbdb8ead22 Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 20 Mar 2020 08:29:13 -0700 Subject: [PATCH 070/432] AUTHORS: Add Usman Ansari. Signed-off-by: William Tu --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index ea9d7097e..61a3f6117 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -383,6 +383,7 @@ Tuan Nguyen tuan.nguyen@veriksystems.com Tyler Coumbes coumbes@gmail.com Tony van der Peet tony.vanderpeet@alliedtelesis.co.nz Tonghao Zhang xiangxia.m.yue@gmail.com +Usman Ansari ua1422@gmail.com Valient Gough vgough@pobox.com Venkata Anil Kommaddi vkommadi@redhat.com Vishal Deep Ajmera vishal.deep.ajmera@ericsson.com -- GitLab From e86ec1e7e079718aed8d08aad4d976909030f4c6 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 19 Mar 2020 16:02:56 -0700 Subject: [PATCH 071/432] ofproto: Fix typo in manpage fragment. There was a missing ] and an extra space. Acked-by: Numan Siddique Signed-off-by: Ben Pfaff --- ofproto/ofproto-unixctl.man | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man index 925752343..095afd57c 100644 --- a/ofproto/ofproto-unixctl.man +++ b/ofproto/ofproto-unixctl.man @@ -9,7 +9,7 @@ that may be used on \fBofproto/trace\fR. 
.IP "\fBofproto/trace\fR [\fIoptions\fR] [\fIdpname\fR] \fIodp_flow\fR [\fIpacket\fR] .IQ "\fBofproto/trace\fR [\fIoptions\fR] \fIbridge\fR \fIbr_flow\fR [\fIpacket\fR]] .IQ "\fBofproto/trace\-packet\-out\fR [\fIoptions\fR] [\fIdpname\fR] \fIodp_flow\fR [\fIpacket\fR] \fIactions\fR" -.IQ "\fBofproto/trace\-packet\-out\fR [\fIoptions\fR \fIbridge\fR \fIbr_flow\fR [\fIpacket\fR] \fIactions\fR" +.IQ "\fBofproto/trace\-packet\-out\fR [\fIoptions\fR] \fIbridge\fR \fIbr_flow\fR [\fIpacket\fR] \fIactions\fR" Traces the path of an imaginary packet through \fIswitch\fR and reports the path that it took. The initial treatment of the packet varies based on the command: -- GitLab From e7d6922c0e58c2858bd6c6f3480d1654bf5d8694 Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 20 Mar 2020 13:54:50 -0700 Subject: [PATCH 072/432] trivial: Fix indentation. Add extra space to fix indentation. Signed-off-by: William Tu Acked-by: Ben Pfaff --- lib/fatal-signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c index 09f7c6ecf..ba7f5bfd3 100644 --- a/lib/fatal-signal.c +++ b/lib/fatal-signal.c @@ -184,7 +184,7 @@ send_backtrace_to_monitor(void) { unw_get_reg(&cursor, UNW_REG_IP, &unw_bt[dep].ip); unw_get_proc_name(&cursor, unw_bt[dep].func, UNW_MAX_FUNCN, &unw_bt[dep].offset); - dep++; + dep++; } ignore(write(daemonize_fd, unw_bt, dep * sizeof(struct unw_backtrace))); -- GitLab From c59922767ea3812763686613444a342e5daa67f7 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 23 Mar 2020 07:56:47 -0700 Subject: [PATCH 073/432] trivial: Fix typo in comments. 
s/daemon_complete/daemonize_complete/ Signed-off-by: William Tu --- lib/daemon-unix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c index 7e48630f0..6b2a5b9bd 100644 --- a/lib/daemon-unix.c +++ b/lib/daemon-unix.c @@ -434,8 +434,8 @@ monitor_daemon(pid_t daemon_pid) /* If daemonization is configured, then starts daemonization, by forking and * returning in the child process. The parent process hangs around until the * child lets it know either that it completed startup successfully (by calling - * daemon_complete()) or that it failed to start up (by exiting with a nonzero - * exit code). */ + * daemonize_complete()) or that it failed to start up (by exiting with a + * nonzero exit code). */ void daemonize_start(bool access_datapath) { -- GitLab From ecd4a8fcdff2ebf35cdb36355167d34e99df6dd5 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 23 Mar 2020 07:44:48 -0700 Subject: [PATCH 074/432] fatal-signal: Log backtrace when no monitor daemon. Currently the backtrace logging is only available when monitor daemon is running. This patch enables backtrace logging when no monitor daemon exists. At signal handling context, it detects whether monitor daemon exists. If not, write directly the backtrace to the vlog fd. Note that using VLOG_* macro doesn't work due to it's buffer I/O, so this patch directly issue write() syscall to the file descriptor. For some system we stop using monitor daemon and use systemd to monitor ovs-vswitchd, thus need this patch. 
Example of ovs-vswitchd.log (note that there is no timestamp printed): 2020-03-23T14:42:12.949Z|00049|memory|INFO|175332 kB peak resident 2020-03-23T14:42:12.949Z|00050|memory|INFO|handlers:2 ports:3 reva SIGSEGV detected, backtrace: 0x0000000000486969 0x00007f7f5e57f4b0 0x000000000047daa8 0x0000000000504edd 0x00007f7f5f0476ba 0x00007f7f5e65141d 0x0000000000000000 <+0x0> Acked-by: Ben Pfaff Signed-off-by: William Tu --- include/openvswitch/vlog.h | 3 +++ lib/daemon-private.h | 1 + lib/daemon-unix.c | 2 +- lib/fatal-signal.c | 27 ++++++++++++++++++++++++++- lib/vlog.c | 6 ++++++ 5 files changed, 37 insertions(+), 2 deletions(-) diff --git a/include/openvswitch/vlog.h b/include/openvswitch/vlog.h index 19da4ab62..476bf3d6d 100644 --- a/include/openvswitch/vlog.h +++ b/include/openvswitch/vlog.h @@ -143,6 +143,9 @@ void vlog_set_syslog_method(const char *method); /* Configure syslog target. */ void vlog_set_syslog_target(const char *target); +/* Return the log_fd. */ +int vlog_get_fd(void); + /* Initialization. */ void vlog_init(void); void vlog_enable_async(void); diff --git a/lib/daemon-private.h b/lib/daemon-private.h index 4e0667601..2b90e0042 100644 --- a/lib/daemon-private.h +++ b/lib/daemon-private.h @@ -20,6 +20,7 @@ extern bool detach; extern char *pidfile; extern int daemonize_fd; +extern bool monitor; char *make_pidfile_name(const char *name); diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c index 6b2a5b9bd..ae59ecf2c 100644 --- a/lib/daemon-unix.c +++ b/lib/daemon-unix.c @@ -80,7 +80,7 @@ int daemonize_fd = -1; /* --monitor: Should a supervisory process monitor the daemon and restart it if * it dies due to an error signal? */ -static bool monitor; +bool monitor; /* --user: Only root can use this option. Switch to new uid:gid after * initially running as root. 
*/ diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c index ba7f5bfd3..4965c1ae8 100644 --- a/lib/fatal-signal.c +++ b/lib/fatal-signal.c @@ -187,7 +187,32 @@ send_backtrace_to_monitor(void) { dep++; } - ignore(write(daemonize_fd, unw_bt, dep * sizeof(struct unw_backtrace))); + if (monitor) { + ignore(write(daemonize_fd, unw_bt, + dep * sizeof(struct unw_backtrace))); + } else { + /* Since there is no monitor daemon running, write backtrace + * in current process. This is not asyn-signal-safe due to + * use of snprintf(). + */ + char str[] = "SIGSEGV detected, backtrace:\n"; + + if (vlog_get_fd() < 0) { + return; + } + + ignore(write(vlog_get_fd(), str, strlen(str))); + + for (int i = 0; i < dep; i++) { + char line[64]; + + snprintf(line, 64, "0x%016"PRIxPTR" <%s+0x%"PRIxPTR">\n", + unw_bt[i].ip, + unw_bt[i].func, + unw_bt[i].offset); + ignore(write(vlog_get_fd(), line, strlen(line))); + } + } } #else static inline void diff --git a/lib/vlog.c b/lib/vlog.c index 559943d87..502b33fc8 100644 --- a/lib/vlog.c +++ b/lib/vlog.c @@ -612,6 +612,12 @@ vlog_set_syslog_target(const char *target) ovs_rwlock_unlock(&pattern_rwlock); } +int +vlog_get_fd(void) +{ + return log_fd; +} + /* Returns 'false' if 'facility' is not a valid string. If 'facility' * is a valid string, sets 'value' with the integer value of 'facility' * and returns 'true'. */ -- GitLab From ae2d6e3f5b066fe0f64a3c03b68501022450ad30 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 23 Mar 2020 16:34:37 -0700 Subject: [PATCH 075/432] lockfile: Fix OVS_REQUIRES macro. Pass lock objects, not their addresses, to the annotation macros. 
Fixes: f21fa45f3085 ("lockfile: Minor code cleanup.") Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666098338 Signed-off-by: William Tu Acked-by: Ben Pfaff --- lib/lockfile.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/lockfile.c b/lib/lockfile.c index 36728ff91..42782d29e 100644 --- a/lib/lockfile.c +++ b/lib/lockfile.c @@ -61,9 +61,9 @@ static struct hmap *const lock_table OVS_GUARDED_BY(lock_table_mutex) static void lockfile_unhash(struct lockfile *); static int lockfile_try_lock(const char *name, pid_t *pidp, struct lockfile **lockfilep) - OVS_REQUIRES(&lock_table_mutex); + OVS_REQUIRES(lock_table_mutex); static void lockfile_do_unlock(struct lockfile * lockfile) - OVS_REQUIRES(&lock_table_mutex); + OVS_REQUIRES(lock_table_mutex); /* Returns the name of the lockfile that would be created for locking a file * named 'filename_'. The caller is responsible for freeing the returned name, @@ -188,7 +188,7 @@ lockfile_hash(dev_t device, ino_t inode) } static struct lockfile * -lockfile_find(dev_t device, ino_t inode) OVS_REQUIRES(&lock_table_mutex) +lockfile_find(dev_t device, ino_t inode) OVS_REQUIRES(lock_table_mutex) { struct lockfile *lockfile; @@ -202,7 +202,7 @@ lockfile_find(dev_t device, ino_t inode) OVS_REQUIRES(&lock_table_mutex) } static void -lockfile_unhash(struct lockfile *lockfile) OVS_REQUIRES(&lock_table_mutex) +lockfile_unhash(struct lockfile *lockfile) OVS_REQUIRES(lock_table_mutex) { if (lockfile->fd >= 0) { close(lockfile->fd); @@ -213,7 +213,7 @@ lockfile_unhash(struct lockfile *lockfile) OVS_REQUIRES(&lock_table_mutex) static struct lockfile * lockfile_register(const char *name, dev_t device, ino_t inode, int fd) - OVS_REQUIRES(&lock_table_mutex) + OVS_REQUIRES(lock_table_mutex) { struct lockfile *lockfile; @@ -236,7 +236,7 @@ lockfile_register(const char *name, dev_t device, ino_t inode, int fd) #ifdef _WIN32 static void lockfile_do_unlock(struct lockfile *lockfile) - 
OVS_REQUIRES(&lock_table_mutex) + OVS_REQUIRES(lock_table_mutex) { if (lockfile->fd >= 0) { OVERLAPPED overl; @@ -252,7 +252,7 @@ lockfile_do_unlock(struct lockfile *lockfile) static int lockfile_try_lock(const char *name, pid_t *pidp, struct lockfile **lockfilep) - OVS_REQUIRES(&lock_table_mutex) + OVS_REQUIRES(lock_table_mutex) { HANDLE lock_handle; BOOL retval; @@ -306,7 +306,7 @@ lockfile_do_unlock(struct lockfile *lockfile) static int lockfile_try_lock(const char *name, pid_t *pidp, struct lockfile **lockfilep) - OVS_REQUIRES(&lock_table_mutex) + OVS_REQUIRES(lock_table_mutex) { struct flock l; struct stat s; -- GitLab From ecbc7f0aa2e112afc5ce63cf8a20ebd41e20b73b Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 24 Mar 2020 07:17:02 -0700 Subject: [PATCH 076/432] fatal-signal: Fix clang error due to lock. Due to not acquiring lock, clang reports: lib/vlog.c:618:12: error: reading variable 'log_fd' requires holding mutex 'log_file_mutex' [-Werror,-Wthread-safety-analysis] return log_fd; The patch fixes it by creating a function in vlog.c to write directly to log file unsafely. Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666165883 Fixes: ecd4a8fcdff2 ("fatal-signal: Log backtrace when no monitor daemon.") Suggested-by: Ilya Maximets Acked-by: Ilya Maximets Signed-off-by: William Tu --- include/openvswitch/vlog.h | 4 ++-- lib/fatal-signal.c | 8 ++------ lib/vlog.c | 15 ++++++++++++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/openvswitch/vlog.h b/include/openvswitch/vlog.h index 476bf3d6d..886fce5e0 100644 --- a/include/openvswitch/vlog.h +++ b/include/openvswitch/vlog.h @@ -143,8 +143,8 @@ void vlog_set_syslog_method(const char *method); /* Configure syslog target. */ void vlog_set_syslog_target(const char *target); -/* Return the log_fd. */ -int vlog_get_fd(void); +/* Write directly to log file. */ +void vlog_direct_write_to_log_file_unsafe(const char *s); /* Initialization. 
*/ void vlog_init(void); diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c index 4965c1ae8..51cf628d9 100644 --- a/lib/fatal-signal.c +++ b/lib/fatal-signal.c @@ -197,11 +197,7 @@ send_backtrace_to_monitor(void) { */ char str[] = "SIGSEGV detected, backtrace:\n"; - if (vlog_get_fd() < 0) { - return; - } - - ignore(write(vlog_get_fd(), str, strlen(str))); + vlog_direct_write_to_log_file_unsafe(str); for (int i = 0; i < dep; i++) { char line[64]; @@ -210,7 +206,7 @@ send_backtrace_to_monitor(void) { unw_bt[i].ip, unw_bt[i].func, unw_bt[i].offset); - ignore(write(vlog_get_fd(), line, strlen(line))); + vlog_direct_write_to_log_file_unsafe(line); } } } diff --git a/lib/vlog.c b/lib/vlog.c index 502b33fc8..6d17d4837 100644 --- a/lib/vlog.c +++ b/lib/vlog.c @@ -612,10 +612,19 @@ vlog_set_syslog_target(const char *target) ovs_rwlock_unlock(&pattern_rwlock); } -int -vlog_get_fd(void) +/* + * This function writes directly to log file without using async writer or + * taking a lock. Caller must hold 'log_file_mutex' or be sure that it's + * not necessary. Could be used in exceptional cases like dumping of backtrace + * on fatal signals. + */ +void +vlog_direct_write_to_log_file_unsafe(const char *s) + OVS_NO_THREAD_SAFETY_ANALYSIS { - return log_fd; + if (log_fd >= 0) { + ignore(write(log_fd, s, strlen(s))); + } } /* Returns 'false' if 'facility' is not a valid string. If 'facility' -- GitLab From af8169b42d796365ed798a7bcd3af483521a2bee Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Thu, 19 Mar 2020 20:21:16 +0100 Subject: [PATCH 077/432] conntrack: Reset ct_state when entering a new zone. When a new conntrack zone is entered, the ct_state field is zeroed in order to avoid using state information from different zones. One such scenario is when a packet is double NATed. Assuming two zones and 3 flows performing the following actions in order on the packet: 1. ct(zone=5,nat), recirc 2. ct(zone=1), recirc 3. 
ct(zone=1,nat) If at step #1 the packet matches an existing NAT entry, it will get translated and pkt->md.ct_state is set to CS_DST_NAT or CS_SRC_NAT. At step #2 the new tuple might match an existing connection and pkt->md.ct_zone is set to 1. If at step #3 the packet matches an existing NAT entry in zone 1, handle_nat() will be called to perform the translation but it will return early because the packet's zone matches the conntrack zone and the ct_state field still contains CS_DST_NAT or CS_SRC_NAT from the translations in zone 5. In order to reliably detect when a packet enters a new conntrack zone we also need to make sure that the pkt->md.ct_zone is properly initialized if pkt->md.ct_state is non-zero. This already happens for most cases. The only exception is when the matched conntrack connection is of type CT_CONN_TYPE_UN_NAT and the master connection is missing. To cover this path we now call write_ct_md() in that case too. Remove setting the CS_TRACKED flag in this case, as it will be done by the new call to write_ct_md(). CC: Darrell Ball Fixes: 286de2729955 ("dpdk: Userspace Datapath: Introduce NAT Support.") Acked-by: Ilya Maximets Acked-by: Aaron Conole Signed-off-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- lib/conntrack.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/conntrack.c b/lib/conntrack.c index 001a37ff6..0cbc8f6d2 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -1277,6 +1277,11 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, const struct nat_action_info_t *nat_action_info, ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper) { + /* Reset ct_state whenever entering a new zone.
*/ + if (pkt->md.ct_state && pkt->md.ct_zone != zone) { + pkt->md.ct_state = 0; + } + bool create_new_conn = false; conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply); struct conn *conn = ctx->conn; @@ -1300,7 +1305,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply); if (!conn) { - pkt->md.ct_state |= CS_TRACKED | CS_INVALID; + pkt->md.ct_state |= CS_INVALID; + write_ct_md(pkt, zone, NULL, NULL, NULL); char *log_msg = xasprintf("Missing master conn %p", rev_conn); ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true); free(log_msg); -- GitLab From 7fb890db313cd3e4218a529c8e1226d4ea088ed5 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 24 Mar 2020 08:42:02 -0700 Subject: [PATCH 078/432] compat: Fix nf_ip_hook parameters for RHEL 8 A RHEL release version check was only checking for RHEL releases greater than 7.0 so that ended up including a compat fixup that is not needed for 8.0. Fix up the version check. Signed-off-by: Greg Rose Acked-by: Yi-Hung Wei Signed-off-by: William Tu --- datapath/linux/compat/stt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c index 7b46d1a20..8a5853f19 100644 --- a/datapath/linux/compat/stt.c +++ b/datapath/linux/compat/stt.c @@ -1559,7 +1559,7 @@ static void clean_percpu(struct work_struct *work) #endif #ifdef HAVE_NF_HOOK_STATE -#if RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,0) +#if RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,0) && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8,0) /* RHEL nfhook hacks. */ #ifndef __GENKSYMS__ #define LAST_PARAM const struct net_device *in, const struct net_device *out, \ -- GitLab From a82083ee309187d7a5dc866778478905d8140338 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 24 Mar 2020 08:42:03 -0700 Subject: [PATCH 079/432] Documentation: Add extra repo info for RHEL 8 The extra development repo for RHEL 8 has changed. 
Document it. Signed-off-by: Greg Rose Acked-by: Yi-Hung Wei Signed-off-by: William Tu --- Documentation/intro/install/fedora.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/intro/install/fedora.rst b/Documentation/intro/install/fedora.rst index 6fe1fb5b2..e5324e1df 100644 --- a/Documentation/intro/install/fedora.rst +++ b/Documentation/intro/install/fedora.rst @@ -69,6 +69,10 @@ repositories to help yum-builddep, e.g.:: $ subscription-manager repos --enable=rhel-7-server-extras-rpms $ subscription-manager repos --enable=rhel-7-server-optional-rpms +or for RHEL 8:: + $ subscription-manager repos \ + --enable=codeready-builder-for-rhel-8-x86_64-rpms + DNF:: $ dnf builddep /tmp/ovs.spec -- GitLab From 1b6db6a5fa4b09444a9a30f4327ac7b9b8027741 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 24 Mar 2020 12:10:57 -0700 Subject: [PATCH 080/432] vlog: Fix OVS_REQUIRES macro. Pass lock objects, not their addresses, to the annotation macros. Signed-off-by: William Tu Acked-by: Ben Pfaff --- lib/vlog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/vlog.c b/lib/vlog.c index 6d17d4837..ee6b0d3a6 100644 --- a/lib/vlog.c +++ b/lib/vlog.c @@ -257,7 +257,7 @@ vlog_get_level(const struct vlog_module *module, } static void -update_min_level(struct vlog_module *module) OVS_REQUIRES(&log_file_mutex) +update_min_level(struct vlog_module *module) OVS_REQUIRES(log_file_mutex) { enum vlog_destination destination; -- GitLab From e99c53ee6167a3b12426ea1d9f913713dd80810b Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Tue, 24 Mar 2020 15:00:37 +0800 Subject: [PATCH 081/432] travis: Enable OvS Travis CI for arm Enable part of travis jobs with gcc compiler for arm64 architecture 1. Add arm jobs into the matrix in .travis.yml configuration file 2. To enable OVS-DPDK jobs, set the build target according to different CPU architectures 3. 
Temporarily disable sparse checker because of static code checking failure on arm64 Considering the balance of the CI coverage and running time, some kernel and DPDK jobs are removed from Arm CI. Successful travis build jobs report: https://travis-ci.org/github/yzyuestc/ovs/builds/666129448 Reviewed-by: Yanqin Wei Reviewed-by: Ruifeng Wang Reviewed-by: JingZhao Ni Reviewed-by: Gavin Hu Signed-off-by: Lance Yang Signed-off-by: Ilya Maximets --- .travis.yml | 15 +++++++++++++++ .travis/linux-build.sh | 13 +++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index ef9f86755..11497588b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -51,6 +51,21 @@ matrix: - os: osx compiler: clang env: OPTS="--disable-ssl" + - arch: arm64 + compiler: gcc + env: OPTS="--disable-ssl" + - arch: arm64 + compiler: gcc + env: KERNEL_LIST="5.5 4.19" + - arch: arm64 + compiler: gcc + env: KERNEL_LIST="4.9 3.16" + - arch: arm64 + compiler: gcc + env: DPDK=1 OPTS="--enable-shared" + - arch: arm64 + compiler: gcc + env: DPDK_SHARED=1 script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index 359f7773b..02615a8ec 100755 --- a/.travis/linux-build.sh +++ b/.travis/linux-build.sh @@ -6,7 +6,6 @@ set -x CFLAGS_FOR_OVS="-g -O2" SPARSE_FLAGS="" EXTRA_OPTS="--enable-Werror" -TARGET="x86_64-native-linuxapp-gcc" function install_kernel() { @@ -87,6 +86,16 @@ function install_dpdk() local DPDK_VER=$1 local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version" + if [ -z "$TRAVIS_ARCH" ] || + [ "$TRAVIS_ARCH" == "amd64" ]; then + TARGET="x86_64-native-linuxapp-gcc" + elif [ "$TRAVIS_ARCH" == "aarch64" ]; then + TARGET="arm64-armv8a-linuxapp-gcc" + else + echo "Target is unknown" + exit 1 + fi + if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then # Avoid using cache for git tree build. 
rm -rf dpdk-dir @@ -177,7 +186,7 @@ elif [ "$M32" ]; then # Adding m32 flag directly to CC to avoid any posiible issues with API/ABI # difference on 'configure' and 'make' stages. export CC="$CC -m32" -else +elif [ "$TRAVIS_ARCH" != "aarch64" ]; then OPTS="--enable-sparse" if [ "$AFXDP" ]; then # netdev-afxdp uses memset for 64M for umem initialization. -- GitLab From edc2055a2bf73258d5731a8f8853397190348b04 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Thu, 27 Feb 2020 17:22:32 +0200 Subject: [PATCH 082/432] netdev-offload-tc: Flush rules on ingress block when init tc flow api OVS can fail to attach ingress block on iface when init tc flow api, if block already exist with rules on it and is shared with other iface. Fix by flush all existing rules on the ingress block prior to deleting it. Fixes: 093c9458fb02 ("tc: allow offloading of block ids") Signed-off-by: Dmytro Linkin Acked-by: Raed Salem Acked-by: Roi Dayan Signed-off-by: Simon Horman --- lib/netdev-offload-tc.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 5e7b873c8..875ebef71 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1907,6 +1907,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER; enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev); uint32_t block_id = 0; + struct tcf_id id; int ifindex; int error; @@ -1917,6 +1918,14 @@ netdev_tc_init_flow_api(struct netdev *netdev) return -ifindex; } + block_id = get_block_id_from_netdev(netdev); + + /* Flush rules explicitly needed when we work with ingress_block, + * so we will not fail with reattaching block to bond iface, for ex. 
+ */ + id = tc_make_tcf_id(ifindex, block_id, 0, hook); + tc_del_filter(&id); + /* make sure there is no ingress/egress qdisc */ tc_add_del_qdisc(ifindex, false, 0, hook); @@ -1930,7 +1939,6 @@ netdev_tc_init_flow_api(struct netdev *netdev) ovsthread_once_done(&multi_mask_once); } - block_id = get_block_id_from_netdev(netdev); error = tc_add_del_qdisc(ifindex, true, block_id, hook); if (error && error != EEXIST) { -- GitLab From f598f46212d698f091408abd6ff6905a16980169 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 25 Mar 2020 00:50:45 +0100 Subject: [PATCH 083/432] dpif-netdev: Force port reconfiguration to change dynamic_txqs. In case number of polling threads goes from exact number of Tx queues in port to higher value while set_tx_multiq() not implemented or not requesting reconfiguration, port will not be reconfigured and datapath will continue using static Tx queue ids leading to crash. Ex.: Assuming that port p0 supports up to 4 Tx queues and doesn't support set_tx_multiq() method. For example, netdev-afxdp could be the case, because it could have multiple Tx queues, but doesn't have set_tx_multiq() implementation because number of Tx queues always equals to number of Rx queues. 1. Configuring pmd-cpu-mask to have 3 pmd threads. 2. Adding port p0 to OVS. At this point wanted_txqs = 4 (3 for pmd threads + 1 for non-pmd). Port reconfigured to have 4 Tx queues successfully. dynamic_txqs = (4 < 4) = false; 3. Configuring pmd-cpu-mask to have 10 pmd threads. At this point wanted_txqs = 11 (10 for pmd threads + 1 for non-pmd). Since set_tx_multiq() is not implemented, netdev doesn't request reconfiguration and 'dynamic_txqs' remains in 'false' state. 4. Since 'dynamic_txqs == false', dpif-netdev uses static Tx queue ids that are in range [0, 10] while device only supports 4 leading to unwanted behavior and crashes. Fix that by marking for reconfiguration all the ports that will likely change their 'dynamic_txqs' value. 
It looks like the issue could be reproduced only with afxdp ports, because all other non-dpdk ports ignores Tx queue ids and dpdk ports requests for reconfiguration on set_tx_multiq(). Reported-by: William Tu Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-March/368364.html Fixes: e32971b8ddb4 ("dpif-netdev: Centralized threads and queues handling code.") Acked-by: Kevin Traynor Signed-off-by: Ilya Maximets Signed-off-by: William Tu --- lib/dpif-netdev.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index a798db45d..e456cc9be 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -4941,9 +4941,17 @@ reconfigure_datapath(struct dp_netdev *dp) /* Check for all the ports that need reconfiguration. We cache this in * 'port->need_reconfigure', because netdev_is_reconf_required() can - * change at any time. */ + * change at any time. + * Also mark for reconfiguration all ports which will likely change their + * 'dynamic_txqs' parameter. It's required to stop using them before + * changing this setting and it's simpler to mark ports here and allow + * 'pmd_remove_stale_ports' to remove them from threads. There will be + * no actual reconfiguration in 'port_reconfigure' because it's + * unnecessary. */ HMAP_FOR_EACH (port, node, &dp->ports) { - if (netdev_is_reconf_required(port->netdev)) { + if (netdev_is_reconf_required(port->netdev) + || (port->dynamic_txqs + != (netdev_n_txq(port->netdev) < wanted_txqs))) { port->need_reconfigure = true; } } -- GitLab From 3c6d05a02e0fd2cde2f988a0e41a19f47c3d6947 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 25 Nov 2019 11:19:23 -0800 Subject: [PATCH 084/432] userspace: Add GTP-U support. GTP, GPRS Tunneling Protocol, is a group of IP-based communications protocols used to carry general packet radio service (GPRS) within GSM, UMTS and LTE networks. 
GTP protocol has two parts: Signalling (GTP-Control, GTP-C) and User data (GTP-User, GTP-U). GTP-C is used for setting up GTP-U protocol, which is an IP-in-UDP tunneling protocol. Usually GTP is used in connecting between base station for radio, Serving Gateway (S-GW), and PDN Gateway (P-GW). This patch implements GTP-U protocol for userspace datapath, supporting only required header fields and G-PDU message type. See spec in: https://tools.ietf.org/html/draft-hmm-dmm-5g-uplane-analysis-00 Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/666518784 Signed-off-by: Feng Yang Co-authored-by: Feng Yang Signed-off-by: Yi Yang Co-authored-by: Yi Yang Signed-off-by: William Tu Acked-by: Ben Pfaff --- Documentation/faq/configuration.rst | 13 ++ Documentation/faq/releases.rst | 1 + NEWS | 3 + .../linux/compat/include/linux/openvswitch.h | 2 + include/openvswitch/flow.h | 4 +- include/openvswitch/match.h | 6 + include/openvswitch/meta-flow.h | 28 +++ include/openvswitch/packets.h | 4 +- lib/dpif-netlink-rtnl.c | 5 + lib/dpif-netlink.c | 5 + lib/flow.c | 28 +-- lib/flow.h | 2 +- lib/match.c | 36 +++- lib/meta-flow.c | 38 ++++ lib/meta-flow.xml | 79 ++++++++- lib/netdev-native-tnl.c | 165 ++++++++++++++++-- lib/netdev-native-tnl.h | 13 ++ lib/netdev-vport.c | 25 ++- lib/nx-match.c | 8 +- lib/odp-util.c | 123 ++++++++++++- lib/odp-util.h | 2 +- lib/ofp-match.c | 2 +- lib/packets.h | 68 ++++++++ lib/tnl-ports.c | 3 + ofproto/ofproto-dpif-rid.h | 2 +- ofproto/ofproto-dpif-xlate.c | 3 +- tests/ofproto.at | 2 +- tests/tunnel-push-pop.at | 22 +++ tests/tunnel.at | 76 ++++++++ vswitchd/vswitch.xml | 24 +++ 30 files changed, 752 insertions(+), 40 deletions(-) diff --git a/Documentation/faq/configuration.rst b/Documentation/faq/configuration.rst index ff3b71a5d..4a98740c5 100644 --- a/Documentation/faq/configuration.rst +++ b/Documentation/faq/configuration.rst @@ -225,6 +225,19 @@ Q: Does Open vSwitch support IPv6 GRE? 
options:remote_ip=fc00:100::1 \ options:packet_type=legacy_l2 +Q: Does Open vSwitch support GTP-U? + + A: Yes. Starting with version 2.13, the Open vSwitch userspace + datapath supports GTP-U (GPRS Tunnelling Protocol User Plane + (GTPv1-U)). TEID is set by using tunnel key field. + + :: + + $ ovs-vsctl add-br br0 + $ ovs-vsctl add-port br0 gtpu0 -- \ + set int gtpu0 type=gtpu options:key= \ + options:remote_ip=172.31.1.1 + Q: How do I connect two bridges? A: First, why do you want to do this? Two connected bridges are not much diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index 748540b91..b3507bd1c 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -131,6 +131,7 @@ Q: Are all features available with all datapaths? Tunnel - Geneve-IPv6 4.4 2.6 2.6 NO Tunnel - ERSPAN 4.18 2.10 2.10 NO Tunnel - ERSPAN-IPv6 4.18 2.10 2.10 NO + Tunnel - GTP-U NO NO 2.13 NO QoS - Policing YES 1.1 2.6 NO QoS - Shaping YES 1.1 NO NO sFlow YES 1.0 1.0 NO diff --git a/NEWS b/NEWS index 32ca2e0c6..70bd17584 100644 --- a/NEWS +++ b/NEWS @@ -57,6 +57,9 @@ v2.13.0 - 14 Feb 2020 - 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and type filter supports new filters: "dpdk" and "partially-offloaded". + - GTP-U Tunnel Protocol + * Add two new fields: tun_gtpu_flags, tun_gtpu_msgtype. + * Only support for userspace datapath. v2.12.0 - 03 Sep 2019 --------------------- diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index 2f0c6559e..f7c3b2e99 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -245,6 +245,7 @@ enum ovs_vport_type { OVS_VPORT_TYPE_ERSPAN = 107, /* ERSPAN tunnel. */ OVS_VPORT_TYPE_IP6ERSPAN = 108, /* ERSPAN tunnel. 
*/ OVS_VPORT_TYPE_IP6GRE = 109, + OVS_VPORT_TYPE_GTPU = 110, __OVS_VPORT_TYPE_MAX }; @@ -404,6 +405,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */ OVS_TUNNEL_KEY_ATTR_PAD, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */ + OVS_TUNNEL_KEY_ATTR_GTPU_OPTS, /* struct gtpu_metadata */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h index 57b6c925c..3054015d9 100644 --- a/include/openvswitch/flow.h +++ b/include/openvswitch/flow.h @@ -27,7 +27,7 @@ extern "C" { /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ -#define FLOW_WC_SEQ 41 +#define FLOW_WC_SEQ 42 /* Number of Open vSwitch extension 32-bit registers. */ #define FLOW_N_REGS 16 @@ -168,7 +168,7 @@ BUILD_ASSERT_DECL(sizeof(struct ovs_key_nsh) % sizeof(uint64_t) == 0); /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t) == sizeof(struct flow_tnl) + sizeof(struct ovs_key_nsh) + 300 - && FLOW_WC_SEQ == 41); + && FLOW_WC_SEQ == 42); /* Incremental points at which flow classification may be performed in * segments. 
diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h index eeabd5f47..8af3b74ed 100644 --- a/include/openvswitch/match.h +++ b/include/openvswitch/match.h @@ -121,6 +121,12 @@ void match_set_tun_erspan_dir_masked(struct match *match, uint8_t dir, void match_set_tun_erspan_hwid(struct match *match, uint8_t hwid); void match_set_tun_erspan_hwid_masked(struct match *match, uint8_t hwid, uint8_t mask); +void match_set_tun_gtpu_flags(struct match *match, uint8_t flags); +void match_set_tun_gtpu_flags_masked(struct match *match, uint8_t flags, + uint8_t mask); +void match_set_tun_gtpu_msgtype(struct match *match, uint8_t msgtype); +void match_set_tun_gtpu_msgtype_masked(struct match *match, uint8_t msgtype, + uint8_t mask); void match_set_in_port(struct match *, ofp_port_t ofp_port); void match_set_pkt_mark(struct match *, uint32_t pkt_mark); void match_set_pkt_mark_masked(struct match *, uint32_t pkt_mark, uint32_t mask); diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h index 1f81d830e..d529a9f0d 100644 --- a/include/openvswitch/meta-flow.h +++ b/include/openvswitch/meta-flow.h @@ -506,6 +506,34 @@ enum OVS_PACKED_ENUM mf_field_id { */ MFF_TUN_ERSPAN_HWID, + /* "tun_gtpu_flags". + * + * GTP-U tunnel flags. + * + * Type: u8. + * Maskable: bitwise. + * Formatting: hexadecimal. + * Prerequisites: none. + * Access: read-only. + * NXM: none. + * OXM: NXOXM_ET_GTPU_FLAGS(15) since v2.13. + */ + MFF_TUN_GTPU_FLAGS, + + /* "tun_gtpu_msgtype". + * + * GTP-U tunnel message type. + * + * Type: u8. + * Maskable: bitwise. + * Formatting: decimal. + * Prerequisites: none. + * Access: read-only. + * NXM: none. + * OXM: NXOXM_ET_GTPU_MSGTYPE(16) since v2.13. + */ + MFF_TUN_GTPU_MSGTYPE, + #if TUN_METADATA_NUM_OPTS == 64 /* "tun_metadata". 
* diff --git a/include/openvswitch/packets.h b/include/openvswitch/packets.h index 925844eda..a65cb0d04 100644 --- a/include/openvswitch/packets.h +++ b/include/openvswitch/packets.h @@ -43,7 +43,9 @@ struct flow_tnl { uint32_t erspan_idx; uint8_t erspan_dir; uint8_t erspan_hwid; - uint8_t pad1[6]; /* Pad to 64 bits. */ + uint8_t gtpu_flags; + uint8_t gtpu_msgtype; + uint8_t pad1[4]; /* Pad to 64 bits. */ struct tun_metadata metadata; }; diff --git a/lib/dpif-netlink-rtnl.c b/lib/dpif-netlink-rtnl.c index 582274c46..fd157ce2d 100644 --- a/lib/dpif-netlink-rtnl.c +++ b/lib/dpif-netlink-rtnl.c @@ -111,6 +111,8 @@ vport_type_to_kind(enum ovs_vport_type type, } else { return NULL; } + case OVS_VPORT_TYPE_GTPU: + return NULL; case OVS_VPORT_TYPE_NETDEV: case OVS_VPORT_TYPE_INTERNAL: case OVS_VPORT_TYPE_LISP: @@ -277,6 +279,7 @@ dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg, case OVS_VPORT_TYPE_INTERNAL: case OVS_VPORT_TYPE_LISP: case OVS_VPORT_TYPE_STT: + case OVS_VPORT_TYPE_GTPU: case OVS_VPORT_TYPE_UNSPEC: case __OVS_VPORT_TYPE_MAX: default: @@ -358,6 +361,7 @@ dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg, case OVS_VPORT_TYPE_INTERNAL: case OVS_VPORT_TYPE_LISP: case OVS_VPORT_TYPE_STT: + case OVS_VPORT_TYPE_GTPU: case OVS_VPORT_TYPE_UNSPEC: case __OVS_VPORT_TYPE_MAX: default: @@ -471,6 +475,7 @@ dpif_netlink_rtnl_port_destroy(const char *name, const char *type) case OVS_VPORT_TYPE_INTERNAL: case OVS_VPORT_TYPE_LISP: case OVS_VPORT_TYPE_STT: + case OVS_VPORT_TYPE_GTPU: case OVS_VPORT_TYPE_UNSPEC: case __OVS_VPORT_TYPE_MAX: default: diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 6d9427b57..dc642100f 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -745,6 +745,9 @@ get_vport_type(const struct dpif_netlink_vport *vport) case OVS_VPORT_TYPE_IP6GRE: return "ip6gre"; + case OVS_VPORT_TYPE_GTPU: + return "gtpu"; + case OVS_VPORT_TYPE_UNSPEC: case __OVS_VPORT_TYPE_MAX: break; @@ -778,6 +781,8 @@ 
netdev_to_ovs_vport_type(const char *type) return OVS_VPORT_TYPE_IP6GRE; } else if (!strcmp(type, "gre")) { return OVS_VPORT_TYPE_GRE; + } else if (!strcmp(type, "gtpu")) { + return OVS_VPORT_TYPE_GTPU; } else { return OVS_VPORT_TYPE_UNSPEC; } diff --git a/lib/flow.c b/lib/flow.c index 5c32b4a01..cc1b3f2db 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -129,7 +129,7 @@ struct mf_ctx { * away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are * defined as macros. */ -#if (FLOW_WC_SEQ != 41) +#if (FLOW_WC_SEQ != 42) #define MINIFLOW_ASSERT(X) ovs_assert(X) BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " "assertions enabled. Consider updating FLOW_WC_SEQ after " @@ -731,7 +731,7 @@ void miniflow_extract(struct dp_packet *packet, struct miniflow *dst) { /* Add code to this function (or its callees) to extract new fields. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); const struct pkt_metadata *md = &packet->md; const void *data = dp_packet_data(packet); @@ -1187,7 +1187,7 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata) { int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); match_init_catchall(flow_metadata); if (flow->tunnel.tun_id != htonll(0)) { @@ -1227,6 +1227,12 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata) if (flow->tunnel.erspan_hwid) { match_set_tun_erspan_hwid(flow_metadata, flow->tunnel.erspan_hwid); } + if (flow->tunnel.gtpu_flags) { + match_set_tun_gtpu_flags(flow_metadata, flow->tunnel.gtpu_flags); + } + if (flow->tunnel.gtpu_msgtype) { + match_set_tun_gtpu_msgtype(flow_metadata, flow->tunnel.gtpu_msgtype); + } tun_metadata_get_fmd(&flow->tunnel, flow_metadata); if (flow->metadata != htonll(0)) { match_set_metadata(flow_metadata, flow->metadata); @@ -1767,7 +1773,7 @@ flow_wildcards_init_for_packet(struct flow_wildcards *wc, memset(&wc->masks, 0x0, sizeof wc->masks); /* Update this function 
whenever struct flow changes. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); if (flow_tnl_dst_is_set(&flow->tunnel)) { if (flow->tunnel.flags & FLOW_TNL_F_KEY) { @@ -1788,6 +1794,8 @@ flow_wildcards_init_for_packet(struct flow_wildcards *wc, WC_MASK_FIELD(wc, tunnel.erspan_idx); WC_MASK_FIELD(wc, tunnel.erspan_dir); WC_MASK_FIELD(wc, tunnel.erspan_hwid); + WC_MASK_FIELD(wc, tunnel.gtpu_flags); + WC_MASK_FIELD(wc, tunnel.gtpu_msgtype); if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) { if (flow->tunnel.metadata.present.map) { @@ -1918,7 +1926,7 @@ void flow_wc_map(const struct flow *flow, struct flowmap *map) { /* Update this function whenever struct flow changes. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); flowmap_init(map); @@ -2021,7 +2029,7 @@ void flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc) { /* Update this function whenever struct flow changes. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata); memset(&wc->masks.regs, 0, sizeof wc->masks.regs); @@ -2165,7 +2173,7 @@ flow_wildcards_set_xxreg_mask(struct flow_wildcards *wc, int idx, uint32_t miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); uint32_t hash = basis; if (flow) { @@ -2212,7 +2220,7 @@ ASSERT_SEQUENTIAL(ipv6_src, ipv6_dst); uint32_t flow_hash_5tuple(const struct flow *flow, uint32_t basis) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); uint32_t hash = basis; if (flow) { @@ -2890,7 +2898,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type, if (clear_flow_L3) { /* Clear all L3 and L4 fields and dp_hash. 
*/ - BUILD_ASSERT(FLOW_WC_SEQ == 41); + BUILD_ASSERT(FLOW_WC_SEQ == 42); memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0, sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT); flow->dp_hash = 0; @@ -3188,7 +3196,7 @@ flow_compose(struct dp_packet *p, const struct flow *flow, /* Add code to this function (or its callees) for emitting new fields or * protocols. (This isn't essential, so it can be skipped for initial * testing.) */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); uint32_t pseudo_hdr_csum; size_t l4_len; diff --git a/lib/flow.h b/lib/flow.h index 75751763c..b32f0b277 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -964,7 +964,7 @@ static inline void pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow) { /* Update this function whenever struct flow changes. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); md->recirc_id = flow->recirc_id; md->dp_hash = flow->dp_hash; diff --git a/lib/match.c b/lib/match.c index 0d1ec31ef..25c277cc6 100644 --- a/lib/match.c +++ b/lib/match.c @@ -374,6 +374,34 @@ match_set_tun_erspan_hwid(struct match *match, uint8_t hwid) match_set_tun_erspan_hwid_masked(match, hwid, UINT8_MAX); } +void +match_set_tun_gtpu_flags_masked(struct match *match, uint8_t flags, + uint8_t mask) +{ + match->wc.masks.tunnel.gtpu_flags = flags; + match->flow.tunnel.gtpu_flags = flags & mask; +} + +void +match_set_tun_gtpu_flags(struct match *match, uint8_t flags) +{ + match_set_tun_gtpu_flags_masked(match, flags, UINT8_MAX); +} + +void +match_set_tun_gtpu_msgtype_masked(struct match *match, uint8_t msgtype, + uint8_t mask) +{ + match->wc.masks.tunnel.gtpu_msgtype = msgtype; + match->flow.tunnel.gtpu_msgtype = msgtype & mask; +} + +void +match_set_tun_gtpu_msgtype(struct match *match, uint8_t msgtype) +{ + match_set_tun_gtpu_msgtype_masked(match, msgtype, UINT8_MAX); +} + void match_set_in_port(struct match *match, ofp_port_t ofp_port) { @@ -1325,6 +1353,12 @@ 
format_flow_tunnel(struct ds *s, const struct match *match) if (wc->masks.tunnel.erspan_hwid && tnl->erspan_ver == 2) { ds_put_format(s, "tun_erspan_hwid=%#"PRIx8",", tnl->erspan_hwid); } + if (wc->masks.tunnel.gtpu_flags) { + ds_put_format(s, "gtpu_flags=%#"PRIx8",", tnl->gtpu_flags); + } + if (wc->masks.tunnel.gtpu_msgtype) { + ds_put_format(s, "gtpu_msgtype=%"PRIu8",", tnl->gtpu_msgtype); + } if (wc->masks.tunnel.flags & FLOW_TNL_F_MASK) { format_flags_masked(s, "tun_flags", flow_tun_flag_to_string, tnl->flags & FLOW_TNL_F_MASK, @@ -1396,7 +1430,7 @@ match_format(const struct match *match, bool is_megaflow = false; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); if (priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, "%spriority=%s%d,", diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 8b62e6d96..9ab82460b 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -391,6 +391,10 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc) case MFF_NSH_C3: case MFF_NSH_C4: return !wc->masks.nsh.context[mf->id - MFF_NSH_C1]; + case MFF_TUN_GTPU_FLAGS: + return !wc->masks.tunnel.gtpu_flags; + case MFF_TUN_GTPU_MSGTYPE: + return !wc->masks.tunnel.gtpu_msgtype; case MFF_N_IDS: default: @@ -530,6 +534,8 @@ mf_is_value_valid(const struct mf_field *mf, const union mf_value *value) case MFF_TUN_ERSPAN_VER: case MFF_TUN_ERSPAN_DIR: case MFF_TUN_ERSPAN_HWID: + case MFF_TUN_GTPU_FLAGS: + case MFF_TUN_GTPU_MSGTYPE: CASE_MFF_TUN_METADATA: case MFF_METADATA: case MFF_IN_PORT: @@ -711,6 +717,12 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow, case MFF_TUN_ERSPAN_HWID: value->u8 = flow->tunnel.erspan_hwid; break; + case MFF_TUN_GTPU_FLAGS: + value->u8 = flow->tunnel.gtpu_flags; + break; + case MFF_TUN_GTPU_MSGTYPE: + value->u8 = flow->tunnel.gtpu_msgtype; + break; CASE_MFF_TUN_METADATA: tun_metadata_read(&flow->tunnel, mf, value); break; @@ -1042,6 +1054,12 @@ mf_set_value(const struct mf_field *mf, case 
MFF_TUN_ERSPAN_HWID: match_set_tun_erspan_hwid(match, value->u8); break; + case MFF_TUN_GTPU_FLAGS: + match_set_tun_gtpu_flags(match, value->u8); + break; + case MFF_TUN_GTPU_MSGTYPE: + match_set_tun_gtpu_msgtype(match, value->u8); + break; CASE_MFF_TUN_METADATA: tun_metadata_set_match(mf, value, NULL, match, err_str); break; @@ -1459,6 +1477,12 @@ mf_set_flow_value(const struct mf_field *mf, case MFF_TUN_ERSPAN_HWID: flow->tunnel.erspan_hwid = value->u8; break; + case MFF_TUN_GTPU_FLAGS: + flow->tunnel.gtpu_flags = value->u8; + break; + case MFF_TUN_GTPU_MSGTYPE: + flow->tunnel.gtpu_msgtype = value->u8; + break; CASE_MFF_TUN_METADATA: tun_metadata_write(&flow->tunnel, mf, value); break; @@ -1780,6 +1804,8 @@ mf_is_pipeline_field(const struct mf_field *mf) case MFF_TUN_ERSPAN_IDX: case MFF_TUN_ERSPAN_DIR: case MFF_TUN_ERSPAN_HWID: + case MFF_TUN_GTPU_FLAGS: + case MFF_TUN_GTPU_MSGTYPE: CASE_MFF_TUN_METADATA: case MFF_METADATA: case MFF_IN_PORT: @@ -1970,6 +1996,12 @@ mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str) case MFF_TUN_ERSPAN_HWID: match_set_tun_erspan_hwid_masked(match, 0, 0); break; + case MFF_TUN_GTPU_FLAGS: + match_set_tun_gtpu_flags_masked(match, 0, 0); + break; + case MFF_TUN_GTPU_MSGTYPE: + match_set_tun_gtpu_msgtype_masked(match, 0, 0); + break; CASE_MFF_TUN_METADATA: tun_metadata_set_match(mf, NULL, NULL, match, err_str); break; @@ -2377,6 +2409,12 @@ mf_set(const struct mf_field *mf, case MFF_TUN_ERSPAN_HWID: match_set_tun_erspan_hwid_masked(match, value->u8, mask->u8); break; + case MFF_TUN_GTPU_FLAGS: + match_set_tun_gtpu_flags_masked(match, value->u8, mask->u8); + break; + case MFF_TUN_GTPU_MSGTYPE: + match_set_tun_gtpu_msgtype_masked(match, value->u8, mask->u8); + break; CASE_MFF_TUN_METADATA: tun_metadata_set_match(mf, value, mask, match, err_str); break; diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml index 2f9c5ee16..d4495552b 100644 --- a/lib/meta-flow.xml +++ b/lib/meta-flow.xml @@ -1456,7 +1456,8 @@ 
ovs-ofctl add-flow br-int 'in_port=3,tun_src=192.168.1.1,tun_id=5001 actions=1'
  • LISP has a 24-bit instance ID.
  • GRE has an optional 32-bit key.
  • STT has a 64-bit key.
  • -
  • ERSPAN has a 10-bit key (Session ID).
  • +
  • ERSPAN has a 10-bit key (Session ID).
  • +
  • GTPU has a 32-bit key (Tunnel Endpoint ID).
  • @@ -1797,6 +1798,82 @@ ovs-ofctl add-flow br-int 'in_port=3,tun_src=192.168.1.1,tun_id=5001 actions=1' A 6-bit unique identifier of an ERSPAN v2 engine within a system. +

    GTP-U Metadata Fields

    + +

    + These fields provide access to header fields of the GPRS Tunnelling Protocol + for User Plane (GTPv1-U), based on 3GPP TS 29.281. A GTP-U + header has the following format: +

    + + +
    + + + + +
    + +
    + +

    + The flags and message type are exposed through the Open vSwitch GTP-U specific fields + described below. Open vSwitch makes the TEID (Tunnel Endpoint + Identifier), which identifies a tunnel endpoint in the receiving GTP-U + protocol entity, available via tun_id. +

    + + +

    + This field holds the 8-bit GTP-U flags, encoded as: +

    + + +
    + + + + + + +
    +
    + +

    + The flags are: +

    +
    +
    version
    +
    Used to determine the version of the GTP-U protocol, which should + be set to 1.
    + +
    PT
    +
    Protocol type, used as a protocol discriminator + between GTP (1) and GTP' (0).
    + +
    rsv
    +
    Reserved. Must be zero.
    + +
    E
    +
    If 1, indicates the presence of a meaningful value of the Next + Extension Header field.
    + +
    S
    +
    If 1, indicates the presence of a meaningful value of the Sequence + Number field.
    + +
    PN
    +
    If 1, indicates the presence of a meaningful value of the N-PDU + Number field.
    +
    +
    + + + This field indicates whether it's a signalling message used for path + management, or a user plane message which carries the original packet. + For the complete range of message types, refer to [3GPP TS 29.281]. + +

    Geneve Fields

    diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index a78972888..0acc87953 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -55,6 +55,9 @@ static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5); #define GENEVE_BASE_HLEN (sizeof(struct udp_header) + \ sizeof(struct genevehdr)) +#define GTPU_HLEN (sizeof(struct udp_header) + \ + sizeof(struct gtpuhdr)) + uint16_t tnl_udp_port_min = 32768; uint16_t tnl_udp_port_max = 61000; @@ -213,6 +216,27 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl, return udp + 1; } +static void +netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet, + int ip_tot_size) +{ + uint32_t csum; + + if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) { + csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr( + dp_packet_data(packet))); + } else { + csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr( + dp_packet_data(packet))); + } + + csum = csum_continue(csum, udp, ip_tot_size); + udp->udp_csum = csum_finish(csum); + + if (!udp->udp_csum) { + udp->udp_csum = htons(0xffff); + } +} void netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, @@ -229,19 +253,7 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED, udp->udp_len = htons(ip_tot_size); if (udp->udp_csum) { - uint32_t csum; - if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) { - csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(dp_packet_data(packet))); - } else { - csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet))); - } - - csum = csum_continue(csum, udp, ip_tot_size); - udp->udp_csum = csum_finish(csum); - - if (!udp->udp_csum) { - udp->udp_csum = htons(0xffff); - } + netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); } } @@ -707,6 +719,133 @@ netdev_erspan_build_header(const struct netdev *netdev, return 0; } +struct dp_packet * +netdev_gtpu_pop_header(struct dp_packet *packet) +{ + struct pkt_metadata *md = &packet->md; + struct 
flow_tnl *tnl = &md->tunnel; + struct gtpuhdr *gtph; + unsigned int gtpu_hlen; + unsigned int hlen; + + ovs_assert(packet->l3_ofs > 0); + ovs_assert(packet->l4_ofs > 0); + + pkt_metadata_init_tnl(md); + if (GTPU_HLEN > dp_packet_l4_size(packet)) { + goto err; + } + + gtph = udp_extract_tnl_md(packet, tnl, &hlen); + if (!gtph) { + goto err; + } + + tnl->gtpu_flags = gtph->md.flags; + tnl->gtpu_msgtype = gtph->md.msgtype; + tnl->tun_id = be32_to_be64(get_16aligned_be32(&gtph->teid)); + + if (tnl->gtpu_msgtype == GTPU_MSGTYPE_GPDU) { + struct ip_header *ip; + + if (gtph->md.flags & GTPU_S_MASK) { + gtpu_hlen = GTPU_HLEN + sizeof(struct gtpuhdr_opt); + } else { + gtpu_hlen = GTPU_HLEN; + } + ip = ALIGNED_CAST(struct ip_header *, (char *)gtph + gtpu_hlen); + + if (IP_VER(ip->ip_ihl_ver) == 4) { + packet->packet_type = htonl(PT_IPV4); + } else if (IP_VER(ip->ip_ihl_ver) == 6) { + packet->packet_type = htonl(PT_IPV6); + } else { + VLOG_WARN_RL(&err_rl, "GTP-U: Receive non-IP packet."); + } + dp_packet_reset_packet(packet, hlen + gtpu_hlen); + } else { + /* non-GPDU GTP-U messages, ex: echo request, end marker. + * Users should redirect these packets to controller, or + * any application that handles GTP-U messages, so keep + * the original packet.
+ */ + packet->packet_type = htonl(PT_ETH); + VLOG_WARN_ONCE("Receive non-GPDU msgtype: %"PRIu8, + gtph->md.msgtype); + } + + return packet; + +err: + dp_packet_delete(packet); + return NULL; +} + +void +netdev_gtpu_push_header(const struct netdev *netdev, + struct dp_packet *packet, + const struct ovs_action_push_tnl *data) +{ + struct netdev_vport *dev = netdev_vport_cast(netdev); + struct netdev_tunnel_config *tnl_cfg; + struct udp_header *udp; + struct gtpuhdr *gtpuh; + int ip_tot_size; + unsigned int payload_len; + + payload_len = dp_packet_size(packet); + udp = netdev_tnl_push_ip_header(packet, data->header, + data->header_len, &ip_tot_size); + udp->udp_src = netdev_tnl_get_src_port(packet); + udp->udp_len = htons(ip_tot_size); + netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size); + + gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1); + + tnl_cfg = &dev->tnl_cfg; + if (tnl_cfg->set_seq) { + ovs_be16 *seqno = ALIGNED_CAST(ovs_be16 *, gtpuh + 1); + *seqno = htons(tnl_cfg->seqno++); + payload_len += sizeof(struct gtpuhdr_opt); + } + gtpuh->len = htons(payload_len); +} + +int +netdev_gtpu_build_header(const struct netdev *netdev, + struct ovs_action_push_tnl *data, + const struct netdev_tnl_build_header_params *params) +{ + struct netdev_vport *dev = netdev_vport_cast(netdev); + struct netdev_tunnel_config *tnl_cfg; + struct gtpuhdr *gtph; + unsigned int gtpu_hlen; + + ovs_mutex_lock(&dev->mutex); + tnl_cfg = &dev->tnl_cfg; + gtph = udp_build_header(tnl_cfg, data, params); + + /* Set to default if not set in flow. */ + gtph->md.flags = params->flow->tunnel.gtpu_flags ? + params->flow->tunnel.gtpu_flags : GTPU_FLAGS_DEFAULT; + gtph->md.msgtype = params->flow->tunnel.gtpu_msgtype ? 
+ params->flow->tunnel.gtpu_msgtype : GTPU_MSGTYPE_GPDU; + put_16aligned_be32(&gtph->teid, + be64_to_be32(params->flow->tunnel.tun_id)); + + gtpu_hlen = sizeof *gtph; + if (tnl_cfg->set_seq) { + gtph->md.flags |= GTPU_S_MASK; + gtpu_hlen += sizeof(struct gtpuhdr_opt); + } + ovs_mutex_unlock(&dev->mutex); + + data->header_len += gtpu_hlen; + data->tnl_type = OVS_VPORT_TYPE_GTPU; + + return 0; +} + struct dp_packet * netdev_vxlan_pop_header(struct dp_packet *packet) { diff --git a/lib/netdev-native-tnl.h b/lib/netdev-native-tnl.h index 5dc00122d..22ae2ce53 100644 --- a/lib/netdev-native-tnl.h +++ b/lib/netdev-native-tnl.h @@ -52,6 +52,19 @@ netdev_erspan_push_header(const struct netdev *netdev, struct dp_packet * netdev_erspan_pop_header(struct dp_packet *packet); +struct dp_packet * +netdev_gtpu_pop_header(struct dp_packet *packet); + +void +netdev_gtpu_push_header(const struct netdev *netdev, + struct dp_packet *packet, + const struct ovs_action_push_tnl *data); + +int +netdev_gtpu_build_header(const struct netdev *netdev, + struct ovs_action_push_tnl *data, + const struct netdev_tnl_build_header_params *p); + void netdev_tnl_push_udp_header(const struct netdev *netdev, struct dp_packet *packet, diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index b57d21ff8..8efd1eee8 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -111,7 +111,8 @@ netdev_vport_needs_dst_port(const struct netdev *dev) return (class->get_config == get_tunnel_config && (!strcmp("geneve", type) || !strcmp("vxlan", type) || - !strcmp("lisp", type) || !strcmp("stt", type)) ); + !strcmp("lisp", type) || !strcmp("stt", type) || + !strcmp("gtpu", type))); } const char * @@ -216,6 +217,8 @@ netdev_vport_construct(struct netdev *netdev_) dev->tnl_cfg.dst_port = port ? htons(port) : htons(LISP_DST_PORT); } else if (!strcmp(type, "stt")) { dev->tnl_cfg.dst_port = port ? htons(port) : htons(STT_DST_PORT); + } else if (!strcmp(type, "gtpu")) { + dev->tnl_cfg.dst_port = port ?
htons(port) : htons(GTPU_DST_PORT); } dev->tnl_cfg.dont_fragment = true; @@ -433,6 +436,8 @@ tunnel_supported_layers(const char *type, } else if (!strcmp(type, "vxlan") && tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) { return TNL_L2 | TNL_L3; + } else if (!strcmp(type, "gtpu")) { + return TNL_L3; } else { return TNL_L2; } @@ -589,6 +594,10 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp) tnl_cfg.dst_port = htons(STT_DST_PORT); } + if (!strcmp(type, "gtpu")) { + tnl_cfg.dst_port = htons(GTPU_DST_PORT); + } + needs_dst_port = netdev_vport_needs_dst_port(dev_); tnl_cfg.dont_fragment = true; @@ -907,7 +916,8 @@ get_tunnel_config(const struct netdev *dev, struct smap *args) if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) || (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) || (!strcmp("lisp", type) && dst_port != LISP_DST_PORT) || - (!strcmp("stt", type) && dst_port != STT_DST_PORT)) { + (!strcmp("stt", type) && dst_port != STT_DST_PORT) || + (!strcmp("gtpu", type) && dst_port != GTPU_DST_PORT)) { smap_add_format(args, "dst_port", "%d", dst_port); } } @@ -1223,6 +1233,17 @@ netdev_vport_tunnel_register(void) }, {{NULL, NULL, 0, 0}} }, + { "gtpu_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "gtpu", + .build_header = netdev_gtpu_build_header, + .push_header = netdev_gtpu_push_header, + .pop_header = netdev_gtpu_pop_header, + }, + {{NULL, NULL, 0, 0}} + }, + }; static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; diff --git a/lib/nx-match.c b/lib/nx-match.c index 0432ad4de..058816c7b 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -1051,7 +1051,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, ovs_be32 spi_mask; int match_len; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); struct nxm_put_ctx ctx = { .output = b, .implied_ethernet = false }; @@ -1191,6 +1191,12 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, 
nxm_put_8m(&ctx, MFF_TUN_ERSPAN_HWID, oxm, flow->tunnel.erspan_hwid, match->wc.masks.tunnel.erspan_hwid); + /* GTP-U */ + nxm_put_8m(&ctx, MFF_TUN_GTPU_FLAGS, oxm, flow->tunnel.gtpu_flags, + match->wc.masks.tunnel.gtpu_flags); + nxm_put_8m(&ctx, MFF_TUN_GTPU_MSGTYPE, oxm, flow->tunnel.gtpu_msgtype, + match->wc.masks.tunnel.gtpu_msgtype); + /* Network Service Header */ nxm_put_8m(&ctx, MFF_NSH_FLAGS, oxm, flow->nsh.flags, match->wc.masks.nsh.flags); diff --git a/lib/odp-util.c b/lib/odp-util.c index 746d1e97d..b66d266cc 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -756,7 +756,17 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data) } else { VLOG_WARN("%s Invalid ERSPAN version %d\n", __func__, ersh->ver); } + } else if (data->tnl_type == OVS_VPORT_TYPE_GTPU) { + const struct gtpuhdr *gtph; + + gtph = format_udp_tnl_push_header(ds, udp); + + ds_put_format(ds, "gtpu(flags=0x%"PRIx8 + ",msgtype=%"PRIu8",teid=0x%"PRIx32")", + gtph->md.flags, gtph->md.msgtype, + ntohl(get_16aligned_be32(&gtph->teid))); } + ds_put_format(ds, ")"); } @@ -1500,6 +1510,8 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data) void *l3, *l4; int n = 0; uint8_t hwid, dir; + uint32_t teid; + uint8_t gtpu_flags, gtpu_msgtype; if (!ovs_scan_len(s, &n, "tnl_push(tnl_port(%"SCNi32"),", &data->tnl_port)) { return -EINVAL; @@ -1729,6 +1741,18 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data) header_len = sizeof *eth + ip_len + ERSPAN_GREHDR_LEN + sizeof *ersh + ERSPAN_V2_MDSIZE; + + } else if (ovs_scan_len(s, &n, "gtpu(flags=%"SCNi8",msgtype=%" + SCNu8",teid=0x%"SCNx32"))", + &gtpu_flags, &gtpu_msgtype, &teid)) { + struct gtpuhdr *gtph = (struct gtpuhdr *) (udp + 1); + + gtph->md.flags = gtpu_flags; + gtph->md.msgtype = gtpu_msgtype; + put_16aligned_be32(&gtph->teid, htonl(teid)); + tnl_type = OVS_VPORT_TYPE_GTPU; + header_len = sizeof *eth + ip_len + + sizeof *udp + sizeof *gtph; } else { return -EINVAL; } @@ -2630,6 +2654,7 @@ static const
struct attr_len_tbl ovs_tun_key_attr_lens[OVS_TUNNEL_KEY_ATTR_MAX + [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = 16 }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = 16 }, [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = ATTR_LEN_VARIABLE }, + [OVS_TUNNEL_KEY_ATTR_GTPU_OPTS] = { .len = ATTR_LEN_VARIABLE }, }; const struct attr_len_tbl ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] = { @@ -3035,6 +3060,13 @@ odp_tun_key_from_attr__(const struct nlattr *attr, bool is_mask, } break; } + case OVS_TUNNEL_KEY_ATTR_GTPU_OPTS: { + const struct gtpu_metadata *opts = nl_attr_get(a); + + tun->gtpu_flags = opts->flags; + tun->gtpu_msgtype = opts->msgtype; + break; + } default: /* Allow this to show up as unexpected, if there are unknown @@ -3149,6 +3181,15 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key, &opts, sizeof(opts)); } + if ((!tnl_type || !strcmp(tnl_type, "gtpu")) && + (tun_key->gtpu_flags && tun_key->gtpu_msgtype)) { + struct gtpu_metadata opts; + + opts.flags = tun_key->gtpu_flags; + opts.msgtype = tun_key->gtpu_msgtype; + nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + &opts, sizeof(opts)); + } nl_msg_end_nested(a, tun_key_ofs); } @@ -3645,6 +3686,22 @@ format_odp_tun_erspan_opt(const struct nlattr *attr, ds_chomp(ds, ','); } +static void +format_odp_tun_gtpu_opt(const struct nlattr *attr, + const struct nlattr *mask_attr, struct ds *ds, + bool verbose) +{ + const struct gtpu_metadata *opts, *mask; + + opts = nl_attr_get(attr); + mask = mask_attr ? nl_attr_get(mask_attr) : NULL; + + format_u8x(ds, "flags", opts->flags, mask ? &mask->flags : NULL, verbose); + format_u8u(ds, "msgtype", opts->msgtype, mask ? &mask->msgtype : NULL, + verbose); + ds_chomp(ds, ','); +} + #define MASK(PTR, FIELD) PTR ? 
&PTR->FIELD : NULL static void @@ -3897,6 +3954,11 @@ format_odp_tun_attr(const struct nlattr *attr, const struct nlattr *mask_attr, format_odp_tun_erspan_opt(a, ma, ds, verbose); ds_put_cstr(ds, "),"); break; + case OVS_TUNNEL_KEY_ATTR_GTPU_OPTS: + ds_put_cstr(ds, "gtpu("); + format_odp_tun_gtpu_opt(a, ma, ds, verbose); + ds_put_cstr(ds, ")"); + break; case __OVS_TUNNEL_KEY_ATTR_MAX: default: format_unknown_key(ds, a, ma); @@ -5104,6 +5166,50 @@ scan_vxlan_gbp(const char *s, uint32_t *key, uint32_t *mask) return 0; } +static int +scan_gtpu_metadata(const char *s, + struct gtpu_metadata *key, + struct gtpu_metadata *mask) +{ + const char *s_base = s; + uint8_t flags, flags_ma; + uint8_t msgtype, msgtype_ma; + int len; + + if (!strncmp(s, "flags=", 6)) { + s += 6; + len = scan_u8(s, &flags, mask ? &flags_ma : NULL); + if (len == 0) { + return 0; + } + s += len; + } + + if (s[0] == ',') { + s++; + } + + if (!strncmp(s, "msgtype=", 8)) { + s += 8; + len = scan_u8(s, &msgtype, mask ? &msgtype_ma : NULL); + if (len == 0) { + return 0; + } + s += len; + } + + if (!strncmp(s, ")", 1)) { + s += 1; + key->flags = flags; + key->msgtype = msgtype; + if (mask) { + mask->flags = flags_ma; + mask->msgtype = msgtype_ma; + } + } + return s - s_base; +} + static int scan_erspan_metadata(const char *s, struct erspan_metadata *key, @@ -5344,6 +5450,15 @@ erspan_to_attr(struct ofpbuf *a, const void *data_) sizeof *md); } +static void +gtpu_to_attr(struct ofpbuf *a, const void *data_) +{ + const struct gtpu_metadata *md = data_; + + nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GTPU_OPTS, md, + sizeof *md); +} + #define SCAN_PUT_ATTR(BUF, ATTR, DATA, FUNC) \ { \ unsigned long call_fn = (unsigned long)FUNC; \ @@ -5730,6 +5845,8 @@ parse_odp_key_mask_attr__(struct parse_odp_context *context, const char *s, SCAN_FIELD_NESTED_FUNC("vxlan(gbp(", uint32_t, vxlan_gbp, vxlan_gbp_to_attr); SCAN_FIELD_NESTED_FUNC("geneve(", struct geneve_scan, geneve, geneve_to_attr); + 
SCAN_FIELD_NESTED_FUNC("gtpu(", struct gtpu_metadata, gtpu_metadata, + gtpu_to_attr); SCAN_FIELD_NESTED_FUNC("flags(", uint16_t, tun_flags, tun_flags_to_attr); } SCAN_END_NESTED(); @@ -5997,7 +6114,7 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms, /* New "struct flow" fields that are visible to the datapath (including all * data fields) should be translated into equivalent datapath flow fields * here (you will have to add a OVS_KEY_ATTR_* for them). */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); struct ovs_key_ethernet *eth_key; size_t encap[FLOW_MAX_VLAN_HEADERS] = {0}; @@ -7096,7 +7213,7 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, /* New "struct flow" fields that are visible to the datapath (including all * data fields) should be translated from equivalent datapath flow fields * here (you will have to add a OVS_KEY_ATTR_* for them). */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); enum odp_key_fitness fitness = ODP_FIT_ERROR; if (errorp) { @@ -8445,7 +8562,7 @@ commit_odp_actions(const struct flow *flow, struct flow *base, /* If you add a field that OpenFlow actions can change, and that is visible * to the datapath (including all data fields), then you should also add * code here to commit changes to the field. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); enum slow_path_reason slow1, slow2; bool mpls_done = false; diff --git a/lib/odp-util.h b/lib/odp-util.h index 4ecce1aac..623a66aa2 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -147,7 +147,7 @@ void odp_portno_name_format(const struct hmap *portno_names, * add another field and forget to adjust this value. */ #define ODPUTIL_FLOW_KEY_BYTES 640 -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); /* A buffer with sufficient size and alignment to hold an nlattr-formatted flow * key. 
An array of "struct nlattr" might not, in theory, be sufficiently diff --git a/lib/ofp-match.c b/lib/ofp-match.c index 2ec28f803..86a082dde 100644 --- a/lib/ofp-match.c +++ b/lib/ofp-match.c @@ -65,7 +65,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask) void ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); /* Initialize most of wc. */ flow_wildcards_init_catchall(wc); diff --git a/lib/packets.h b/lib/packets.h index 4c1e91dee..447e6f6fa 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -1447,6 +1447,74 @@ static inline ovs_be32 get_erspan_ts(enum erspan_ts_gra gra) return ts; } +/* + * GTP-U protocol header and metadata + * See: + * User Plane Protocol and Architectural Analysis on 3GPP 5G System + * draft-hmm-dmm-5g-uplane-analysis-00 + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Ver |P|R|E|S|N| Message Type| Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Tunnel Endpoint Identifier | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Sequence Number | N-PDU Number | Next-Ext-Hdr | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * GTP-U Flags: + * P: Protocol Type (Set to '1') + * R: Reserved Bit (Set to '0') + * E: Extension Header Flag (Set to '1' if extension header exists) + * S: Sequence Number Flag (Set to '1' if sequence number exists) + * N: N-PDU Number Flag (Set to '1' if N-PDU number exists) + * + * GTP-U Message Type: + * Indicates the type of GTP-U message. + * + * GTP-U Length: + * Indicates the length in octets of the payload. + * + * User payload is transmitted in G-PDU packets. + */ + +#define GTPU_VER_MASK 0xe0 +#define GTPU_P_MASK 0x10 +#define GTPU_E_MASK 0x04 +#define GTPU_S_MASK 0x02 + +/* GTP-U UDP port. 
*/ +#define GTPU_DST_PORT 2152 + +/* Default GTP-U flags: Ver = 1 and P = 1. */ +#define GTPU_FLAGS_DEFAULT 0x30 + +/* GTP-U message type for normal user plane PDU. */ +#define GTPU_MSGTYPE_REQ 1 /* Echo Request. */ +#define GTPU_MSGTYPE_REPL 2 /* Echo Reply. */ +#define GTPU_MSGTYPE_GPDU 255 /* User Payload. */ + +struct gtpu_metadata { + uint8_t flags; + uint8_t msgtype; +}; +BUILD_ASSERT_DECL(sizeof(struct gtpu_metadata) == 2); + +struct gtpuhdr { + struct gtpu_metadata md; + ovs_be16 len; + ovs_16aligned_be32 teid; +}; +BUILD_ASSERT_DECL(sizeof(struct gtpuhdr) == 8); + +struct gtpuhdr_opt { + ovs_be16 seqno; + uint8_t pdu_number; + uint8_t next_ext_type; +}; +BUILD_ASSERT_DECL(sizeof(struct gtpuhdr_opt) == 4); + /* VXLAN protocol header */ struct vxlanhdr { union { diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c index 17353046c..446b40763 100644 --- a/lib/tnl-ports.c +++ b/lib/tnl-ports.c @@ -178,6 +178,9 @@ tnl_type_to_nw_proto(const char type[]) if (!strcmp(type, "vxlan")) { return IPPROTO_UDP; } + if (!strcmp(type, "gtpu")) { + return IPPROTO_UDP; + } return 0; } diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h index 147ef9c33..e5d02caf2 100644 --- a/ofproto/ofproto-dpif-rid.h +++ b/ofproto/ofproto-dpif-rid.h @@ -99,7 +99,7 @@ struct rule; /* Metadata for restoring pipeline context after recirculation. Helpers * are inlined below to keep them together with the definition for easier * updates. */ -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); struct frozen_metadata { /* Metadata in struct flow. 
*/ diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 28dcc67dd..042c50a63 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -3572,6 +3572,7 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac, break; case OVS_VPORT_TYPE_VXLAN: case OVS_VPORT_TYPE_GENEVE: + case OVS_VPORT_TYPE_GTPU: nw_proto = IPPROTO_UDP; break; case OVS_VPORT_TYPE_LISP: @@ -4123,7 +4124,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a patch port. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42); memset(&flow_tnl, 0, sizeof flow_tnl); if (!check_output_prerequisites(ctx, xport, flow, check_stp)) { diff --git a/tests/ofproto.at b/tests/ofproto.at index 23a5e1505..76a3be44d 100644 --- a/tests/ofproto.at +++ b/tests/ofproto.at @@ -2352,7 +2352,7 @@ head_table () { actions: output group set_field strip_vlan push_vlan mod_nw_ttl dec_ttl set_mpls_ttl dec_mpls_ttl push_mpls pop_mpls set_queue supported on Set-Field: tun_{id,src,dst,ipv6_{src,dst},flags,gbp_{id,flags},erspan_{idx,ver,dir,hwid},metadata0...metadata63} metadata in_{port,port_oxm} pkt_mark ct_{mark,label} reg0...reg15 xreg0...xreg7 xxreg0...xxreg3 eth_{src,dst} vlan_{tci,vid,pcp} mpls_{label,tc,ttl} ip_{src,dst} ipv6_{src,dst,label} nw_tos ip_dscp nw_{ecn,ttl} arp_{op,spa,tpa,sha,tha} tcp_{src,dst} udp_{src,dst} sctp_{src,dst} icmp_{type,code} icmpv6_{type,code} nd_{target,sll,tll,reserved,options_type} nsh_{flags,spi,si,c1...c4,ttl} matching: - arbitrary mask: dp_hash tun_{id,src,dst,ipv6_{src,dst},flags,gbp_{id,flags},erspan_{idx,ver,dir,hwid},metadata0...metadata63} metadata pkt_mark ct_{state,mark,label,nw_{src,dst},ipv6_{src,dst},tp_{src,dst}} reg0...reg15 xreg0...xreg7 xxreg0...xxreg3 eth_{src,dst} vlan_{tci,vid} ip_{src,dst} ipv6_{src,dst,label} ip_frag arp_{spa,tpa,sha,tha} tcp_{src,dst,flags} 
udp_{src,dst} sctp_{src,dst} nd_{target,sll,tll} nsh_{flags,c1...c4} + arbitrary mask: dp_hash tun_{id,src,dst,ipv6_{src,dst},flags,gbp_{id,flags},erspan_{idx,ver,dir,hwid},gtpu_{flags,msgtype},metadata0...metadata63} metadata pkt_mark ct_{state,mark,label,nw_{src,dst},ipv6_{src,dst},tp_{src,dst}} reg0...reg15 xreg0...xreg7 xxreg0...xxreg3 eth_{src,dst} vlan_{tci,vid} ip_{src,dst} ipv6_{src,dst,label} ip_frag arp_{spa,tpa,sha,tha} tcp_{src,dst,flags} udp_{src,dst} sctp_{src,dst} nd_{target,sll,tll} nsh_{flags,c1...c4} exact match or wildcard: recirc_id packet_type conj_id in_{port,port_oxm} actset_output ct_{zone,nw_proto} eth_type vlan_pcp mpls_{label,tc,bos,ttl} nw_{proto,tos} ip_dscp nw_{ecn,ttl} arp_op icmp_{type,code} icmpv6_{type,code} nd_{reserved,options_type} nsh_{mdtype,np,spi,si,ttl} ' "$1" diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at index b92c23fde..48c5de9d1 100644 --- a/tests/tunnel-push-pop.at +++ b/tests/tunnel-push-pop.at @@ -216,6 +216,8 @@ AT_CHECK([ovs-vsctl add-port int-br t2 -- set Interface t2 type=vxlan \ options:remote_ip=1.1.2.92 options:key=456 options:packet_type=legacy_l3 ofport_request=7\ -- add-port int-br t7 -- set Interface t7 type=vxlan \ options:remote_ip=1.1.2.92 options:key=345 options:exts=gpe ofport_request=8\ + -- add-port int-br t8 -- set Interface t8 type=gtpu \ + options:remote_ip=1.1.2.92 options:key=123 ofport_request=9\ ], [0]) AT_CHECK([ovs-appctl dpif/show], [0], [dnl @@ -232,6 +234,7 @@ dummy@ovs-dummy: hit:0 missed:0 t5 6/6081: (geneve: egress_pkt_mark=1234, out_key=flow, remote_ip=1.1.2.93) t6 7/3: (gre: key=456, packet_type=legacy_l3, remote_ip=1.1.2.92) t7 8/4789: (vxlan: key=345, remote_ip=1.1.2.92) + t8 9/2152: (gtpu: key=123, remote_ip=1.1.2.92) ]) dnl First setup dummy interface IP address, then add the route @@ -342,6 +345,7 @@ AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl Listening ports: genev_sys_6081 (6081) ref_cnt=2 gre_sys (3) ref_cnt=2 +gtpu_sys_2152 (2152) ref_cnt=1 
vxlan_sys_4789 (4789) ref_cnt=3 ]) @@ -369,6 +373,13 @@ AT_CHECK([tail -1 stdout], [0], [Datapath actions: tnl_pop(6081) ]) +dnl Check GTP-U tunnel pop +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=17,tos=0,ttl=64,frag=no),udp(src=51283,dst=2152)'], +[0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: tnl_pop(2152) +]) + dnl Check VXLAN tunnel push AT_CHECK([ovs-ofctl add-flow int-br action=2]) AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout]) @@ -426,6 +437,15 @@ AT_CHECK([tail -1 stdout], [0], [Datapath actions: clone(tnl_push(tnl_port(6081),header(size=58,type=5,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=6081,csum=0x0),geneve(crit,vni=0x7b,options({class=0xffff,type=0x80,len=4,0xa}))),out_port(100)),1) ]) +dnl Check GTP-U tunnel push +AT_CHECK([ovs-ofctl add-flow int-br "actions=9"]) +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], +[0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: pop_eth,clone(tnl_push(tnl_port(2152),header(size=50,type=110,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=2152,csum=0x0),gtpu(flags=0x30,msgtype=255,teid=0x7b)),out_port(100)),1) +]) +AT_CHECK([ovs-ofctl del-flows int-br]) + dnl Check decapsulation of GRE packet AT_CHECK([ovs-appctl netdev-dummy/receive p0 
'aa55aa550000001b213cab6408004500007e79464000402fba550101025c0101025820006558000001c8fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007e79464000402fba550101025c0101025820006558000001c8fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637']) @@ -515,6 +535,7 @@ AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl Listening ports: genev_sys_6081 (6081) ref_cnt=1 gre_sys (3) ref_cnt=1 +gtpu_sys_2152 (2152) ref_cnt=1 vxlan_sys_4789 (4789) ref_cnt=2 vxlan_sys_4790 (4790) ref_cnt=1 ]) @@ -524,6 +545,7 @@ AT_CHECK([ovs-vsctl del-port int-br t1 \ -- del-port int-br t4 \ -- del-port int-br t6 \ -- del-port int-br t7 \ + -- del-port int-br t8 \ ], [0]) dnl Check tunnel lookup entries after deleting all remaining tunnel ports diff --git a/tests/tunnel.at b/tests/tunnel.at index ce000a25e..d65bf4412 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -1041,3 +1041,79 @@ AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([tunnel - GTP-U basic]) +OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gtpu \ + options:remote_ip=1.1.1.1 \ + options:key=123 ofport_request=1]) + +AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl + br0 65534/100: (dummy-internal) + p1 1/2152: (gtpu: key=123, remote_ip=1.1.1.1) +]) + +AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl +Listening ports: +gtpu_sys_2152 (2152) ref_cnt=1 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([tunnel - GTP-U push and pop]) +OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=dummy \ + ofport_request=1 \ + -- add-port br0 p2 -- set Interface p2 type=dummy \ + 
ofport_request=2]) + +# Add these ports separately to ensure that they get the datapath port +# number expected below. +ovs-vsctl -- add-port br0 p3 \ + -- set Interface p3 type=gtpu \ + ofport_request=3 \ + options:remote_ip=1.1.1.1 \ + options:key=3 \ + options:packet_type=legacy_l3 +ovs-vsctl -- add-port br0 p4 \ + -- set Interface p4 type=gtpu \ + ofport_request=4 \ + options:remote_ip=1.1.1.2 \ + options:key=4 \ + options:packet_type=legacy_l3 +OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP + +dnl AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl +AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl + p1 1/1: (dummy) + p2 2/2: (dummy) + p3 3/2152: (gtpu: key=3, remote_ip=1.1.1.1) + p4 4/2152: (gtpu: key=4, remote_ip=1.1.1.2) +]) + +AT_DATA([flows.txt], [dnl +in_port=1,actions=3 +in_port=2,actions=4 +in_port=3,tun_gtpu_flags=0x30,tun_gtpu_msgtype=255,actions=1 +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl +Listening ports: +gtpu_sys_2152 (2152) ref_cnt=2 +]) + +dnl Encap: in_port=1,actions=3 +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: set(tunnel(tun_id=0x3,dst=1.1.1.1,ttl=64,tp_dst=2152,flags(df|key))),pop_eth,2152 +]) + +dnl receive packet from GTP-U port, match it, and output to layer3 GRE +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x3,src=1.1.1.1,dst=2.2.2.2,ttl=64,gtpu(flags=0x30,msgtype=255),flags(df|key)),in_port(2152),packet_type(ns=1,id=0),skb_mark(0),ipv4(frag=no)'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,packet_type=(1,0),tun_id=0x3,tun_src=1.1.1.1,tun_dst=2.2.2.2,tun_tos=0,gtpu_flags=0x30,gtpu_msgtype=255,tun_flags=+df-csum+key,in_port=3,dl_type=0x0000 +Datapath actions: 
push_eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00),1 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 4a74ed3ef..f9339af85 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2647,6 +2647,30 @@

    A pair of virtual devices that act as a patch cable.
    + +
    gtpu
    +
    +

    + GPRS Tunneling Protocol (GTP) is a group of IP-based communications + protocols used to carry general packet radio service (GPRS) within + GSM, UMTS and LTE networks. GTP-U is used for carrying user data + within the GPRS core network and between the radio access network + and the core network. The user data transported can be packets in + any of IPv4, IPv6, or PPP formats. +

    + +

    + The protocol is documented at + http://www.3gpp.org/DynaReport/29281.htm +

    + +

    + Open vSwitch uses UDP destination port 2152. The source port used + for GTP traffic varies on a per-flow basis and is in the ephemeral + port range. +

    +
    +
    -- GitLab From 2b7e536fa5e20be10e620b959e05557f88862d2c Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Wed, 25 Mar 2020 21:15:23 +0100 Subject: [PATCH 085/432] Revert "ovsdb-idl: Avoid sending redundant conditional monitoring updates" This reverts commit 5351980b047f4dd40be7a59a1e4b910df21eca0a. If the ovsdb-server reply to "monitor_cond_since" requests has "found" == false then ovsdb_idl_db_parse_monitor_reply() calls ovsdb_idl_db_clear() which iterates through all tables and unconditionally sets table->cond_changed to false. However, if the client had already set a new condition for some of the tables, this new condition request will never be sent to ovsdb-server until the condition is reset to a different value. This is due to the check in ovsdb_idl_db_set_condition(). One way to replicate the issue is described in the bugzilla reporting the bug, when ovn-controller is configured to use "ovn-monitor-all": https://bugzilla.redhat.com/show_bug.cgi?id=1808125#c6 Commit 5351980b047f tried to optimize sending redundant conditional monitoring updates but the chances that this scenario happens with the latest code is quite low since commit 403a6a0cb003 ("ovsdb-idl: Fast resync from server when connection reset.") changed the behavior of ovsdb_idl_db_parse_monitor_reply() to avoid calling ovsdb_idl_db_clear() in most cases. 
Reported-by: Dan Williams Reported-at: https://bugzilla.redhat.com/1808125 CC: Andy Zhou Fixes: 5351980b047f ("ovsdb-idl: Avoid sending redundant conditional monitoring updates") Acked-by: Han Zhou Acked-by: Ilya Maximets Signed-off-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 190143f36..1535ad7b5 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -610,7 +610,6 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db) struct ovsdb_idl_table *table = &db->tables[i]; struct ovsdb_idl_row *row, *next_row; - table->cond_changed = false; if (hmap_is_empty(&table->rows)) { continue; } @@ -634,7 +633,6 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db) } ovsdb_idl_row_destroy_postprocess(db); - db->cond_changed = false; db->cond_seqno = 0; ovsdb_idl_db_track_clear(db); -- GitLab From cdc5d9cba175e233eb8da498d46ff4be5080b035 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 27 Mar 2020 09:51:51 +0100 Subject: [PATCH 086/432] cirrus: Force pkg update on FreeBSD. Seems like FreeBSD ports/images are not well maintained and frequently causes package installation failures like this: [1/40] Fetching automake-1.16.1_2.txz: .......... done pkg: cached package automake-1.16.1_2: size mismatch, fetching from remote [2/40] Fetching automake-1.16.1_2.txz: .......... done pkg: cached package automake-1.16.1_2: size mismatch, cannot continue Consider running 'pkg update -f' Forced update doesn't increase build time significantly, but helps to solve at least this one kind of issues. 
Acked-by: William Tu Signed-off-by: Ilya Maximets --- .cirrus.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.cirrus.yml b/.cirrus.yml index 1b32f55d6..9428164ee 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -16,6 +16,7 @@ freebsd_build_task: prepare_script: - sysctl -w kern.coredump=0 + - pkg update -f - pkg install -y ${DEPENDENCIES} configure_script: -- GitLab From fed4282c53667def41d997a6bf4cb66b5da16b74 Mon Sep 17 00:00:00 2001 From: Usman Ansari Date: Wed, 1 Apr 2020 15:33:32 -0700 Subject: [PATCH 087/432] netdev-linux.c: Fix coverity unreachable code warning Coverity reports unreachable code in "?" statement Fixed by removing code segment and unused variables & defines Signed-off-by: Usman Ansari Signed-off-by: William Tu --- lib/netdev-linux.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index c6e46f188..ff045cb12 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -119,10 +119,6 @@ COVERAGE_DEFINE(netdev_set_ethtool); #define TC_RTAB_SIZE 1024 #endif -#ifndef TCM_IFINDEX_MAGIC_BLOCK -#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) -#endif - /* Linux 2.6.21 introduced struct tpacket_auxdata. * Linux 2.6.27 added the tp_vlan_tci member. * Linux 3.0 defined TP_STATUS_VLAN_VALID. @@ -2621,9 +2617,8 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate, uint16_t eth_type = (OVS_FORCE uint16_t) htons(ETH_P_ALL); size_t basic_offset, action_offset, inner_offset; uint16_t prio = TC_RESERVED_PRIORITY_POLICE; - int ifindex, index, err = 0; + int ifindex, err = 0; struct tc_police pol_act; - uint32_t block_id = 0; struct ofpbuf request; struct ofpbuf *reply; struct tcmsg *tcmsg; @@ -2634,10 +2629,9 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate, return err; } - index = block_id ? 
TCM_IFINDEX_MAGIC_BLOCK : ifindex; - tcmsg = tc_make_request(index, RTM_NEWTFILTER, NLM_F_CREATE | NLM_F_ECHO, + tcmsg = tc_make_request(ifindex, RTM_NEWTFILTER, NLM_F_CREATE | NLM_F_ECHO, &request); - tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT; + tcmsg->tcm_parent = TC_INGRESS_PARENT; tcmsg->tcm_info = tc_make_handle(prio, eth_type); tcmsg->tcm_handle = handle; -- GitLab From 146f52b9ef8a5f61f39bfd5644638c7608ccceca Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 6 Apr 2020 16:59:01 -0700 Subject: [PATCH 088/432] ovs-vswitchd: Fix icmp reply timeout description. Currently the userspace datapath implements conntrack ICMP reply state as when ICMP packets have been seen on both directions. However, the description is defined as timeout of the connection after an ICMP error is replied in response to an ICMP packet. Fixes: 61a5264d60d0c ("ovs-vswitchd: Add Datapath, CT_Zone, and CT_Zone_Policy tables.") Signed-off-by: William Tu Acked-by: Greg Rose --- vswitchd/vswitch.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index f9339af85..6d334370d 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -6137,9 +6137,9 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ - The timeout of the connection after an ICMP error is replied in - response to an ICMP packet. This timeout is only supported by - the userspace datapath. + The timeout of the connection when ICMP packets have been seen in + both direction. This timeout is only supported by the userspace + datapath. -- GitLab From 8ea05f1f44dc720d96c1a0781e80a7e5d405c2b1 Mon Sep 17 00:00:00 2001 From: Archana Holla Date: Tue, 7 Apr 2020 11:09:33 -0700 Subject: [PATCH 089/432] util: Update OVS_TYPEOF macro for C++ enabled applications. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OVS_TYPEOF macro doesn’t return the type of object for non __GNUC__ platforms. 
Updating it to use "decltype" keyword when used from C++ code. Signed-off-by: Archana Holla Signed-off-by: William Tu --- include/openvswitch/util.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/openvswitch/util.h b/include/openvswitch/util.h index 9189e6480..228b185c3 100644 --- a/include/openvswitch/util.h +++ b/include/openvswitch/util.h @@ -85,6 +85,8 @@ OVS_NO_RETURN void ovs_assert_failure(const char *, const char *, const char *); * assigned to OBJECT. */ #ifdef __GNUC__ #define OVS_TYPEOF(OBJECT) typeof(OBJECT) +#elif defined (__cplusplus) +#define OVS_TYPEOF(OBJECT) decltype(OBJECT) #else #define OVS_TYPEOF(OBJECT) void * #endif -- GitLab From 9e44424204cb3994f38be66b0e981ec59d591722 Mon Sep 17 00:00:00 2001 From: Malvika Gupta Date: Mon, 30 Mar 2020 20:54:01 +0800 Subject: [PATCH 090/432] tests/atlocal.in: Add check for aarch64 Architecture This patch adds a condition to check if the CPU architecture is aarch64. If the condition evaluates to true, $IS_ARM64 variable is set to 'yes'. For all other architectures, this variable is set to 'no'. Reviewed-by: Yanqin Wei Signed-off-by: Malvika Gupta Signed-off-by: William Tu --- tests/atlocal.in | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/atlocal.in b/tests/atlocal.in index 1dc7cd5d0..02e2dc57f 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -111,6 +111,16 @@ if test "$IS_WIN32" = yes; then export PYTHONLEGACYWINDOWSSTDIO fi +# Check for CPU architecture +case `uname -m` in +aarch64) + IS_ARM64="yes" + ;; +*) + IS_ARM64="no" + ;; +esac + # Check whether to run IPv6 tests. 
$PYTHON3 -c ' import errno -- GitLab From 804ef1a7026f77b4c54388bdce3c8bb601c728c2 Mon Sep 17 00:00:00 2001 From: Malvika Gupta Date: Mon, 30 Mar 2020 20:54:02 +0800 Subject: [PATCH 091/432] tests/testsuite: Skip failing UT cases on aarch64 The following test cases are failing inconsistently on aarch64 platforms and have been skipped until further investigation can be made on how to fix them: 20: bfd.at:268 bfd - bfd decay 2104: ovsdb-idl.at:1815 Check Python IDL connects to leader - Python3 (leader only) 2105: ovsdb-idl.at:1816 Check Python IDL reconnects to leader - Python3 (leader only) Suggested-by: Yanqin Wei Suggested-by: Lance Yang Signed-off-by: Malvika Gupta Signed-off-by: William Tu --- tests/bfd.at | 1 + tests/ovsdb-idl.at | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/bfd.at b/tests/bfd.at index 7af7be54a..8654ca5db 100644 --- a/tests/bfd.at +++ b/tests/bfd.at @@ -266,6 +266,7 @@ AT_CLEANUP # Tests below are for bfd decay features. AT_SETUP([bfd - bfd decay]) +AT_SKIP_IF([test "$IS_ARM64" = "yes"]) OVS_VSWITCHD_START([add-br br1 -- set bridge br1 datapath-type=dummy -- \ add-port br1 p1 -- set Interface p1 type=patch \ options:peer=p0 ofport_request=2 -- \ diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index 564ef4c78..b5cbee7d9 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -1809,6 +1809,7 @@ m4_define([CHECK_STREAM_OPEN_BLOCK_PY], # with multiple remotes to assert the idl connects to the leader of the Raft cluster m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY], [AT_SETUP([$1 - Python3 (leader only)]) + AT_SKIP_IF([test "$IS_ARM64" = "yes"]) AT_KEYWORDS([ovsdb server idl Python leader_only with tcp socket]) m4_define([LPBK],[127.0.0.1]) AT_CHECK([ovsdb_cluster_start_idltest $2 "ptcp:0:"LPBK]) -- GitLab From e5a9931b9a6447ca537f3d0dce5ca1867f62cf0b Mon Sep 17 00:00:00 2001 From: Lance Yang Date: Mon, 30 Mar 2020 20:54:03 +0800 Subject: [PATCH 092/432] Travis: Enable clang compiler and unit test for arm CI Enable testsuite and 
clang compiler for arm CI. In order not to increase the CI jobs, selectively enable them in the existing jobs instead of adding extra jobs. Successful travis job build report: https://travis-ci.org/github/yzyuestc/ovs/builds/667539360 Reviewed-by: Yanqin Wei Reviewed-by: Malvika Gupta Signed-off-by: Lance Yang Signed-off-by: William Tu --- .travis.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 11497588b..527240a67 100644 --- a/.travis.yml +++ b/.travis.yml @@ -53,19 +53,19 @@ matrix: env: OPTS="--disable-ssl" - arch: arm64 compiler: gcc - env: OPTS="--disable-ssl" + env: TESTSUITE=1 DPDK=1 - arch: arm64 compiler: gcc env: KERNEL_LIST="5.5 4.19" - arch: arm64 compiler: gcc env: KERNEL_LIST="4.9 3.16" - - arch: arm64 - compiler: gcc - env: DPDK=1 OPTS="--enable-shared" - arch: arm64 compiler: gcc env: DPDK_SHARED=1 + - arch: arm64 + compiler: clang + env: OPTS="--disable-ssl" script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS -- GitLab From 5c41c31ebd64fda821fb733a5784a7a440a794f8 Mon Sep 17 00:00:00 2001 From: Jiang Lidong Date: Tue, 7 Apr 2020 03:28:06 +0000 Subject: [PATCH 093/432] dpif-netdev: includes microsecond delta in meter bucket calculation When dp-netdev meter rate is higher than 200Mbps, observe more than 10% bias from configured rate value with UDP traffic. In dp-netdev meter, millisecond delta between now and last used is taken into bucket size calculation, while sub-millisecond part is truncated. If traffic rate is pretty high, time delta can be few milliseconds, its ratio to truncated part is less than 10:1, the loss of bucket size caused by truncated can be observed obviously by committed traffic rate. In this patch, microsecond delta part is included in calculation of meter bucket to make it more precise. 
Signed-off-by: Jiang Lidong Signed-off-by: William Tu --- lib/dpif-netdev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index e456cc9be..ef14e83b5 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -5735,6 +5735,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, struct dp_packet *packet; long long int long_delta_t; /* msec */ uint32_t delta_t; /* msec */ + uint32_t delta_in_us; /* usec */ const size_t cnt = dp_packet_batch_size(packets_); uint32_t bytes, volume; int exceeded_band[NETDEV_MAX_BURST]; @@ -5765,6 +5766,9 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, Assuming that all racing threads received packets at the same time to avoid overflow. */ long_delta_t = 0; + delta_in_us = 0; + } else { + delta_in_us = (now - meter->used) % 1000; } /* Make sure delta_t will not be too large, so that bucket will not @@ -5800,6 +5804,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, /* Update band's bucket. */ band->bucket += delta_t * band->up.rate; + band->bucket += delta_in_us * band->up.rate / 1000; if (band->bucket > band->up.burst_size) { band->bucket = band->up.burst_size; } -- GitLab From 9e6c00bca9af29031d0e160d33174b7ae99b9244 Mon Sep 17 00:00:00 2001 From: Timothy Redaelli Date: Thu, 19 Mar 2020 20:05:39 +0100 Subject: [PATCH 094/432] bugtool: Fix for Python3. Currently ovs-bugtool tool doesn't start on Python 3. This commit fixes ovs-bugtool to make it work on Python 3. Replaced StringIO.StringIO with io.BytesIO since the script is processing binary data. 
Reported-at: https://bugzilla.redhat.com/1809241 Reported-by: Flavio Leitner Signed-off-by: Timothy Redaelli Co-authored-by: William Tu Signed-off-by: William Tu --- utilities/bugtool/ovs-bugtool.in | 48 +++++++++++++++++--------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/utilities/bugtool/ovs-bugtool.in b/utilities/bugtool/ovs-bugtool.in index e55bfc2ed..47f3c4629 100755 --- a/utilities/bugtool/ovs-bugtool.in +++ b/utilities/bugtool/ovs-bugtool.in @@ -33,8 +33,7 @@ # or func_output(). # -import StringIO -import commands +from io import BytesIO import fcntl import getopt import hashlib @@ -48,7 +47,7 @@ import warnings import zipfile from select import select from signal import SIGTERM -from subprocess import PIPE, Popen +from subprocess import PIPE, Popen, check_output from xml.dom.minidom import getDOMImplementation, parse @@ -348,7 +347,7 @@ def collect_data(): cap = v['cap'] if 'cmd_args' in v: if 'output' not in v.keys(): - v['output'] = StringIOmtime() + v['output'] = BytesIOmtime() if v['repeat_count'] > 0: if cap not in process_lists: process_lists[cap] = [] @@ -373,20 +372,23 @@ def collect_data(): if 'filename' in v and v['filename'].startswith('/proc/'): # proc files must be read into memory try: - f = open(v['filename'], 'r') + f = open(v['filename'], 'rb') s = f.read() f.close() if check_space(cap, v['filename'], len(s)): - v['output'] = StringIOmtime(s) + v['output'] = BytesIOmtime(s) except: pass elif 'func' in v: try: s = v['func'](cap) except Exception as e: - s = str(e) + s = str(e).encode() if check_space(cap, k, len(s)): - v['output'] = StringIOmtime(s) + if isinstance(s, str): + v['output'] = BytesIOmtime(s.encode()) + else: + v['output'] = BytesIOmtime(s) def main(argv=None): @@ -704,7 +706,7 @@ exclude those logs from the archive. 
# permit the user to filter out data # We cannot use iteritems, since we modify 'data' as we pass through - for (k, v) in sorted(data.items()): + for (k, v) in data.items(): cap = v['cap'] if 'filename' in v: key = k[0] @@ -721,7 +723,7 @@ exclude those logs from the archive. # include inventory data['inventory.xml'] = {'cap': None, - 'output': StringIOmtime(make_inventory(data, subdir))} + 'output': BytesIOmtime(make_inventory(data, subdir))} # create archive if output_fd == -1: @@ -782,7 +784,7 @@ def dump_scsi_hosts(cap): def module_info(cap): - output = StringIO.StringIO() + output = BytesIO() modules = open(PROC_MODULES, 'r') procs = [] @@ -806,7 +808,7 @@ def multipathd_topology(cap): def dp_list(): - output = StringIO.StringIO() + output = BytesIO() procs = [ProcOutput([OVS_DPCTL, 'dump-dps'], caps[CAP_NETWORK_STATUS][MAX_TIME], output)] @@ -828,7 +830,7 @@ def collect_ovsdb(): if os.path.isfile(OPENVSWITCH_COMPACT_DB): os.unlink(OPENVSWITCH_COMPACT_DB) - output = StringIO.StringIO() + output = BytesIO() max_time = 5 procs = [ProcOutput(['ovsdb-tool', 'compact', OPENVSWITCH_CONF_DB, OPENVSWITCH_COMPACT_DB], @@ -871,7 +873,7 @@ def fd_usage(cap): def dump_rdac_groups(cap): - output = StringIO.StringIO() + output = BytesIO() procs = [ProcOutput([MPPUTIL, '-a'], caps[cap][MAX_TIME], output)] run_procs([procs]) @@ -896,7 +898,7 @@ def load_plugins(just_capabilities=False, filter=None): for node in nodelist: if node.nodeType == node.TEXT_NODE: rc += node.data - return rc.encode() + return rc def getBoolAttr(el, attr, default=False): ret = default @@ -1037,7 +1039,7 @@ def make_tar(subdir, suffix, output_fd, output_file): s = os.stat(v['filename']) ti.mtime = s.st_mtime ti.size = s.st_size - tf.addfile(ti, open(v['filename'])) + tf.addfile(ti, open(v['filename'], 'rb')) except: pass finally: @@ -1095,12 +1097,12 @@ def make_inventory(inventory, subdir): s.setAttribute('date', time.strftime('%c')) s.setAttribute('hostname', platform.node()) s.setAttribute('uname', 
' '.join(platform.uname())) - s.setAttribute('uptime', commands.getoutput(UPTIME)) + s.setAttribute('uptime', check_output(UPTIME).decode()) document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(s) map(lambda k_v: inventory_entry(document, subdir, k_v[0], k_v[1]), inventory.items()) - return document.toprettyxml() + return document.toprettyxml().encode() def inventory_entry(document, subdir, k, v): @@ -1301,7 +1303,7 @@ class ProcOutput(object): line = self.proc.stdout.readline() else: line = self.proc.stdout.read(self.bufsize) - if line == '': + if line == b'': # process exited self.proc.stdout.close() self.status = self.proc.wait() @@ -1391,13 +1393,13 @@ def get_free_disk_space(path): return s.f_frsize * s.f_bfree -class StringIOmtime(StringIO.StringIO): - def __init__(self, buf=''): - StringIO.StringIO.__init__(self, buf) +class BytesIOmtime(BytesIO): + def __init__(self, buf=b''): + BytesIO.__init__(self, buf) self.mtime = time.time() def write(self, s): - StringIO.StringIO.write(self, s) + BytesIO.write(self, s) self.mtime = time.time() -- GitLab From 02f641e2c583799eb38d3b4a59ba5872da04c6d9 Mon Sep 17 00:00:00 2001 From: Yifeng Sun Date: Thu, 9 Apr 2020 11:37:38 -0700 Subject: [PATCH 095/432] tun_metadata: Fix coredump caused by use-after-free bug Tun_metadata can be referened by flow and frozen_state at the same time. When ovs-vswitchd handles TLV table mod message, the involved tun_metadata gets freed. The call trace to free tun_metadata is shown as below: ofproto_run - handle_openflow - handle_single_part_openflow - handle_tlv_table_mod - tun_metadata_table_mod - tun_metadata_postpone_free Unfortunately, this tun_metadata can be still used by some frozen_state, and later on when frozen_state tries to access its tun_metadata table, ovs-vswitchd crashes. 
The call trace to access tun_metadata from frozen_state is shown as below: udpif_upcall_handler - recv_upcalls - process_upcall - frozen_metadata_to_flow It is unsafe for frozen_state to reference tun_table because tun_table is protected by RCU while the lifecycle of frozen_state can span several RCU quiesce states. Current code violates OVS's RCU protection mechanism. This patch fixes it by simply stopping frozen_state from referencing tun_table. If frozen_state needs tun_table, the latest valid tun_table can be found through ofproto_get_tun_tab() efficiently. A previous commit seems to fix a similar issue: 254878c18874f6 (ofproto-dpif-xlate: Fix segmentation fault caused by tun_table) VMware-BZ: #2526222 Signed-off-by: Yifeng Sun Signed-off-by: William Tu --- ofproto/ofproto-dpif-rid.h | 12 +++++++++++- ofproto/ofproto-dpif-upcall.c | 3 ++- ofproto/ofproto-dpif-xlate.c | 9 +++------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h index e5d02caf2..30cd5275f 100644 --- a/ofproto/ofproto-dpif-rid.h +++ b/ofproto/ofproto-dpif-rid.h @@ -22,6 +22,7 @@ #include "cmap.h" #include "ofproto-dpif-mirror.h" +#include "ofproto/ofproto-provider.h" #include "openvswitch/list.h" #include "openvswitch/ofp-actions.h" #include "ovs-thread.h" @@ -115,16 +116,25 @@ frozen_metadata_from_flow(struct frozen_metadata *md, { memset(md, 0, sizeof *md); md->tunnel = flow->tunnel; + /* It is unsafe for frozen_state to reference tun_table because + * tun_table is protected by RCU while the lifecycle of frozen_state + * can span several RCU quiesce states. + * + * The latest valid tun_table can be found by ofproto_get_tun_tab() + * efficiently. 
*/ + md->tunnel.metadata.tab = NULL; md->metadata = flow->metadata; memcpy(md->regs, flow->regs, sizeof md->regs); md->in_port = flow->in_port.ofp_port; } static inline void -frozen_metadata_to_flow(const struct frozen_metadata *md, +frozen_metadata_to_flow(struct ofproto *ofproto, + const struct frozen_metadata *md, struct flow *flow) { flow->tunnel = md->tunnel; + flow->tunnel.metadata.tab = ofproto_get_tun_tab(ofproto); flow->metadata = md->metadata; memcpy(flow->regs, md->regs, sizeof flow->regs); flow->in_port.ofp_port = md->in_port; diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 8dfa05b71..5e08ef10d 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -1534,7 +1534,8 @@ process_upcall(struct udpif *udpif, struct upcall *upcall, flow_clear_conntrack(&frozen_flow); } - frozen_metadata_to_flow(&state->metadata, &frozen_flow); + frozen_metadata_to_flow(&upcall->ofproto->up, &state->metadata, + &frozen_flow); flow_get_metadata(&frozen_flow, &am->pin.up.base.flow_metadata); ofproto_dpif_send_async_msg(upcall->ofproto, am); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 042c50a63..abce976c6 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -7544,7 +7544,8 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) /* Restore pipeline metadata. May change flow's in_port and other * metadata to the values that existed when freezing was triggered. */ - frozen_metadata_to_flow(&state->metadata, flow); + frozen_metadata_to_flow(&ctx.xbridge->ofproto->up, + &state->metadata, flow); /* Restore stack, if any. 
*/ if (state->stack) { @@ -7596,14 +7597,10 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) ctx.error = XLATE_INVALID_TUNNEL_METADATA; goto exit; } - } else if (!flow->tunnel.metadata.tab || xin->frozen_state) { + } else if (!flow->tunnel.metadata.tab) { /* If the original flow did not come in on a tunnel, then it won't have * FLOW_TNL_F_UDPIF set. However, we still need to have a metadata * table in case we generate tunnel actions. */ - /* If the translation is from a frozen state, we use the latest - * TLV map to avoid segmentation fault in case the old TLV map is - * replaced by a new one. - * XXX: It is better to abort translation if the table is changed. */ flow->tunnel.metadata.tab = ofproto_get_tun_tab( &ctx.xbridge->ofproto->up); } -- GitLab From 134e6831acca48f10df3d59b8e1567c24dd925d2 Mon Sep 17 00:00:00 2001 From: Yifeng Sun Date: Thu, 9 Apr 2020 11:37:39 -0700 Subject: [PATCH 096/432] system-traffic: Check frozen state handling with TLV map change This patch enhances a system traffic test to prevent regression on the tunnel metadata table (tun_table) handling with frozen state. Without a proper fix this test can crash ovs-vswitchd due to a use-after-free bug on tun_table. These are the timed sequence of how this bug is triggered: - Adds an OpenFlow rule in OVS that matches Geneve tunnel metadata that contains a controller action. - When the first packet matches the aforementioned OpenFlow rule, during the miss upcall, OVS stores a pointer to the tun_table (that decodes the Geneve tunnel metadata) in a frozen state and pushes down a datapath flow into kernel datapath. - Issues a add-tlv-map command to reprogram the tun_table on OVS. OVS frees the old tun_table and create a new tun_table. - A subsequent packet hits the kernel datapath flow again. Since there is a controller action associated with that flow, it triggers slow path controller upcall. 
- In the slow path controller upcall, OVS derives the tun_table from the frozen state, which points to the old tun_table that is already being freed at this time point. - In order to access the tunnel metadata, OVS uses the invalid pointer that points to the old tun_table and triggers the core dump. Signed-off-by: Yi-Hung Wei Signed-off-by: Yifeng Sun Co-authored-by: Yi-Hung Wei Signed-off-by: William Tu --- tests/system-traffic.at | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 4a39c929c..3ed03d92b 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -611,6 +611,16 @@ NS_CHECK_EXEC([at_ns0], [ping -q -c 3 10.1.1.100 | FORMAT_PING], [0], [dnl 3 packets transmitted, 3 received, 0% packet loss, time 0ms ]) +dnl Test OVS handles TLV map modifictions properly when restores frozen state. +NS_CHECK_EXEC([at_ns0], [ping 10.1.1.100 > /dev/null &]) + +AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0x88,len=4}->tun_metadata1"]) +sleep 1 +AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0x99,len=4}->tun_metadata2"]) +sleep 1 +AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0xaa,len=4}->tun_metadata3"]) +sleep 1 + OVS_APP_EXIT_AND_WAIT([ovs-ofctl]) OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP -- GitLab From 9d7893b30de1e0eb8d5d3af4f92908583167caa9 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 13 Apr 2020 08:36:44 -0700 Subject: [PATCH 097/432] ofp-actions: Fix memory leak on error path. Need to free the memory before return. Detected by gcc10. 
Signed-off-by: William Tu Reviewed-by: Yifeng Sun --- lib/ofp-actions.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index ef8b2b452..a94d1a7ca 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -5966,6 +5966,7 @@ parse_CLONE(char *arg, const struct ofpact_parse_params *pp) clone = pp->ofpacts->header; if (ofpbuf_oversized(pp->ofpacts)) { + free(error); return xasprintf("input too big"); } -- GitLab From 82b7e6d19e89de67dae41308e25935c351f59316 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 14 Apr 2020 11:42:10 -0700 Subject: [PATCH 098/432] compat: Fix broken partial backport of extack op parameter A series of commits added support for the extended ack parameter to the newlink, changelink and validate ops in the rtnl_link_ops structure: a8b8a889e369d ("net: add netlink_ext_ack argument to rtnl_link_ops.validate") 7a3f4a185169b ("net: add netlink_ext_ack argument to rtnl_link_ops.newlink") ad744b223c521 ("net: add netlink_ext_ack argument to rtnl_link_ops.changelink") These commits were all added at the same time and have been present since the Linux kernel 4.13 release. In our compatibility layer we have a define HAVE_EXT_ACK_IN_RTNL_LINKOPS that indicates the presence of the extended ack parameter for these three link operations. At least one distro has only backported two of the three patches, for newlink and changelink, while not backporting patch a8b8a889e369d for the validate op. Our compatibility layer code in acinclude.m4 is able to find the presence of the extack within the rtnl_link_ops structure so it defines HAVE_EXT_ACK_IN_RTNL_LINKOPS but since the validate link op does not have the extack parameter the compilation fails on recent kernels for that particular distro. Other kernel distributions based upon this distro will presumably also encounter the compile errors. Introduce a new function in acinclude.m4 that will find function op definitions and then search for the required parameter.
Then use this function to define HAVE_RTNLOP_VALIDATE_WITH_EXTACK so that we can detect and enable correct compilation on kernels which have not backported the entire set of patches. This function is generic to any function op - it need not be in a structure. In places where HAVE_EXT_ACK_IN_RTNL_LINKOPS wraps validate functions replace it with the new HAVE_RTNLOP_VALIDATE_WITH_EXTACK define. Passes Travis here: https://travis-ci.org/github/gvrose8192/ovs-experimental/builds/674599698 Passes a kernel check-kmod test on several systems, including sles12 sp4 4.12.14-95.48-default kernel, without any regressions. VMWare-BZ: #2544032 Signed-off-by: Greg Rose Reviewed-by: Yifeng Sun Signed-off-by: William Tu --- acinclude.m4 | 34 ++++++++++++++++++++++++++++++ datapath/linux/compat/geneve.c | 2 +- datapath/linux/compat/ip6_gre.c | 10 ++++----- datapath/linux/compat/ip6_tunnel.c | 2 +- datapath/linux/compat/ip_gre.c | 10 ++++----- datapath/linux/compat/lisp.c | 2 +- datapath/linux/compat/stt.c | 2 +- datapath/linux/compat/vxlan.c | 2 +- 8 files changed, 49 insertions(+), 15 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 02efea6de..0901f2870 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -520,6 +520,37 @@ AC_DEFUN([OVS_FIND_PARAM_IFELSE], [ fi ]) +dnl OVS_FIND_OP_PARAM_IFELSE(FILE, OP, REGEX, [IF-MATCH], [IF-NO-MATCH]) +dnl +dnl Looks for OP in FILE. If it is found, greps for REGEX within the +dnl OP definition. If this is successful, runs IF-MATCH, otherwise +dnl IF-NO-MATCH. If IF-MATCH is empty then it defines to +dnl OVS_DEFINE(HAVE_<OP>_WITH_<REGEX>), with <OP> and <REGEX> +dnl translated to uppercase. +AC_DEFUN([OVS_FIND_OP_PARAM_IFELSE], [ + AC_MSG_CHECKING([whether $2 has member $3 in $1]) + if test -f $1; then + awk '/$2[[ \t\n]]*\)\(/,/;/' $1 2>/dev/null | grep '$3' >/dev/null + status=$?
+ case $status in + 0) + AC_MSG_RESULT([yes]) + m4_if([$4], [], [OVS_DEFINE([HAVE_]m4_toupper([$2])[_WITH_]m4_toupper([$3]))], [$4]) + ;; + 1) + AC_MSG_RESULT([no]) + $5 + ;; + *) + AC_MSG_ERROR([grep exited with status $status]) + ;; + esac + else + AC_MSG_RESULT([file not found]) + $5 + fi +]) + dnl OVS_DEFINE(NAME) dnl dnl Defines NAME to 1 in kcompat.h. @@ -1056,6 +1087,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_GREP_IFELSE([$KSRC/include/net/netlink.h], [nla_parse_deprecated_strict], [OVS_DEFINE([HAVE_NLA_PARSE_DEPRECATED_STRICT])]) + OVS_FIND_OP_PARAM_IFELSE([$KSRC/include/net/rtnetlink.h], + [validate], [extack], + [OVS_DEFINE([HAVE_RTNLOP_VALIDATE_WITH_EXTACK])]) if cmp -s datapath/linux/kcompat.h.new \ datapath/linux/kcompat.h >/dev/null 2>&1; then diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c index 5b183963d..1551a3721 100644 --- a/datapath/linux/compat/geneve.c +++ b/datapath/linux/compat/geneve.c @@ -1419,7 +1419,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = { [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, }; -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int geneve_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) #else diff --git a/datapath/linux/compat/ip6_gre.c b/datapath/linux/compat/ip6_gre.c index da0fa432b..3aa9844b3 100644 --- a/datapath/linux/compat/ip6_gre.c +++ b/datapath/linux/compat/ip6_gre.c @@ -1687,7 +1687,7 @@ static struct pernet_operations ip6gre_net_ops = { .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), }; -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int rpl_ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1713,7 +1713,7 @@ static int rpl_ip6gre_tunnel_validate(struct nlattr *tb[], } #define ip6gre_tunnel_validate rpl_ip6gre_tunnel_validate -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef 
HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int rpl_ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) #else @@ -1739,7 +1739,7 @@ static int rpl_ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) } out: -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK return ip6gre_tunnel_validate(tb, data, extack); #else return ip6gre_tunnel_validate(tb, data); @@ -1747,7 +1747,7 @@ out: } #define ip6gre_tap_validate rpl_ip6gre_tap_validate -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int rpl_ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -1762,7 +1762,7 @@ static int rpl_ip6erspan_tap_validate(struct nlattr *tb[], if (!data) return 0; -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK ret = ip6gre_tap_validate(tb, data, extack); #else ret = ip6gre_tap_validate(tb, data); diff --git a/datapath/linux/compat/ip6_tunnel.c b/datapath/linux/compat/ip6_tunnel.c index 9f4bae7dd..984a51bfb 100644 --- a/datapath/linux/compat/ip6_tunnel.c +++ b/datapath/linux/compat/ip6_tunnel.c @@ -1754,7 +1754,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) return 0; } -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int rpl_ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) #else diff --git a/datapath/linux/compat/ip_gre.c b/datapath/linux/compat/ip_gre.c index 41379b19a..c194ffe00 100644 --- a/datapath/linux/compat/ip_gre.c +++ b/datapath/linux/compat/ip_gre.c @@ -623,7 +623,7 @@ static const struct gre_protocol ipgre_protocol = { .err_handler = __gre_err, }; -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) #else @@ -646,7 +646,7 @@ static int 
ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) return 0; } -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) #else @@ -672,7 +672,7 @@ static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) } out: -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK return ipgre_tunnel_validate(tb, data, NULL); #else return ipgre_tunnel_validate(tb, data); @@ -707,7 +707,7 @@ enum { #define RPL_IFLA_GRE_MAX (IFLA_GRE_ERSPAN_HWID + 1) -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) #else @@ -720,7 +720,7 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[]) if (!data) return 0; -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK ret = ipgre_tap_validate(tb, data, NULL); #else ret = ipgre_tap_validate(tb, data); diff --git a/datapath/linux/compat/lisp.c b/datapath/linux/compat/lisp.c index 58144adf6..6dc066de8 100644 --- a/datapath/linux/compat/lisp.c +++ b/datapath/linux/compat/lisp.c @@ -612,7 +612,7 @@ static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = { [IFLA_LISP_PORT] = { .type = NLA_U16 }, }; -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int lisp_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack __always_unused *extack) #else diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c index 8a5853f19..39a294764 100644 --- a/datapath/linux/compat/stt.c +++ b/datapath/linux/compat/stt.c @@ -1904,7 +1904,7 @@ static const struct nla_policy stt_policy[IFLA_STT_MAX + 1] = { [IFLA_STT_PORT] = { .type = NLA_U16 }, }; -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int stt_validate(struct 
nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack __always_unused *extack) #else diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index 6090f4290..f8f667e97 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -1708,7 +1708,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG }, }; -#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS +#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) #else -- GitLab From 976c452a7b86533e8599970d771f077159815744 Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Mon, 10 Feb 2020 13:48:53 +0000 Subject: [PATCH 099/432] acinclude: handle dependencies for DPDK's PCAP PMD If RTE_LIBRTE_PMD_PCAP is enabled in the DPDK build, OVS must link the pcap library, otherwise build failures will occur. Signed-off-by: Ciara Loftus Signed-off-by: William Tu --- acinclude.m4 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/acinclude.m4 b/acinclude.m4 index 0901f2870..b5b2f59e4 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -355,6 +355,10 @@ AC_DEFUN([OVS_CHECK_DPDK], [ OVS_FIND_DEPENDENCY([get_mempolicy], [numa], [libnuma]) ], [], [[#include ]]) + AC_CHECK_DECL([RTE_LIBRTE_PMD_PCAP], [ + OVS_FIND_DEPENDENCY([pcap_dump_close], [pcap], [libpcap]) + ], [], [[#include ]]) + AC_CHECK_DECL([RTE_LIBRTE_VHOST_NUMA], [ AC_DEFINE([VHOST_NUMA], [1], [NUMA Aware vHost support detected in DPDK.]) ], [], [[#include ]]) -- GitLab From 5bb068a7723f830848eb991f9ac6fe13640bf3da Mon Sep 17 00:00:00 2001 From: Ciara Loftus Date: Mon, 10 Feb 2020 13:48:54 +0000 Subject: [PATCH 100/432] acinclude: handle dependencies for DPDK's AF_XDP PMD If RTE_LIBRTE_AF_XDP is enabled in the DPDK build, OVS must link the libbpf library, otherwise build failures will occur. 
Signed-off-by: Ciara Loftus Signed-off-by: William Tu --- acinclude.m4 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/acinclude.m4 b/acinclude.m4 index b5b2f59e4..0e90c3332 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -359,6 +359,10 @@ AC_DEFUN([OVS_CHECK_DPDK], [ OVS_FIND_DEPENDENCY([pcap_dump_close], [pcap], [libpcap]) ], [], [[#include ]]) + AC_CHECK_DECL([RTE_LIBRTE_PMD_AF_XDP], [ + LIBBPF_LDADD="-lbpf" + ], [], [[#include ]]) + AC_CHECK_DECL([RTE_LIBRTE_VHOST_NUMA], [ AC_DEFINE([VHOST_NUMA], [1], [NUMA Aware vHost support detected in DPDK.]) ], [], [[#include ]]) -- GitLab From 38c69ccf8e294109326d6c1d38a300175d6d370f Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 16 Apr 2020 12:54:53 -0700 Subject: [PATCH 101/432] conntrack: Add coverage count for l4csum error. Add a coverage counter when userspace conntrack receives a packet with invalid l4 checksum. When using veth for testing, users often forget to turn off the tx offload on the other side of the namespace, causing l4 checksum not calculated in packet header, and at conntrack, return invalid conntrack state. 
Suggested-by: Yi-Hung Wei Signed-off-by: William Tu Acked-by: Yi-Hung Wei --- lib/conntrack.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/conntrack.c b/lib/conntrack.c index 0cbc8f6d2..95d48c5ee 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -44,6 +44,7 @@ VLOG_DEFINE_THIS_MODULE(conntrack); COVERAGE_DEFINE(conntrack_full); COVERAGE_DEFINE(conntrack_long_cleanup); +COVERAGE_DEFINE(conntrack_l4csum_err); struct conn_lookup_ctx { struct conn_key key; @@ -1661,6 +1662,7 @@ checksum_valid(const struct conn_key *key, const void *data, size_t size, } else if (key->dl_type == htons(ETH_TYPE_IPV6)) { return packet_csum_upperlayer6(l3, data, key->nw_proto, size) == 0; } else { + COVERAGE_INC(conntrack_l4csum_err); return false; } } @@ -1704,7 +1706,12 @@ check_l4_udp(const struct conn_key *key, const void *data, size_t size, static inline bool check_l4_icmp(const void *data, size_t size, bool validate_checksum) { - return validate_checksum ? csum(data, size) == 0 : true; + if (validate_checksum && csum(data, size) != 0) { + COVERAGE_INC(conntrack_l4csum_err); + return false; + } else { + return true; + } } static inline bool -- GitLab From 925511592a4e239250aed8cf80bd4dffe4e47486 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 14 Apr 2020 08:17:04 -0700 Subject: [PATCH 102/432] fatal-signal: Remove snprintf. Function snprintf is not async-signal-safe. Replace it with our own implementation. 
Example ovs-vswitchd.log output: 2020-03-25T01:08:19.673Z|00050|memory|INFO|handlers:2 ports:3 SIGSEGV detected, backtrace: 0x4872d9 0x7f4e2ab974b0 0x7f4e2ac5d74d <__poll+0x2d> 0x531098 0x51aefc 0x445ca9 0x5056fd 0x7f4e2b65f6ba 0x7f4e2ac6941d 0x0 <+0x0> Tested-at: https://travis-ci.org/github/williamtu/ovs-travis/builds/674901331 Tested-by: Yifeng Sun Reviewed-by: Yifeng Sun Signed-off-by: William Tu --- lib/fatal-signal.c | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c index 51cf628d9..bbb31ef27 100644 --- a/lib/fatal-signal.c +++ b/lib/fatal-signal.c @@ -158,6 +158,23 @@ fatal_signal_add_hook(void (*hook_cb)(void *aux), void (*cancel_cb)(void *aux), } #ifdef HAVE_UNWIND +/* Convert unsigned long long to string. This is needed because + * using snprintf() is not async signal safe. */ +static inline int +llong_to_hex_str(unsigned long long value, char *str) +{ + int i = 0, res; + + if (value / 16 > 0) { + i = llong_to_hex_str(value / 16, str); + } + + res = value % 16; + str[i] = "0123456789abcdef"[res]; + + return i + 1; +} + /* Send the backtrace buffer to monitor thread. * * Note that this runs in the signal handling context, any system @@ -192,20 +209,32 @@ send_backtrace_to_monitor(void) { dep * sizeof(struct unw_backtrace))); } else { /* Since there is no monitor daemon running, write backtrace - * in current process. This is not asyn-signal-safe due to - * use of snprintf(). + * in current process. 
*/ char str[] = "SIGSEGV detected, backtrace:\n"; + char ip_str[16], offset_str[6]; + char line[64], fn_name[UNW_MAX_FUNCN]; vlog_direct_write_to_log_file_unsafe(str); for (int i = 0; i < dep; i++) { - char line[64]; - - snprintf(line, 64, "0x%016"PRIxPTR" <%s+0x%"PRIxPTR">\n", - unw_bt[i].ip, - unw_bt[i].func, - unw_bt[i].offset); + memset(line, 0, sizeof line); + memset(fn_name, 0, sizeof fn_name); + memset(offset_str, 0, sizeof offset_str); + memset(ip_str, ' ', sizeof ip_str); + ip_str[sizeof(ip_str) - 1] = 0; + + llong_to_hex_str(unw_bt[i].ip, ip_str); + llong_to_hex_str(unw_bt[i].offset, offset_str); + + strcat(line, "0x"); + strcat(line, ip_str); + strcat(line, "<"); + memcpy(fn_name, unw_bt[i].func, UNW_MAX_FUNCN - 1); + strcat(line, fn_name); + strcat(line, "+0x"); + strcat(line, offset_str); + strcat(line, ">\n"); vlog_direct_write_to_log_file_unsafe(line); } } -- GitLab From cda22c659e953c75b4c3c6e7bcfa0e37bbe48967 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 21 Apr 2020 13:33:55 +0200 Subject: [PATCH 103/432] Utilities: make print() in gdb script work on all version of Python Signed-off-by: Eelco Chaudron Signed-off-by: William Tu --- utilities/gdb/ovs_gdb.py | 1 + 1 file changed, 1 insertion(+) diff --git a/utilities/gdb/ovs_gdb.py b/utilities/gdb/ovs_gdb.py index befc2b4a4..6b42ac80e 100644 --- a/utilities/gdb/ovs_gdb.py +++ b/utilities/gdb/ovs_gdb.py @@ -55,6 +55,7 @@ # ... # ... # +from __future__ import print_function import gdb import sys import uuid -- GitLab From eb8fd61d91866032118fa3f63e022d007d812626 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 17 Apr 2020 14:51:34 +0200 Subject: [PATCH 104/432] Utilities: Add the ovs_dump_ofpacts command to gdb This adds the ovs_dump_ofpacts command: (gdb) help ovs_dump_ofpacts Dump all actions in an ofpacts set Usage: ovs_dump_ofpacts <struct ofpact *> <ofpacts_len> <struct ofpact *> : Pointer to set of ofpact structures. <ofpacts_len> : Total length of the set.
Example dumping all actions when in the clone_xlate_actions() function: (gdb) ovs_dump_ofpacts actions actions_len (struct ofpact *) 0x561c7be487c8: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24} (struct ofpact *) 0x561c7be487e0: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24} (struct ofpact *) 0x561c7be487f8: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24} (struct ofpact *) 0x561c7be48810: {type = OFPACT_SET_FIELD, raw = 255 '', len = 32} (struct ofpact *) 0x561c7be48830: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24} (struct ofpact *) 0x561c7be48848: {type = OFPACT_RESUBMIT, raw = 38 '&', len = 16} Signed-off-by: Eelco Chaudron Signed-off-by: William Tu --- utilities/gdb/ovs_gdb.py | 74 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/utilities/gdb/ovs_gdb.py b/utilities/gdb/ovs_gdb.py index 6b42ac80e..1111f3100 100644 --- a/utilities/gdb/ovs_gdb.py +++ b/utilities/gdb/ovs_gdb.py @@ -414,6 +414,39 @@ class ForEachLIST(): return self.__next__() +# +# Class that will provide an iterator over an OFPACTS. 
+# +class ForEachOFPACTS(): + def __init__(self, ofpacts, ofpacts_len): + self.ofpact = ofpacts.cast(gdb.lookup_type('struct ofpact').pointer()) + self.length = int(ofpacts_len) + + def __round_up(self, val, round_to): + return int(val) + (round_to - int(val)) % round_to + + def __iter__(self): + return self + + def __next__(self): + if self.ofpact is None or self.length <= 0: + raise StopIteration + + ofpact = self.ofpact + length = self.__round_up(ofpact['len'], 8) + + self.length -= length + self.ofpact = self.ofpact.cast( + gdb.lookup_type('void').pointer()) + length + self.ofpact = self.ofpact.cast( + gdb.lookup_type('struct ofpact').pointer()) + + return ofpact + + def next(self): + return self.__next__() + + # # Implements the GDB "ovs_dump_bridges" command # @@ -1234,6 +1267,46 @@ class CmdShowUpcall(gdb.Command): self.display_udpif_upcall(udpif, 0, "dbg" in arg_list) +# +# Implements the GDB "ovs_dump_ofpacts" command +# +class CmdDumpOfpacts(gdb.Command): + """Dump all actions in an ofpacts set + Usage: ovs_dump_ofpacts + + : Pointer to set of ofpact structures. + : Total length of the set. 
+ + Example dumping all actions when in the clone_xlate_actions() function: + + (gdb) ovs_dump_ofpacts actions actions_len + (struct ofpact *) 0x561c7be487c8: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24} + (struct ofpact *) 0x561c7be487e0: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24} + (struct ofpact *) 0x561c7be487f8: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24} + (struct ofpact *) 0x561c7be48810: {type = OFPACT_SET_FIELD, raw = 255 '', len = 32} + (struct ofpact *) 0x561c7be48830: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24} + (struct ofpact *) 0x561c7be48848: {type = OFPACT_RESUBMIT, raw = 38 '&', len = 16} + """ + def __init__(self): + super(CmdDumpOfpacts, self).__init__("ovs_dump_ofpacts", + gdb.COMMAND_DATA) + + def invoke(self, arg, from_tty): + arg_list = gdb.string_to_argv(arg) + + if len(arg_list) != 2: + print("usage: ovs_dump_ofpacts ") + return + + ofpacts = gdb.parse_and_eval(arg_list[0]).cast( + gdb.lookup_type('struct ofpact').pointer()) + + length = gdb.parse_and_eval(arg_list[1]) + + for node in ForEachOFPACTS(ofpacts, length): + print("(struct ofpact *) {}: {}".format(node, node.dereference())) + + # # Initialize all GDB commands # @@ -1245,6 +1318,7 @@ CmdDumpDpNetdevPorts() CmdDumpDpProvider() CmdDumpNetdev() CmdDumpNetdevProvider() +CmdDumpOfpacts() CmdDumpOvsList() CmdDumpSimap() CmdDumpSmap() -- GitLab From 542dbc5a185407e4b3aef000f043fa2c653ccc12 Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Tue, 21 Apr 2020 09:24:38 +0100 Subject: [PATCH 105/432] vlog: Fast path in vlog. Avoid grabbing any mutexes if the log levels specify that no logging is to take place. 
Signed-off-by: Anton Ivanov Signed-off-by: William Tu --- lib/vlog.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lib/vlog.c b/lib/vlog.c index ee6b0d3a6..533f93755 100644 --- a/lib/vlog.c +++ b/lib/vlog.c @@ -1103,10 +1103,17 @@ vlog_valist(const struct vlog_module *module, enum vlog_level level, { bool log_to_console = module->levels[VLF_CONSOLE] >= level; bool log_to_syslog = module->levels[VLF_SYSLOG] >= level; - bool log_to_file; + bool log_to_file = module->levels[VLF_FILE] >= level; + + if (!(log_to_console || log_to_syslog || log_to_file)) { + /* fast path - all logging levels specify no logging, no + * need to hog the log mutex + */ + return; + } ovs_mutex_lock(&log_file_mutex); - log_to_file = module->levels[VLF_FILE] >= level && log_fd >= 0; + log_to_file &= (log_fd >= 0); ovs_mutex_unlock(&log_file_mutex); if (log_to_console || log_to_syslog || log_to_file) { int save_errno = errno; -- GitLab From 3738d9298fe788409f732f8e111ffcd204070da3 Mon Sep 17 00:00:00 2001 From: Anton Ivanov Date: Tue, 21 Apr 2020 09:23:57 +0100 Subject: [PATCH 106/432] ovsdb: Switch ovsdb log fsync to data only. We do not check metadata - mtime, atime, anywhere, so we do not need to update it every time we sync the log. if the system supports it, the log update should be data only Signed-off-by: Anton Ivanov Signed-off-by: William Tu --- ovsdb/log.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ovsdb/log.c b/ovsdb/log.c index c82a79c9f..41af77679 100644 --- a/ovsdb/log.c +++ b/ovsdb/log.c @@ -658,7 +658,16 @@ ovsdb_log_write_and_free(struct ovsdb_log *log, struct json *json) struct ovsdb_error * ovsdb_log_commit_block(struct ovsdb_log *file) { +#if (_POSIX_C_SOURCE >= 199309L || _XOPEN_SOURCE >= 500) + /* we do not check metadata - mtime, atime, anywhere, so we + * do not need to update it every time we sync the log. 
+ * if the system supports it, the log update should be + * data only + */ + if (file->stream && fdatasync(fileno(file->stream))) { +#else if (file->stream && fsync(fileno(file->stream))) { +#endif return ovsdb_io_error(errno, "%s: fsync failed", file->display_name); } return NULL; -- GitLab From ffc0c87393012402e4647c07cc848f68aee7faf9 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 21 Apr 2020 15:09:05 -0700 Subject: [PATCH 107/432] ovsdb: Remove duplicated function definitions ovsdb_function_from_string() and ovsdb_function_to_string() are defined both in ovsdb/condition.c and lib/ovsdb-condition.c with the same function definition. Remove the one in ovsdb/condition.c to avoid duplication. This also resolves the following bazel building error. ./libopenvswitch.lo(ovsdb-condition.pic.o): In function `ovsdb_function_from_string': /lib/ovsdb-condition.c:24: multiple definition of `ovsdb_function_from_string' ./libovsdb.a(condition.pic.o):/proc/self/cwd/external/openvswitch_repo/ovsdb/condition.c:34: first defined here ./libopenvswitch.lo(ovsdb-condition.pic.o): In function `ovsdb_function_from_string': ./lib/ovsdb-condition.c:24: multiple definition of `ovsdb_function_to_string' ./libovsdb.a(condition.pic.o):/proc/self/cwd/external/openvswitch_repo/ovsdb/condition.c:335 Reported-by: Harold Lim Signed-off-by: Yi-Hung Wei Signed-off-by: William Tu --- ovsdb/condition.c | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/ovsdb/condition.c b/ovsdb/condition.c index 692c09328..388dd54a1 100644 --- a/ovsdb/condition.c +++ b/ovsdb/condition.c @@ -29,33 +29,6 @@ #include "table.h" #include "util.h" -struct ovsdb_error * -ovsdb_function_from_string(const char *name, enum ovsdb_function *function) -{ -#define OVSDB_FUNCTION(ENUM, NAME) \ - if (!strcmp(name, NAME)) { \ - *function = ENUM; \ - return NULL; \ - } - OVSDB_FUNCTIONS; -#undef OVSDB_FUNCTION - - return ovsdb_syntax_error(NULL, "unknown function", - "No function named %s.", name); -}
- -const char * -ovsdb_function_to_string(enum ovsdb_function function) -{ - switch (function) { -#define OVSDB_FUNCTION(ENUM, NAME) case ENUM: return NAME; - OVSDB_FUNCTIONS; -#undef OVSDB_FUNCTION - } - - return NULL; -} - static struct ovsdb_error * ovsdb_clause_from_json(const struct ovsdb_table_schema *ts, const struct json *json, -- GitLab From 5bfc519fee499b5b8b1eeb2d26c1baa6a5f42d5b Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 14 Apr 2020 06:22:55 -0700 Subject: [PATCH 108/432] netdev-afxdp: Add interrupt mode netdev class. The patch adds a new netdev class 'afxdp-nonpmd' to enable afxdp interrupt mode. This is similar to 'type=afxdp', except that the is_pmd field is set to false. As a result, the packet processing is handled by main thread, not pmd thread. This avoids burning the CPU to always 100% when there is no traffic. Signed-off-by: William Tu Signed-off-by: Ilya Maximets --- NEWS | 3 +++ lib/netdev-linux.c | 37 +++++++++++++++++++++++-------------- lib/netdev-provider.h | 1 + lib/netdev.c | 1 + tests/system-afxdp.at | 23 +++++++++++++++++++++++ 5 files changed, 51 insertions(+), 14 deletions(-) diff --git a/NEWS b/NEWS index 70bd17584..6db2d993f 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,9 @@ Post-v2.13.0 * Deprecated DPDK ring ports (dpdkr) are no longer supported. - Linux datapath: * Support for kernel versions up to 5.5.x. + - AF_XDP: + * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles + by enabling interrupt mode. 
v2.13.0 - 14 Feb 2020 diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index ff045cb12..1d7ed0145 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -3599,24 +3599,33 @@ const struct netdev_class netdev_internal_class = { }; #ifdef HAVE_AF_XDP +#define NETDEV_AFXDP_CLASS_COMMON \ + .construct = netdev_afxdp_construct, \ + .destruct = netdev_afxdp_destruct, \ + .get_stats = netdev_afxdp_get_stats, \ + .get_custom_stats = netdev_afxdp_get_custom_stats, \ + .get_status = netdev_linux_get_status, \ + .set_config = netdev_afxdp_set_config, \ + .get_config = netdev_afxdp_get_config, \ + .reconfigure = netdev_afxdp_reconfigure, \ + .get_numa_id = netdev_linux_get_numa_id, \ + .send = netdev_afxdp_batch_send, \ + .rxq_construct = netdev_afxdp_rxq_construct, \ + .rxq_destruct = netdev_afxdp_rxq_destruct, \ + .rxq_recv = netdev_afxdp_rxq_recv + const struct netdev_class netdev_afxdp_class = { NETDEV_LINUX_CLASS_COMMON, + NETDEV_AFXDP_CLASS_COMMON, .type = "afxdp", .is_pmd = true, - .init = netdev_afxdp_init, - .construct = netdev_afxdp_construct, - .destruct = netdev_afxdp_destruct, - .get_stats = netdev_afxdp_get_stats, - .get_custom_stats = netdev_afxdp_get_custom_stats, - .get_status = netdev_linux_get_status, - .set_config = netdev_afxdp_set_config, - .get_config = netdev_afxdp_get_config, - .reconfigure = netdev_afxdp_reconfigure, - .get_numa_id = netdev_linux_get_numa_id, - .send = netdev_afxdp_batch_send, - .rxq_construct = netdev_afxdp_rxq_construct, - .rxq_destruct = netdev_afxdp_rxq_destruct, - .rxq_recv = netdev_afxdp_rxq_recv, +}; + +const struct netdev_class netdev_afxdp_nonpmd_class = { + NETDEV_LINUX_CLASS_COMMON, + NETDEV_AFXDP_CLASS_COMMON, + .type = "afxdp-nonpmd", + .is_pmd = false, }; #endif diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 6f509424b..d9503adb0 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -850,6 +850,7 @@ extern const struct netdev_class netdev_tap_class; #ifdef HAVE_AF_XDP extern const 
struct netdev_class netdev_afxdp_class; +extern const struct netdev_class netdev_afxdp_nonpmd_class; #endif #ifdef __cplusplus } diff --git a/lib/netdev.c b/lib/netdev.c index 8c44eee8e..90962eec6 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -154,6 +154,7 @@ netdev_initialize(void) netdev_register_flow_api_provider(&netdev_offload_tc); #ifdef HAVE_AF_XDP netdev_register_provider(&netdev_afxdp_class); + netdev_register_provider(&netdev_afxdp_nonpmd_class); #endif #endif #if defined(__FreeBSD__) || defined(__NetBSD__) diff --git a/tests/system-afxdp.at b/tests/system-afxdp.at index e4451624f..0d09906fb 100644 --- a/tests/system-afxdp.at +++ b/tests/system-afxdp.at @@ -22,3 +22,26 @@ AT_CHECK([grep "ovs-p0: could not set configuration" ovs-vswitchd.log | wc -l], OVS_TRAFFIC_VSWITCHD_STOP(["/ovs-p0: Too big 'n_rxq'/d /ovs-p0: could not set configuration/d"]) AT_CLEANUP + + +AT_SETUP([AF_XDP - ping between pmd and non-pmd ports]) +AT_KEYWORDS([afxdp nonpmd]) +OVS_TRAFFIC_VSWITCHD_START() + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) + +ADD_NAMESPACES(at_ns0, at_ns1) +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +AT_CHECK([ovs-vsctl del-port ovs-p0]) +AT_CHECK([ovs-vsctl add-port br0 ovs-p0 -- \ + set interface ovs-p0 type=afxdp-nonpmd options:n_rxq=1], + [0], [], [stderr]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP -- GitLab From 8c2b63b7804eb348550f63d97801cc9a002a94c3 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 27 Apr 2020 08:45:19 -0700 Subject: [PATCH 109/432] docs: Fix GTP-U release version. GTP-U support should be at OVS-2.14. 
Signed-off-by: William Tu Acked-by: Ilya Maximets --- Documentation/faq/releases.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index b3507bd1c..f170ebd3f 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -131,7 +131,7 @@ Q: Are all features available with all datapaths? Tunnel - Geneve-IPv6 4.4 2.6 2.6 NO Tunnel - ERSPAN 4.18 2.10 2.10 NO Tunnel - ERSPAN-IPv6 4.18 2.10 2.10 NO - Tunnel - GTP-U NO NO 2.13 NO + Tunnel - GTP-U NO NO 2.14 NO QoS - Policing YES 1.1 2.6 NO QoS - Shaping YES 1.1 NO NO sFlow YES 1.0 1.0 NO -- GitLab From d93c3111ccbf738c4b463d5c0892e981851d55ad Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 27 Apr 2020 08:42:29 -0700 Subject: [PATCH 110/432] conntrack: Fix icmp conntrack state. ICMP conntrack state should be ICMPS_REPLY after seeing both side of ICMP traffic. Signed-off-by: William Tu Acked-by: Yi-Hung Wei --- lib/conntrack-icmp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c index 63246f012..6cbf9656d 100644 --- a/lib/conntrack-icmp.c +++ b/lib/conntrack-icmp.c @@ -50,9 +50,12 @@ icmp_conn_update(struct conntrack *ct, struct conn *conn_, struct dp_packet *pkt OVS_UNUSED, bool reply, long long now) { struct conn_icmp *conn = conn_icmp_cast(conn_); - conn->state = reply ? ICMPS_REPLY : ICMPS_FIRST; - conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now); + if (reply && conn->state == ICMPS_FIRST) { + conn->state = ICMPS_REPLY; + } + + conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now); return CT_UPDATE_VALID; } -- GitLab From 5e76d41d52b0a86e1e351abc6b08920993eafc61 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 27 Apr 2020 08:49:13 -0700 Subject: [PATCH 111/432] AUTHORS: Add Anton Ivanov. 
Signed-off-by: William Tu Acked-by: Numan Siddique --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 61a3f6117..5d83d309c 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -60,6 +60,7 @@ Andy Zhou azhou@ovn.org Ankur Sharma ankursharma@vmware.com Anoob Soman anoob.soman@citrix.com Ansis Atteka aatteka@vmware.com +Anton Ivanov anton.ivanov@cambridgegreys.com Antonio Fischetti antonio.fischetti@intel.com Anupam Chanda Ariel Tubaltsev atubaltsev@vmware.com -- GitLab From 81f71381ff66b059aa9d19000ceded33139a5eca Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 14 Apr 2020 13:33:28 -0700 Subject: [PATCH 112/432] ofp-actions: Add delete field action This patch adds a new OpenFlow action, delete field, to delete a field in packets. Currently, only the tun_metadata fields are supported. One use case to add this action is to support multiple versions of geneve tunnel metadatas to be exchanged among different versions of networks. For example, we may introduce tun_metadata2 to replace old tun_metadata1, but still want to provide backward compatibility to the older release. In this case, in the new OpenFlow pipeline, we would like to support the case to receive a packet with tun_metadata1, do some processing. And if the packet is going to a switch in the newer release, we would like to delete the value in tun_metadata1 and set a value into tun_metadata2. Currently, ovs does not provide an action to remove a value in tun_metadata if the value is present. This patch fulfills the gap by adding the delete_field action. 
For example, the OpenFlow syntax to delete tun_metadata1 is: actions=delete_field:tun_metadata1 Signed-off-by: Yi-Hung Wei Signed-off-by: William Tu Acked-by: William Tu --- NEWS | 1 + include/openvswitch/ofp-actions.h | 13 ++++- lib/nx-match.c | 20 ++++++- lib/nx-match.h | 4 +- lib/ofp-actions.c | 90 ++++++++++++++++++++++++++++++- lib/ovs-actions.xml | 16 ++++++ lib/tun-metadata.c | 17 ++++++ lib/tun-metadata.h | 1 + ofproto/ofproto-dpif-xlate.c | 24 ++++++++- tests/ofp-actions.at | 3 ++ tests/tunnel.at | 37 +++++++++++++ 11 files changed, 221 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index 6db2d993f..b61a60272 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,7 @@ Post-v2.13.0 * The OpenFlow ofp_desc/serial_num may now be configured by setting the value of other-config:dp-sn in the Bridge table. * Added support to watch CONTROLLER port status in fast failover group. + * New action "delete_field". - DPDK: * Deprecated DPDK pdump packet capture support removed. * Deprecated DPDK ring ports (dpdkr) are no longer supported. diff --git a/include/openvswitch/ofp-actions.h b/include/openvswitch/ofp-actions.h index c8948e0d6..226e86d0b 100644 --- a/include/openvswitch/ofp-actions.h +++ b/include/openvswitch/ofp-actions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2013, 2014, 2015, 2016, 2017, 2019 Nicira, Inc. + * Copyright (c) 2012-2017, 2019-2020 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -94,6 +94,7 @@ struct vl_mff_map; OFPACT(PUSH_MPLS, ofpact_push_mpls, ofpact, "push_mpls") \ OFPACT(POP_MPLS, ofpact_pop_mpls, ofpact, "pop_mpls") \ OFPACT(DEC_NSH_TTL, ofpact_null, ofpact, "dec_nsh_ttl") \ + OFPACT(DELETE_FIELD, ofpact_delete_field, ofpact, "delete_field") \ \ /* Generic encap & decap */ \ OFPACT(ENCAP, ofpact_encap, props, "encap") \ @@ -576,6 +577,16 @@ struct ofpact_pop_mpls { ); }; +/* OFPACT_DELETE_FIELD. + * + * Used for NXAST_DELETE_FIELD. 
*/ +struct ofpact_delete_field { + OFPACT_PADDED_MEMBERS( + struct ofpact ofpact; + const struct mf_field *field; + ); +}; + /* OFPACT_SET_TUNNEL. * * Used for NXAST_SET_TUNNEL, NXAST_SET_TUNNEL64. */ diff --git a/lib/nx-match.c b/lib/nx-match.c index 058816c7b..3ffd7d9d7 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, Inc. + * Copyright (c) 2010-2017, 2020 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1994,6 +1994,24 @@ nxm_execute_stack_pop(const struct ofpact_stack *pop, } } +/* Parses a field from '*s' into '*field'. If successful, stores the + * reference to the field in '*field', and returns NULL. On failure, + * returns a malloc()'ed error message. + */ +char * OVS_WARN_UNUSED_RESULT +mf_parse_field(const struct mf_field **field, const char *s) +{ + const struct nxm_field *f; + int s_len = strlen(s); + + f = nxm_field_by_name(s, s_len); + (*field) = f ? mf_from_id(f->id) : mf_from_name_len(s, s_len); + if (!*field) { + return xasprintf("unknown field `%s'", s); + } + return NULL; +} + /* Formats 'sf' into 's' in a format normally acceptable to * mf_parse_subfield(). (It won't be acceptable if sf->field is NULL or if * sf->field has no NXM name.) */ diff --git a/lib/nx-match.h b/lib/nx-match.h index 9be40a981..3120ac0a0 100644 --- a/lib/nx-match.h +++ b/lib/nx-match.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2017 Nicira, Inc. + * Copyright (c) 2010-2017, 2020 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,6 +42,8 @@ struct vl_mff_map; * See include/openflow/nicira-ext.h for NXM specification. 
*/ +char * mf_parse_field(const struct mf_field **field, const char *s) + OVS_WARN_UNUSED_RESULT; void mf_format_subfield(const struct mf_subfield *, struct ds *); char *mf_parse_subfield__(struct mf_subfield *sf, const char **s) OVS_WARN_UNUSED_RESULT; diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index a94d1a7ca..be08a53fd 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008-2017, 2019 Nicira, Inc. + * Copyright (c) 2008-2017, 2019-2020 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -361,6 +361,9 @@ enum ofp_raw_action_type { /* NX1.0+(49): struct nx_action_check_pkt_larger, ... VLMFF */ NXAST_RAW_CHECK_PKT_LARGER, + /* NX1.0+(50): struct nx_action_delete_field. VLMFF */ + NXAST_RAW_DELETE_FIELD, + /* ## ------------------ ## */ /* ## Debugging actions. ## */ /* ## ------------------ ## */ @@ -500,6 +503,7 @@ ofpact_next_flattened(const struct ofpact *ofpact) case OFPACT_DECAP: case OFPACT_DEC_NSH_TTL: case OFPACT_CHECK_PKT_LARGER: + case OFPACT_DELETE_FIELD: return ofpact_next(ofpact); case OFPACT_CLONE: @@ -4140,6 +4144,87 @@ check_SET_TUNNEL(const struct ofpact_tunnel *a OVS_UNUSED, return 0; } +/* Delete field action. */ + +/* Action structure for DELETE_FIELD */ +struct nx_action_delete_field { + ovs_be16 type; /* OFPAT_VENDOR */ + ovs_be16 len; /* Length is 24. */ + ovs_be32 vendor; /* NX_VENDOR_ID. */ + ovs_be16 subtype; /* NXAST_DELETE_FIELD. */ + /* Followed by: + * - OXM/NXM header for field to delete (4 or 8 bytes). + * - Enough 0-bytes to pad out the action to 24 bytes. 
*/ + uint8_t pad[14]; +}; +OFP_ASSERT(sizeof(struct nx_action_delete_field ) == 24); + +static enum ofperr +decode_NXAST_RAW_DELETE_FIELD(const struct nx_action_delete_field *nadf, + enum ofp_version ofp_version OVS_UNUSED, + const struct vl_mff_map *vl_mff_map, + uint64_t *tlv_bitmap, struct ofpbuf *out) +{ + struct ofpact_delete_field *delete_field; + enum ofperr err; + + delete_field = ofpact_put_DELETE_FIELD(out); + delete_field->ofpact.raw = NXAST_RAW_DELETE_FIELD; + + struct ofpbuf b = ofpbuf_const_initializer(nadf, ntohs(nadf->len)); + ofpbuf_pull(&b, OBJECT_OFFSETOF(nadf, pad)); + + err = mf_vl_mff_nx_pull_header(&b, vl_mff_map, &delete_field->field, + NULL, tlv_bitmap); + if (err) { + return err; + } + + return 0; +} + +static void +encode_DELETE_FIELD(const struct ofpact_delete_field *delete_field, + enum ofp_version ofp_version OVS_UNUSED, + struct ofpbuf *out) +{ + struct nx_action_delete_field *nadf = put_NXAST_DELETE_FIELD(out); + size_t size = out->size; + + out->size = size - sizeof nadf->pad; + nx_put_mff_header(out, delete_field->field, 0, false); + out->size = size; +} + +static char * OVS_WARN_UNUSED_RESULT +parse_DELETE_FIELD(char *arg, const struct ofpact_parse_params *pp) +{ + struct ofpact_delete_field *delete_field; + + delete_field = ofpact_put_DELETE_FIELD(pp->ofpacts); + return mf_parse_field(&delete_field->field, arg); +} + +static void +format_DELETE_FIELD(const struct ofpact_delete_field *odf, + const struct ofpact_format_params *fp) +{ + ds_put_format(fp->s, "%sdelete_field:%s", colors.param, + colors.end); + ds_put_format(fp->s, "%s", odf->field->name); +} + +static enum ofperr +check_DELETE_FIELD(const struct ofpact_delete_field *odf, + struct ofpact_check_params *cp OVS_UNUSED) +{ + if (odf->field->id < MFF_TUN_METADATA0 || + odf->field->id > MFF_TUN_METADATA63) { + return OFPERR_OFPBAC_BAD_ARGUMENT; + } + return 0; +} + /* Set queue action. 
*/ static enum ofperr @@ -7870,6 +7955,7 @@ action_set_classify(const struct ofpact *a) case OFPACT_DEBUG_RECIRC: case OFPACT_DEBUG_SLOW: case OFPACT_CHECK_PKT_LARGER: + case OFPACT_DELETE_FIELD: return ACTION_SLOT_INVALID; default: @@ -8073,6 +8159,7 @@ ovs_instruction_type_from_ofpact_type(enum ofpact_type type, case OFPACT_DECAP: case OFPACT_DEC_NSH_TTL: case OFPACT_CHECK_PKT_LARGER: + case OFPACT_DELETE_FIELD: default: return OVSINST_OFPIT11_APPLY_ACTIONS; } @@ -8984,6 +9071,7 @@ ofpact_outputs_to_port(const struct ofpact *ofpact, ofp_port_t port) case OFPACT_DECAP: case OFPACT_DEC_NSH_TTL: case OFPACT_CHECK_PKT_LARGER: + case OFPACT_DELETE_FIELD: default: return false; } diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml index ab8e08b84..060a079b4 100644 --- a/lib/ovs-actions.xml +++ b/lib/ovs-actions.xml @@ -1552,6 +1552,22 @@ for i in [1,n_slaves]: This action was added in Open vSwitch 2.11.90.

    + + +

    The delete_field action

    + delete_field:field + +

    + The delete_field action deletes a field in the syntax + described under ``Field Specifications'' above. Currently, only + the tun_metadata fields are supported. +

    + +

    + This action was added in Open vSwitch 2.13.90. +

    +
    + diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c index f8a0e1952..c0b0ae044 100644 --- a/lib/tun-metadata.c +++ b/lib/tun-metadata.c @@ -261,6 +261,23 @@ tun_metadata_write(struct flow_tnl *tnl, value->tun_metadata + mf->n_bytes - loc->len, loc, idx); } +/* Deletes field 'mf' in 'tnl' (in non-UDPIF format). + * 'mf' must be an MFF_TUN_METADATA* field. + */ +void +tun_metadata_delete(struct flow_tnl *tnl, const struct mf_field *mf) +{ + unsigned int idx; + + if (tnl->flags & FLOW_TNL_F_UDPIF) { + return; + } + + idx = mf->id - MFF_TUN_METADATA0; + ovs_assert(idx < TUN_METADATA_NUM_OPTS); + ULLONG_SET0(tnl->metadata.present.map, idx); +} + static const struct tun_metadata_loc * metadata_loc_from_match(const struct tun_table *map, struct match *match, const char *name, unsigned int idx, diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h index 7dad9504b..67dedae25 100644 --- a/lib/tun-metadata.h +++ b/lib/tun-metadata.h @@ -47,6 +47,7 @@ void tun_metadata_read(const struct flow_tnl *, const struct mf_field *, union mf_value *); void tun_metadata_write(struct flow_tnl *, const struct mf_field *, const union mf_value *); +void tun_metadata_delete(struct flow_tnl *, const struct mf_field *); void tun_metadata_set_match(const struct mf_field *, const union mf_value *value, const union mf_value *mask, struct match *, diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index abce976c6..80fba84cb 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -1,4 +1,4 @@ -/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019 Nicira, Inc. +/* Copyright (c) 2009-2017, 2019-2020 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -5159,6 +5159,21 @@ compose_dec_mpls_ttl_action(struct xlate_ctx *ctx) return true; } +static void +xlate_delete_field(struct xlate_ctx *ctx, + struct flow *flow, + const struct ofpact_delete_field *odf) +{ + struct ds s = DS_EMPTY_INITIALIZER; + + /* Currently, only tun_metadata is allowed for delete_field action. */ + tun_metadata_delete(&flow->tunnel, odf->field); + + ds_put_format(&s, "delete %s", odf->field->name); + xlate_report(ctx, OFT_DETAIL, "%s", ds_cstr(&s)); + ds_destroy(&s); +} + /* Emits an action that outputs to 'port', within 'ctx'. * * 'controller_len' affects only packets sent to an OpenFlow controller. It @@ -5684,6 +5699,7 @@ reversible_actions(const struct ofpact *ofpacts, size_t ofpacts_len) case OFPACT_WRITE_ACTIONS: case OFPACT_WRITE_METADATA: case OFPACT_CHECK_PKT_LARGER: + case OFPACT_DELETE_FIELD: break; case OFPACT_CT: @@ -5993,6 +6009,7 @@ freeze_unroll_actions(const struct ofpact *a, const struct ofpact *end, case OFPACT_CT_CLEAR: case OFPACT_NAT: case OFPACT_CHECK_PKT_LARGER: + case OFPACT_DELETE_FIELD: /* These may not generate PACKET INs. 
*/ break; @@ -6653,6 +6670,7 @@ recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx) case OFPACT_WRITE_METADATA: case OFPACT_GOTO_TABLE: case OFPACT_CHECK_PKT_LARGER: + case OFPACT_DELETE_FIELD: default: break; } @@ -7030,6 +7048,10 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a)); break; + case OFPACT_DELETE_FIELD: + xlate_delete_field(ctx, flow, ofpact_get_DELETE_FIELD(a)); + break; + case OFPACT_CLEAR_ACTIONS: xlate_report_action_set(ctx, "was"); ofpbuf_clear(&ctx->action_set); diff --git a/tests/ofp-actions.at b/tests/ofp-actions.at index 4893280a9..28b2099a0 100644 --- a/tests/ofp-actions.at +++ b/tests/ofp-actions.at @@ -316,6 +316,9 @@ ffff 0018 00002320 0031 05dc 000000010004000000000000 # actions=check_pkt_larger(1000)->NXM_NX_XXREG1[4] ffff 0018 00002320 0031 03e8 00040001e010000000000000 +# actions=delete_field:tun_metadata10 +ffff 0018 00002320 0032 00 01 64 7c 00 00 00 00 000000000000 + ]) sed '/^[[#&]]/d' < test-data > input.txt sed -n 's/^# //p; /^$/p' < test-data > expout diff --git a/tests/tunnel.at b/tests/tunnel.at index d65bf4412..b3764aed8 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -890,6 +890,43 @@ Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([tunnel - Delete Geneve option]) +OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=geneve \ + options:remote_ip=1.1.1.1 ofport_request=1 \ + -- add-port br0 p2 -- set Interface p2 type=dummy \ + ofport_request=2 ofport_request=2]) +OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP + +AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0,len=4}->tun_metadata0,{class=0xffff,type=1,len=4}->tun_metadata1,{class=0xffff,type=2,len=4}->tun_metadata3"]) + +AT_DATA([flows.txt], [dnl +table=0,tun_metadata0=0x11112222,actions=set_field:0x55556666->tun_metadata1,resubmit(,1) 
+table=0,tun_metadata0=0x33334444,actions=delete_field:tun_metadata0,set_field:0x77778888->tun_metadata1,resubmit(,1) +table=0,tun_metadata0=0x88889999,actions=delete_field:tun_metadata3,resubmit(,1) +table=1,actions=IN_PORT +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x11112222}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(frag=no)'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x11112222,tun_metadata1=NP,in_port=1,nw_ecn=0,nw_frag=no +Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff,type=0,len=4,0x11112222}{class=0xffff,type=0x1,len=4,0x55556666}),flags(df))),6081 +]) + +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x33334444}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(frag=no)'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: recirc_id=0,eth,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x33334444,tun_metadata1=NP,in_port=1,nw_ecn=0,nw_frag=no +Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff,type=0x1,len=4,0x77778888}),flags(df))),6081 +]) + +AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x88889999}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(frag=no)'], [0], [stdout]) +AT_CHECK([tail -2 stdout], [0], + [Megaflow: 
recirc_id=0,eth,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x88889999,in_port=1,nw_ecn=0,nw_frag=no +Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff,type=0,len=4,0x88889999}),flags(df))),6081 +]) +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([tunnel - concomitant IPv6 and IPv4 tunnels]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \ options:remote_ip=1.1.1.1 ofport_request=1 \ -- GitLab From 55dac25954ba9edf6b58d42977d33f586c4dc47c Mon Sep 17 00:00:00 2001 From: William Tu Date: Wed, 29 Apr 2020 09:14:54 -0700 Subject: [PATCH 113/432] bugtool: Add dump-tlv-map. This helps debugging the tlv map issues. Signed-off-by: William Tu Acked-by: Yi-Hung Wei --- utilities/bugtool/plugins/network-status/openvswitch.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/utilities/bugtool/plugins/network-status/openvswitch.xml b/utilities/bugtool/plugins/network-status/openvswitch.xml index 72aa44930..e6fa4fd15 100644 --- a/utilities/bugtool/plugins/network-status/openvswitch.xml +++ b/utilities/bugtool/plugins/network-status/openvswitch.xml @@ -39,6 +39,7 @@ /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-ports" /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-groups" /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-group-stats" + /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-tlv-map" /usr/share/openvswitch/scripts/ovs-bugtool-get-dpdk-nic-numa ip -s -s link show /usr/share/openvswitch/scripts/ovs-bugtool-get-port-stats -- GitLab From f5a36db10b1ffe243a64f650e24160eb985c8eac Mon Sep 17 00:00:00 2001 From: William Tu Date: Wed, 29 Apr 2020 10:30:26 -0700 Subject: [PATCH 114/432] ovs-bugtool: Add ethtool -l for combined channel. Users of netdev-afxdp has to setup the combined channel on physical NIC. This helps debugging related issues. 
Example output: $ ethtool -l enp3s0f0 Channel parameters for enp3s0f0: Pre-set maximums: RX: 0 TX: 0 Other: 1 Combined: 63 Current hardware settings: RX: 0 TX: 0 Other: 1 Combined: 1 Some previous discussion: https://mail.openvswitch.org/pipermail/ovs-dev/2020-January/366631.html Signed-off-by: William Tu Acked-by: Yi-Hung Wei --- utilities/bugtool/ovs-bugtool.in | 1 + 1 file changed, 1 insertion(+) diff --git a/utilities/bugtool/ovs-bugtool.in b/utilities/bugtool/ovs-bugtool.in index 47f3c4629..1a5170d8c 100755 --- a/utilities/bugtool/ovs-bugtool.in +++ b/utilities/bugtool/ovs-bugtool.in @@ -628,6 +628,7 @@ exclude those logs from the archive. cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-k', p]) cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-i', p]) cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-c', p]) + cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-l', p]) if int(t) == 1: cmd_output(CAP_NETWORK_INFO, [TC, '-s', '-d', 'class', 'show', 'dev', p]) -- GitLab From b9f825a5442a979b28a5ec18640129058d334e85 Mon Sep 17 00:00:00 2001 From: Jiang Lidong Date: Thu, 23 Apr 2020 05:35:14 +0000 Subject: [PATCH 115/432] netdev-linux: remove sum of vport stats and kernel netdev stats. When using kernel veth as OVS interface, doubled drop counter value is shown when veth drops packets due to traffic overrun. In netdev_linux_get_stats, it reads both vport stats and kernel netdev stats, in case vport stats retrieve failure. If both of them success, error counters are added to include errors from different layers. But implementation of ovs_vport_get_stats in kernel data path has included kernel netdev stats by calling dev_get_stats. When drop or other error counters is not zero, its value is doubled by netdev_linux_get_stats. In this change, adding kernel netdev stats into vport stats is removed, since vport stats includes all information of kernel netdev stats. 
Signed-off-by: Jiang Lidong Signed-off-by: William Tu --- lib/netdev-linux.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 1d7ed0145..40d0cc110 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -2208,18 +2208,6 @@ netdev_linux_get_stats(const struct netdev *netdev_, /* stats not available from OVS then use netdev stats. */ *stats = dev_stats; } else { - /* Use kernel netdev's packet and byte counts since vport's counters - * do not reflect packet counts on the wire when GSO, TSO or GRO are - * enabled. */ - stats->rx_packets = dev_stats.rx_packets; - stats->rx_bytes = dev_stats.rx_bytes; - stats->tx_packets = dev_stats.tx_packets; - stats->tx_bytes = dev_stats.tx_bytes; - - stats->rx_errors += dev_stats.rx_errors; - stats->tx_errors += dev_stats.tx_errors; - stats->rx_dropped += dev_stats.rx_dropped; - stats->tx_dropped += dev_stats.tx_dropped; stats->multicast += dev_stats.multicast; stats->collisions += dev_stats.collisions; stats->rx_length_errors += dev_stats.rx_length_errors; -- GitLab From 2fcd7c077c006cbefeb5fda18818ae6d94c600b1 Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 30 Apr 2020 07:40:36 -0700 Subject: [PATCH 116/432] AUTHORS: Add Jiang Lidong. 
Signed-off-by: William Tu --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 5d83d309c..3d805412d 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -194,6 +194,7 @@ Jeremy Stribling Jeroen van Bemmel jvb127@gmail.com Jesse Gross jesse@kernel.org Jian Li lijian@ooclab.com +Jiang Lidong jianglidong3@jd.com Jianbo Liu jianbol@mellanox.com Jing Ai jinga@google.com Jiri Benc jbenc@redhat.com -- GitLab From 5519e384f6a17f564fef4c5eb39e471e16c77235 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Wed, 29 Apr 2020 14:25:50 -0700 Subject: [PATCH 117/432] compat: Fix ipv6_dst_lookup build error The geneve/vxlan compat code base invokes ipv6_dst_lookup() which is recently replaced by ipv6_dst_lookup_flow() in the stable kernel tree. This causes travis build failure: * https://travis-ci.org/github/openvswitch/ovs/builds/681084038 This patch updates the backport logic to invoke the right function. Related patch in git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git b9f3e457098e ("net: ipv6_stub: use ip6_dst_lookup_flow instead of ip6_dst_lookup") Signed-off-by: Yi-Hung Wei Signed-off-by: William Tu --- acinclude.m4 | 3 +++ datapath/linux/compat/geneve.c | 11 +++++++---- datapath/linux/compat/vxlan.c | 14 ++++++++------ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 0e90c3332..dabbffd01 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -589,7 +589,10 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup.*net], [OVS_DEFINE([HAVE_IPV6_DST_LOOKUP_NET])]) + OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup_flow.*net], + [OVS_DEFINE([HAVE_IPV6_DST_LOOKUP_FLOW_NET])]) OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_stub]) + OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup_flow]) OVS_GREP_IFELSE([$KSRC/include/linux/err.h], [ERR_CAST]) OVS_GREP_IFELSE([$KSRC/include/linux/err.h], 
[IS_ERR_OR_NULL]) diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c index 1551a3721..7bfc6d882 100644 --- a/datapath/linux/compat/geneve.c +++ b/datapath/linux/compat/geneve.c @@ -962,14 +962,17 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, return dst; } -#ifdef HAVE_IPV6_DST_LOOKUP_NET +#if defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET) + if (ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, &dst, + fl6)) { +#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW) + if (ipv6_stub->ipv6_dst_lookup_flow(gs6->sock->sk, &dst, fl6)) { +#elif defined(HAVE_IPV6_DST_LOOKUP_NET) if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { -#else -#ifdef HAVE_IPV6_STUB +#elif defined(HAVE_IPV6_STUB) if (ipv6_stub->ipv6_dst_lookup(gs6->sock->sk, &dst, fl6)) { #else if (ip6_dst_lookup(gs6->sock->sk, &dst, fl6)) { -#endif #endif netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr); return ERR_PTR(-ENETUNREACH); diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index f8f667e97..b334870b7 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -990,17 +990,19 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, fl6.fl6_dport = dport; fl6.fl6_sport = sport; -#ifdef HAVE_IPV6_DST_LOOKUP_NET - err = ipv6_stub->ipv6_dst_lookup(vxlan->net, - sock6->sock->sk, +#if defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET) + err = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk, + &ndst, &fl6); +#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW) + err = ipv6_stub->ipv6_dst_lookup_flow(sock6->sock->sk, &ndst, &fl6); +#elif defined(HAVE_IPV6_DST_LOOKUP_NET) + err = ipv6_stub->ipv6_dst_lookup(vxlan->net, sock6->sock->sk, &ndst, &fl6); -#else -#ifdef HAVE_IPV6_STUB +#elif defined(HAVE_IPV6_STUB) err = ipv6_stub->ipv6_dst_lookup(vxlan->vn6_sock->sock->sk, &ndst, &fl6); #else err = ip6_dst_lookup(vxlan->vn6_sock->sock->sk, &ndst, &fl6); -#endif #endif if (err < 0) return ERR_PTR(err); -- GitLab From 
2078901a4c142d25d1fae8710f4d38938385c954 Mon Sep 17 00:00:00 2001 From: William Tu Date: Wed, 29 Apr 2020 12:25:11 -0700 Subject: [PATCH 118/432] userspace: Add conntrack timeout policy support. Commit 1f1613183733 ("ct-dpif, dpif-netlink: Add conntrack timeout policy support") adds conntrack timeout policy for kernel datapath. This patch enables support for the userspace datapath. I tested using the 'make check-system-userspace' which checks the timeout policies for ICMP and UDP cases. Signed-off-by: William Tu Acked-by: Yi-Hung Wei --- Documentation/faq/releases.rst | 2 +- NEWS | 2 + lib/automake.mk | 2 + lib/conntrack-icmp.c | 6 +- lib/conntrack-other.c | 4 +- lib/conntrack-private.h | 70 ++----- lib/conntrack-tcp.c | 5 +- lib/conntrack-tp.c | 308 +++++++++++++++++++++++++++++++ lib/conntrack-tp.h | 30 +++ lib/conntrack.c | 37 ++-- lib/conntrack.h | 8 +- lib/ct-dpif.h | 2 + lib/dpif-netdev.c | 75 +++++++- ofproto/ofproto-dpif.c | 3 +- tests/system-traffic.at | 29 ++- tests/system-userspace-macros.at | 6 +- tests/test-conntrack.c | 6 +- 17 files changed, 500 insertions(+), 95 deletions(-) create mode 100644 lib/conntrack-tp.c create mode 100644 lib/conntrack-tp.h diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index f170ebd3f..dbc1706de 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -118,7 +118,7 @@ Q: Are all features available with all datapaths? ========================== ============== ============== ========= ======= Connection tracking 4.3 2.5 2.6 YES Conntrack Fragment Reass. 
4.3 2.6 2.12 YES - Conntrack Timeout Policies 5.2 2.12 NO NO + Conntrack Timeout Policies 5.2 2.12 2.14 NO Conntrack Zone Limit 4.18 2.10 2.13 YES Conntrack NAT 4.6 2.6 2.8 YES Tunnel - LISP NO 2.11 NO NO diff --git a/NEWS b/NEWS index b61a60272..3dbd8ec0e 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ Post-v2.13.0 - AF_XDP: * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles by enabling interrupt mode. + - Userspace datapath: + * Add support for conntrack zone-based timeout policy. v2.13.0 - 14 Feb 2020 diff --git a/lib/automake.mk b/lib/automake.mk index 95925b57c..86940ccd2 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -53,6 +53,8 @@ lib_libopenvswitch_la_SOURCES = \ lib/conntrack-icmp.c \ lib/conntrack-private.h \ lib/conntrack-tcp.c \ + lib/conntrack-tp.c \ + lib/conntrack-tp.h \ lib/conntrack-other.c \ lib/conntrack.c \ lib/conntrack.h \ diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c index 6cbf9656d..bf49f9a9f 100644 --- a/lib/conntrack-icmp.c +++ b/lib/conntrack-icmp.c @@ -22,6 +22,7 @@ #include #include "conntrack-private.h" +#include "conntrack-tp.h" #include "dp-packet.h" enum OVS_PACKED_ENUM icmp_state { @@ -79,12 +80,13 @@ icmp6_valid_new(struct dp_packet *pkt) static struct conn * icmp_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED, - long long now) + long long now, uint32_t tp_id) { struct conn_icmp *conn = xzalloc(sizeof *conn); conn->state = ICMPS_FIRST; - conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now); + conn->up.tp_id = tp_id; + conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now); return &conn->up; } diff --git a/lib/conntrack-other.c b/lib/conntrack-other.c index de22ef87c..d3b460185 100644 --- a/lib/conntrack-other.c +++ b/lib/conntrack-other.c @@ -17,6 +17,7 @@ #include #include "conntrack-private.h" +#include "conntrack-tp.h" #include "dp-packet.h" enum OVS_PACKED_ENUM other_state { @@ -69,12 +70,13 @@ other_valid_new(struct dp_packet *pkt OVS_UNUSED) 
static struct conn * other_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED, - long long now) + long long now, uint32_t tp_id) { struct conn_other *conn; conn = xzalloc(sizeof *conn); conn->state = OTHERS_FIRST; + conn->up.tp_id = tp_id; conn_init_expiration(ct, &conn->up, other_timeouts[conn->state], now); diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h index 9a8ca3910..343475301 100644 --- a/lib/conntrack-private.h +++ b/lib/conntrack-private.h @@ -118,6 +118,8 @@ struct conn { /* Immutable data. */ bool alg_related; /* True if alg data connection. */ enum ct_conn_type conn_type; + + uint32_t tp_id; /* Timeout policy ID. */ }; enum ct_update_res { @@ -131,28 +133,20 @@ enum ct_update_res { * are listed here. The name will be prefix by CT_TM_ and the value is in * milliseconds */ #define CT_TIMEOUTS \ - CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \ - CT_TIMEOUT(TCP_OPENING, 30 * 1000) \ - CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \ - CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \ - CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \ - CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \ - CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \ - CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \ - CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \ - CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \ - CT_TIMEOUT(ICMP_REPLY, 30 * 1000) - -/* The smallest of the above values: it is used as an upper bound for the - * interval between two rounds of cleanup of expired entries */ -#define CT_TM_MIN (30 * 1000) - -#define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN); - CT_TIMEOUTS -#undef CT_TIMEOUT + CT_TIMEOUT(TCP_FIRST_PACKET) \ + CT_TIMEOUT(TCP_OPENING) \ + CT_TIMEOUT(TCP_ESTABLISHED) \ + CT_TIMEOUT(TCP_CLOSING) \ + CT_TIMEOUT(TCP_FIN_WAIT) \ + CT_TIMEOUT(TCP_CLOSED) \ + CT_TIMEOUT(OTHER_FIRST) \ + CT_TIMEOUT(OTHER_MULTIPLE) \ + CT_TIMEOUT(OTHER_BIDIR) \ + CT_TIMEOUT(ICMP_FIRST) \ + CT_TIMEOUT(ICMP_REPLY) enum ct_timeout { -#define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME, +#define CT_TIMEOUT(NAME) CT_TM_##NAME, 
CT_TIMEOUTS #undef CT_TIMEOUT N_CT_TM @@ -163,6 +157,7 @@ struct conntrack { struct cmap conns OVS_GUARDED; struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED; struct hmap zone_limits OVS_GUARDED; + struct hmap timeout_policies OVS_GUARDED; uint32_t hash_basis; /* Salt for hashing a connection key. */ pthread_t clean_thread; /* Periodically cleans up connection tracker. */ struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */ @@ -197,7 +192,7 @@ extern struct ct_l4_proto ct_proto_icmp6; struct ct_l4_proto { struct conn *(*new_conn)(struct conntrack *ct, struct dp_packet *pkt, - long long now); + long long now, uint32_t tp_id); bool (*valid_new)(struct dp_packet *pkt); enum ct_update_res (*conn_update)(struct conntrack *ct, struct conn *conn, struct dp_packet *pkt, bool reply, @@ -206,39 +201,6 @@ struct ct_l4_proto { struct ct_dpif_protoinfo *); }; -extern long long ct_timeout_val[]; - - -/* ct_lock must be held. */ -static inline void -conn_init_expiration(struct conntrack *ct, struct conn *conn, - enum ct_timeout tm, long long now) -{ - conn->expiration = now + ct_timeout_val[tm]; - ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node); -} - -/* The conn entry lock must be held on entry and exit. 
*/ -static inline void -conn_update_expiration(struct conntrack *ct, struct conn *conn, - enum ct_timeout tm, long long now) - OVS_NO_THREAD_SAFETY_ANALYSIS -{ - ovs_mutex_unlock(&conn->lock); - - ovs_mutex_lock(&ct->ct_lock); - ovs_mutex_lock(&conn->lock); - if (!conn->cleaned) { - conn->expiration = now + ct_timeout_val[tm]; - ovs_list_remove(&conn->exp_node); - ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node); - } - ovs_mutex_unlock(&conn->lock); - ovs_mutex_unlock(&ct->ct_lock); - - ovs_mutex_lock(&conn->lock); -} - static inline uint32_t tcp_payload_length(struct dp_packet *pkt) { diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c index 47261c755..18a2aa7c7 100644 --- a/lib/conntrack-tcp.c +++ b/lib/conntrack-tcp.c @@ -39,6 +39,7 @@ #include #include "conntrack-private.h" +#include "conntrack-tp.h" #include "coverage.h" #include "ct-dpif.h" #include "dp-packet.h" @@ -435,7 +436,8 @@ tcp_valid_new(struct dp_packet *pkt) } static struct conn * -tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now) +tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now, + uint32_t tp_id) { struct conn_tcp* newconn = NULL; struct tcp_header *tcp = dp_packet_l4(pkt); @@ -471,6 +473,7 @@ tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now) src->state = CT_DPIF_TCPS_SYN_SENT; dst->state = CT_DPIF_TCPS_CLOSED; + newconn->up.tp_id = tp_id; conn_init_expiration(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET, now); return &newconn->up; diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c new file mode 100644 index 000000000..3a7604c0d --- /dev/null +++ b/lib/conntrack-tp.c @@ -0,0 +1,308 @@ +/* + * Copyright (c) 2020 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include "conntrack-private.h" +#include "conntrack-tp.h" +#include "ct-dpif.h" +#include "openvswitch/vlog.h" + +VLOG_DEFINE_THIS_MODULE(conntrack_tp); +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + +static const char *ct_timeout_str[] = { +#define CT_TIMEOUT(NAME) #NAME, + CT_TIMEOUTS +#undef CT_TIMEOUT +}; + +/* Default timeout policy in seconds. */ +static unsigned int ct_dpif_netdev_tp_def[] = { + [CT_DPIF_TP_ATTR_TCP_SYN_SENT] = 30, + [CT_DPIF_TP_ATTR_TCP_SYN_RECV] = 30, + [CT_DPIF_TP_ATTR_TCP_ESTABLISHED] = 24 * 60 * 60, + [CT_DPIF_TP_ATTR_TCP_FIN_WAIT] = 15 * 60, + [CT_DPIF_TP_ATTR_TCP_TIME_WAIT] = 45, + [CT_DPIF_TP_ATTR_TCP_CLOSE] = 30, + [CT_DPIF_TP_ATTR_UDP_FIRST] = 60, + [CT_DPIF_TP_ATTR_UDP_SINGLE] = 60, + [CT_DPIF_TP_ATTR_UDP_MULTIPLE] = 30, + [CT_DPIF_TP_ATTR_ICMP_FIRST] = 60, + [CT_DPIF_TP_ATTR_ICMP_REPLY] = 30, +}; + +static struct timeout_policy * +timeout_policy_lookup(struct conntrack *ct, int32_t tp_id) + OVS_REQUIRES(ct->ct_lock) +{ + struct timeout_policy *tp; + uint32_t hash; + + hash = hash_int(tp_id, ct->hash_basis); + HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) { + if (tp->policy.id == tp_id) { + return tp; + } + } + return NULL; +} + +struct timeout_policy * +timeout_policy_get(struct conntrack *ct, int32_t tp_id) +{ + struct timeout_policy *tp; + + ovs_mutex_lock(&ct->ct_lock); + tp = timeout_policy_lookup(ct, tp_id); + if (!tp) { + ovs_mutex_unlock(&ct->ct_lock); + return NULL; + } + + ovs_mutex_unlock(&ct->ct_lock); + return tp; +} + +static void 
+update_existing_tp(struct timeout_policy *tp_dst, + const struct timeout_policy *tp_src) +{ + struct ct_dpif_timeout_policy *dst; + const struct ct_dpif_timeout_policy *src; + int i; + + dst = &tp_dst->policy; + src = &tp_src->policy; + + /* Set the value and present bit to dst if present + * bit in src is set. + */ + for (i = 0; i < ARRAY_SIZE(dst->attrs); i++) { + if (src->present & (1 << i)) { + dst->attrs[i] = src->attrs[i]; + dst->present |= (1 << i); + } + } +} + +static void +init_default_tp(struct timeout_policy *tp, uint32_t tp_id) +{ + tp->policy.id = tp_id; + /* Initialize the timeout value to default, but not + * setting the present bit. + */ + tp->policy.present = 0; + memcpy(tp->policy.attrs, ct_dpif_netdev_tp_def, + sizeof tp->policy.attrs); +} + +static void +timeout_policy_create(struct conntrack *ct, + struct timeout_policy *new_tp) + OVS_REQUIRES(ct->ct_lock) +{ + uint32_t tp_id = new_tp->policy.id; + struct timeout_policy *tp; + uint32_t hash; + + tp = xzalloc(sizeof *tp); + init_default_tp(tp, tp_id); + update_existing_tp(tp, new_tp); + hash = hash_int(tp_id, ct->hash_basis); + hmap_insert(&ct->timeout_policies, &tp->node, hash); +} + +static void +timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp) + OVS_REQUIRES(ct->ct_lock) +{ + hmap_remove(&ct->timeout_policies, &tp->node); + free(tp); +} + +static int +timeout_policy_delete__(struct conntrack *ct, uint32_t tp_id) + OVS_REQUIRES(ct->ct_lock) +{ + int err = 0; + struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id); + + if (tp) { + timeout_policy_clean(ct, tp); + } else { + VLOG_WARN_RL(&rl, "Failed to delete a non-existent timeout " + "policy: id=%d", tp_id); + err = ENOENT; + } + return err; +} + +int +timeout_policy_delete(struct conntrack *ct, uint32_t tp_id) +{ + int err; + + ovs_mutex_lock(&ct->ct_lock); + err = timeout_policy_delete__(ct, tp_id); + ovs_mutex_unlock(&ct->ct_lock); + return err; +} + +void +timeout_policy_init(struct conntrack *ct) + 
OVS_REQUIRES(ct->ct_lock) +{ + struct timeout_policy tp; + + hmap_init(&ct->timeout_policies); + + /* Create default timeout policy. */ + memset(&tp, 0, sizeof tp); + tp.policy.id = DEFAULT_TP_ID; + timeout_policy_create(ct, &tp); +} + +int +timeout_policy_update(struct conntrack *ct, + struct timeout_policy *new_tp) +{ + int err = 0; + uint32_t tp_id = new_tp->policy.id; + + ovs_mutex_lock(&ct->ct_lock); + struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id); + if (tp) { + err = timeout_policy_delete__(ct, tp_id); + } + timeout_policy_create(ct, new_tp); + ovs_mutex_unlock(&ct->ct_lock); + return err; +} + +static enum ct_dpif_tp_attr +tm_to_ct_dpif_tp(enum ct_timeout tm) +{ + switch (tm) { + case CT_TM_TCP_FIRST_PACKET: + return CT_DPIF_TP_ATTR_TCP_SYN_SENT; + case CT_TM_TCP_OPENING: + return CT_DPIF_TP_ATTR_TCP_SYN_RECV; + case CT_TM_TCP_ESTABLISHED: + return CT_DPIF_TP_ATTR_TCP_ESTABLISHED; + case CT_TM_TCP_CLOSING: + return CT_DPIF_TP_ATTR_TCP_FIN_WAIT; + case CT_TM_TCP_FIN_WAIT: + return CT_DPIF_TP_ATTR_TCP_TIME_WAIT; + case CT_TM_TCP_CLOSED: + return CT_DPIF_TP_ATTR_TCP_CLOSE; + case CT_TM_OTHER_FIRST: + return CT_DPIF_TP_ATTR_UDP_FIRST; + case CT_TM_OTHER_BIDIR: + return CT_DPIF_TP_ATTR_UDP_MULTIPLE; + case CT_TM_OTHER_MULTIPLE: + return CT_DPIF_TP_ATTR_UDP_SINGLE; + case CT_TM_ICMP_FIRST: + return CT_DPIF_TP_ATTR_ICMP_FIRST; + case CT_TM_ICMP_REPLY: + return CT_DPIF_TP_ATTR_ICMP_REPLY; + case N_CT_TM: + default: + OVS_NOT_REACHED(); + break; + } + OVS_NOT_REACHED(); + return CT_DPIF_TP_ATTR_MAX; +} + +static void +conn_update_expiration__(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now, + uint32_t tp_value) + OVS_REQUIRES(conn->lock) +{ + ovs_mutex_unlock(&conn->lock); + + ovs_mutex_lock(&ct->ct_lock); + ovs_mutex_lock(&conn->lock); + if (!conn->cleaned) { + conn->expiration = now + tp_value * 1000; + ovs_list_remove(&conn->exp_node); + ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node); + } + 
ovs_mutex_unlock(&conn->lock); + ovs_mutex_unlock(&ct->ct_lock); + + ovs_mutex_lock(&conn->lock); +} + +/* The conn entry lock must be held on entry and exit. */ +void +conn_update_expiration(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now) + OVS_REQUIRES(conn->lock) +{ + struct timeout_policy *tp; + uint32_t val; + + ovs_mutex_lock(&ct->ct_lock); + tp = timeout_policy_lookup(ct, conn->tp_id); + if (tp) { + val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)]; + } else { + val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)]; + } + ovs_mutex_unlock(&ct->ct_lock); + + VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d " + "val=%u sec.", + ct_timeout_str[tm], conn->key.zone, conn->tp_id, val); + + conn_update_expiration__(ct, conn, tm, now, val); +} + +static void +conn_init_expiration__(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now, + uint32_t tp_value) +{ + conn->expiration = now + tp_value * 1000; + ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node); +} + +/* ct_lock must be held. */ +void +conn_init_expiration(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now) + OVS_REQUIRES(ct->ct_lock) +{ + struct timeout_policy *tp; + uint32_t val; + + tp = timeout_policy_lookup(ct, conn->tp_id); + if (tp) { + val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)]; + } else { + val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)]; + } + + VLOG_DBG_RL(&rl, "Init timeout %s zone=%u with policy id=%d val=%u sec.", + ct_timeout_str[tm], conn->key.zone, conn->tp_id, val); + + conn_init_expiration__(ct, conn, tm, now, val); +} diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h new file mode 100644 index 000000000..4d411d19f --- /dev/null +++ b/lib/conntrack-tp.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2020 VMware, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CONNTRACK_TP_H +#define CONNTRACK_TP_H 1 + +#define CT_DPIF_NETDEV_TP_MIN 30 +enum ct_timeout; +void timeout_policy_init(struct conntrack *ct); +int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp); +int timeout_policy_delete(struct conntrack *ct, uint32_t tp_id); +struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t tp_id); +void conn_init_expiration(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now); +void conn_update_expiration(struct conntrack *ct, struct conn *conn, + enum ct_timeout tm, long long now); +#endif diff --git a/lib/conntrack.c b/lib/conntrack.c index 95d48c5ee..f42ba4b60 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -25,6 +25,7 @@ #include "bitmap.h" #include "conntrack.h" #include "conntrack-private.h" +#include "conntrack-tp.h" #include "coverage.h" #include "csum.h" #include "ct-dpif.h" @@ -89,7 +90,8 @@ static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis); static void conn_key_reverse(struct conn_key *); static bool valid_new(struct dp_packet *pkt, struct conn_key *); static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt, - struct conn_key *, long long now); + struct conn_key *, long long now, + uint32_t tp_id); static void delete_conn_cmn(struct conn *); static void delete_conn(struct conn *); static void delete_conn_one(struct conn *conn); @@ -176,12 +178,6 @@ static alg_helper alg_helpers[] = { [CT_ALG_CTL_TFTP] = handle_tftp_ctl, }; -long long ct_timeout_val[] = { -#define CT_TIMEOUT(NAME, VAL) 
[CT_TM_##NAME] = VAL, - CT_TIMEOUTS -#undef CT_TIMEOUT -}; - /* The maximum TCP or UDP port number. */ #define CT_MAX_L4_PORT 65535 /* String buffer used for parsing FTP string messages. @@ -313,6 +309,7 @@ conntrack_init(void) } hmap_init(&ct->zone_limits); ct->zone_limit_seq = 0; + timeout_policy_init(ct); ovs_mutex_unlock(&ct->ct_lock); ct->hash_basis = random_uint32(); @@ -503,6 +500,12 @@ conntrack_destroy(struct conntrack *ct) } hmap_destroy(&ct->zone_limits); + struct timeout_policy *tp; + HMAP_FOR_EACH_POP (tp, node, &ct->timeout_policies) { + free(tp); + } + hmap_destroy(&ct->timeout_policies); + ovs_mutex_unlock(&ct->ct_lock); ovs_mutex_destroy(&ct->ct_lock); @@ -957,7 +960,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, struct conn_lookup_ctx *ctx, bool commit, long long now, const struct nat_action_info_t *nat_action_info, const char *helper, const struct alg_exp_node *alg_exp, - enum ct_alg_ctl_type ct_alg_ctl) + enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id) OVS_REQUIRES(ct->ct_lock) { struct conn *nc = NULL; @@ -988,7 +991,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, return nc; } - nc = new_conn(ct, pkt, &ctx->key, now); + nc = new_conn(ct, pkt, &ctx->key, now, tp_id); memcpy(&nc->key, &ctx->key, sizeof nc->key); memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key); conn_key_reverse(&nc->rev_key); @@ -1276,7 +1279,8 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, bool force, bool commit, long long now, const uint32_t *setmark, const struct ovs_key_ct_labels *setlabel, const struct nat_action_info_t *nat_action_info, - ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper) + ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper, + uint32_t tp_id) { /* Reset ct_state whenever entering a new zone. 
*/ if (pkt->md.ct_state && pkt->md.ct_zone != zone) { @@ -1360,7 +1364,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, ovs_mutex_lock(&ct->ct_lock); if (!conn_lookup(ct, &ctx->key, now, NULL, NULL)) { conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info, - helper, alg_exp, ct_alg_ctl); + helper, alg_exp, ct_alg_ctl, tp_id); } ovs_mutex_unlock(&ct->ct_lock); } @@ -1396,7 +1400,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, const struct ovs_key_ct_labels *setlabel, ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper, const struct nat_action_info_t *nat_action_info, - long long now) + long long now, uint32_t tp_id) { ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone, ct->hash_basis); @@ -1418,7 +1422,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, write_ct_md(packet, zone, NULL, NULL, NULL); } else { process_one(ct, packet, &ctx, zone, force, commit, now, setmark, - setlabel, nat_action_info, tp_src, tp_dst, helper); + setlabel, nat_action_info, tp_src, tp_dst, helper, + tp_id); } } @@ -1524,7 +1529,7 @@ conntrack_clean(struct conntrack *ct, long long now) atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit); size_t clean_max = n_conn_limit > 10 ? 
n_conn_limit / 10 : 1; long long min_exp = ct_sweep(ct, now, clean_max); - long long next_wakeup = MIN(min_exp, now + CT_TM_MIN); + long long next_wakeup = MIN(min_exp, now + CT_DPIF_NETDEV_TP_MIN); return next_wakeup; } @@ -2354,9 +2359,9 @@ valid_new(struct dp_packet *pkt, struct conn_key *key) static struct conn * new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key, - long long now) + long long now, uint32_t tp_id) { - return l4_protos[key->nw_proto]->new_conn(ct, pkt, now); + return l4_protos[key->nw_proto]->new_conn(ct, pkt, now, tp_id); } static void diff --git a/lib/conntrack.h b/lib/conntrack.h index b0d0fc8d9..9553b188a 100644 --- a/lib/conntrack.h +++ b/lib/conntrack.h @@ -20,6 +20,7 @@ #include #include "cmap.h" +#include "ct-dpif.h" #include "latch.h" #include "odp-netlink.h" #include "openvswitch/hmap.h" @@ -93,7 +94,7 @@ int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch, const struct ovs_key_ct_labels *setlabel, ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper, const struct nat_action_info_t *nat_action_info, - long long now); + long long now, uint32_t tp_id); void conntrack_clear(struct dp_packet *packet); struct conntrack_dump { @@ -111,6 +112,11 @@ struct conntrack_zone_limit { uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */ }; +struct timeout_policy { + struct hmap_node node; + struct ct_dpif_timeout_policy policy; +}; + enum { INVALID_ZONE = -2, DEFAULT_ZONE = -1, /* Default zone for zone limit management. 
*/ diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h index 3e227d9e3..e4c7a640b 100644 --- a/lib/ct-dpif.h +++ b/lib/ct-dpif.h @@ -59,6 +59,8 @@ struct ct_dpif_timestamp { uint64_t stop; }; +#define DEFAULT_TP_ID 0 + #define CT_DPIF_TCP_STATES \ CT_DPIF_TCP_STATE(CLOSED) \ CT_DPIF_TCP_STATE(LISTEN) \ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index ef14e83b5..51c888501 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -36,6 +36,7 @@ #include "bitmap.h" #include "cmap.h" #include "conntrack.h" +#include "conntrack-tp.h" #include "coverage.h" #include "ct-dpif.h" #include "csum.h" @@ -7342,6 +7343,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, bool commit = false; unsigned int left; uint16_t zone = 0; + uint32_t tp_id = 0; const char *helper = NULL; const uint32_t *setmark = NULL; const struct ovs_key_ct_labels *setlabel = NULL; @@ -7377,8 +7379,11 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, * netlink events. */ break; case OVS_CT_ATTR_TIMEOUT: - /* Userspace datapath does not support customized timeout - * policy yet. 
*/ + if (!str_to_uint(nl_attr_get_string(b), 10, &tp_id)) { + VLOG_WARN("Invalid Timeout Policy ID: %s.", + nl_attr_get_string(b)); + tp_id = DEFAULT_TP_ID; + } break; case OVS_CT_ATTR_NAT: { const struct nlattr *b_nest; @@ -7464,7 +7469,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, conntrack_execute(dp->conntrack, packets_, aux->flow->dl_type, force, commit, zone, setmark, setlabel, aux->flow->tp_src, aux->flow->tp_dst, helper, nat_action_info_ref, - pmd->ctx.now / 1000); + pmd->ctx.now / 1000, tp_id); break; } @@ -7697,6 +7702,62 @@ dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED, return err; } +static int +dpif_netdev_ct_set_timeout_policy(struct dpif *dpif, + const struct ct_dpif_timeout_policy *dpif_tp) +{ + struct timeout_policy tp; + struct dp_netdev *dp; + + dp = get_dp_netdev(dpif); + memcpy(&tp.policy, dpif_tp, sizeof tp.policy); + return timeout_policy_update(dp->conntrack, &tp); +} + +static int +dpif_netdev_ct_get_timeout_policy(struct dpif *dpif, uint32_t tp_id, + struct ct_dpif_timeout_policy *dpif_tp) +{ + struct timeout_policy *tp; + struct dp_netdev *dp; + int err = 0; + + dp = get_dp_netdev(dpif); + tp = timeout_policy_get(dp->conntrack, tp_id); + if (!tp) { + return ENOENT; + } + memcpy(dpif_tp, &tp->policy, sizeof tp->policy); + return err; +} + +static int +dpif_netdev_ct_del_timeout_policy(struct dpif *dpif, + uint32_t tp_id) +{ + struct dp_netdev *dp; + int err = 0; + + dp = get_dp_netdev(dpif); + err = timeout_policy_delete(dp->conntrack, tp_id); + return err; +} + +static int +dpif_netdev_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED, + uint32_t tp_id, + uint16_t dl_type OVS_UNUSED, + uint8_t nw_proto OVS_UNUSED, + char **tp_name, bool *is_generic) +{ + struct ds ds = DS_EMPTY_INITIALIZER; + + ds_put_format(&ds, "%"PRIu32, tp_id); + *tp_name = ds_steal_cstr(&ds); + *is_generic = true; + return 0; +} + static int dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable) { @@ -7807,13 +7868,13 
@@ const struct dpif_class dpif_netdev_class = { dpif_netdev_ct_set_limits, dpif_netdev_ct_get_limits, dpif_netdev_ct_del_limits, - NULL, /* ct_set_timeout_policy */ - NULL, /* ct_get_timeout_policy */ - NULL, /* ct_del_timeout_policy */ + dpif_netdev_ct_set_timeout_policy, + dpif_netdev_ct_get_timeout_policy, + dpif_netdev_ct_del_timeout_policy, NULL, /* ct_timeout_policy_dump_start */ NULL, /* ct_timeout_policy_dump_next */ NULL, /* ct_timeout_policy_dump_done */ - NULL, /* ct_get_timeout_policy_name */ + dpif_netdev_ct_get_timeout_policy_name, dpif_netdev_ipf_set_enabled, dpif_netdev_ipf_set_min_frag, dpif_netdev_ipf_set_max_nfrags, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index d21874b46..7e10375f2 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -5426,7 +5426,8 @@ clear_existing_ct_timeout_policies(struct dpif_backer *backer) static void ct_zone_config_init(struct dpif_backer *backer) { - backer->tp_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID); + backer->tp_ids = id_pool_create(DEFAULT_TP_ID + 1, + MAX_TIMEOUT_POLICY_ID - 1); cmap_init(&backer->ct_zones); hmap_init(&backer->ct_tps); ovs_list_init(&backer->ct_tp_kill_list); diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 3ed03d92b..2a0fbadff 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -3311,8 +3311,15 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= AT_CHECK([ovs-appctl dpctl/flush-conntrack]) dnl Shorten the udp_single and icmp_first timeout in zone 5 +dnl Userspace datapath uses udp_first and icmp_reply, and +dnl kernel datapath uses udp_single and icmp_first VSCTL_ADD_DATAPATH_TABLE() -AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3]) + +dnl Creating more timeout policies +for i in `seq 1 255`; do +ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=$i udp_first=$i udp_single=$i icmp_first=$i icmp_reply=$i; +done +AT_CHECK([ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 
udp_first=1 udp_single=1 icmp_first=1 icmp_reply=1]) dnl Send ICMP and UDP traffic NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl @@ -3327,7 +3334,7 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= dnl Wait until the timeout expire. dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired. -sleep 4 +sleep 6 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl ]) @@ -3345,11 +3352,27 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src= dnl Wait until the timeout expire. dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired. -sleep 4 +sleep 6 AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl ]) +dnl Set the timeout policy to default again. +AT_CHECK([ovs-vsctl del-zone-tp $DP_TYPE zone=5]) + +dnl Send ICMP and UDP traffic +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) + +sleep 1 + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl +icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=,type=0,code=0),zone=5 +udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src=10.1.1.2,dst=10.1.1.1,sport=,dport=),zone=5 +]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index ba7f4102f..72c84b9c7 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -99,12 +99,8 @@ m4_define([CHECK_CONNTRACK_NAT]) # CHECK_CONNTRACK_TIMEOUT() # # Perform requirements checks for running conntrack customized timeout tests. 
-* The userspace datapath does not support this feature yet. # -m4_define([CHECK_CONNTRACK_TIMEOUT], -[ - AT_SKIP_IF([:]) -]) +m4_define([CHECK_CONNTRACK_TIMEOUT]) # CHECK_CT_DPIF_SET_GET_MAXCONNS() # diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c index f77ee75e3..e7c73220a 100644 --- a/tests/test-conntrack.c +++ b/tests/test-conntrack.c @@ -90,7 +90,7 @@ ct_thread_main(void *aux_) ovs_barrier_block(&barrier); for (i = 0; i < n_pkts; i += batch_size) { conntrack_execute(ct, pkt_batch, dl_type, false, true, 0, NULL, NULL, - 0, 0, NULL, NULL, now); + 0, 0, NULL, NULL, now, 0); } ovs_barrier_block(&barrier); destroy_packets(pkt_batch); @@ -174,7 +174,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_, if (flow.dl_type != dl_type) { conntrack_execute(ct_, &new_batch, dl_type, false, true, 0, - NULL, NULL, 0, 0, NULL, NULL, now); + NULL, NULL, 0, 0, NULL, NULL, now, 0); dp_packet_batch_init(&new_batch); } dp_packet_batch_add(&new_batch, packet); @@ -182,7 +182,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_, if (!dp_packet_batch_is_empty(&new_batch)) { conntrack_execute(ct_, &new_batch, dl_type, false, true, 0, NULL, NULL, - 0, 0, NULL, NULL, now); + 0, 0, NULL, NULL, now, 0); } } -- GitLab From cbff5189defb8b4b7c01d9c7444d63bfa0d27ab9 Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 1 May 2020 08:42:25 -0700 Subject: [PATCH 119/432] docs: Document check_pkt_len action. Cc: Numan Siddique Signed-off-by: William Tu Acked-by: Numan Siddique --- Documentation/faq/releases.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index dbc1706de..3903e5922 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -140,6 +140,7 @@ Q: Are all features available with all datapaths? 
NIC Bonding YES 1.0 1.0 YES Multiple VTEPs YES 1.10 1.10 YES Meter action 4.15 2.10 2.7 NO + check_pkt_len action 5.2 2.12 2.12 NO ========================== ============== ============== ========= ======= Do note, however: -- GitLab From 5119cfe32d021300663809eb23be6b33cbd4073a Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 4 May 2020 09:26:24 -0700 Subject: [PATCH 120/432] netdev-afxdp: Fix missing init. When introducing the interrupt mode for netdev-afxdp, the netdev init function is accidentally removed. Fix it by adding it back. Fixes: 5bfc519fee499 ("netdev-afxdp: Add interrupt mode netdev class.") Acked-by: Ilya Maximets Acked-by: Greg Rose Signed-off-by: William Tu --- lib/netdev-linux.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 40d0cc110..b52071e92 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -3588,6 +3588,7 @@ const struct netdev_class netdev_internal_class = { #ifdef HAVE_AF_XDP #define NETDEV_AFXDP_CLASS_COMMON \ + .init = netdev_afxdp_init, \ .construct = netdev_afxdp_construct, \ .destruct = netdev_afxdp_destruct, \ .get_stats = netdev_afxdp_get_stats, \ -- GitLab From 48b1c7642e2a046a255532b5f2322e70e352d790 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 4 May 2020 21:55:41 +0200 Subject: [PATCH 121/432] raft: Fix leak of the incomplete command. Function raft_command_initiate() returns correctly referenced command instance. 'n_ref' equals 1 for complete commands and 2 for incomplete commands because one more reference is in raft->commands list. raft_handle_execute_command_request__() leaks the reference by not returning pointer anywhere and not unreferencing incomplete commands. 
792 bytes in 11 blocks are definitely lost in loss record 258 of 262 at 0x483BB1A: calloc (vg_replace_malloc.c:762) by 0x44BA32: xcalloc (util.c:121) by 0x422E5F: raft_command_create_incomplete (raft.c:2038) by 0x422E5F: raft_command_initiate (raft.c:2061) by 0x428651: raft_handle_execute_command_request__ (raft.c:4161) by 0x428651: raft_handle_execute_command_request (raft.c:4177) by 0x428651: raft_handle_rpc (raft.c:4230) by 0x428651: raft_conn_run (raft.c:1445) by 0x428DEA: raft_run (raft.c:1803) by 0x407392: main_loop (ovsdb-server.c:226) by 0x407392: main (ovsdb-server.c:469) Fixes: 1b1d2e6daa56 ("ovsdb: Introduce experimental support for clustered databases.") Signed-off-by: Ilya Maximets Acked-by: Han Zhou Signed-off-by: William Tu --- ovsdb/raft.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 6391eeb13..e0af6bd62 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -4163,9 +4163,7 @@ raft_handle_execute_command_request__( cmd->sid = rq->common.sid; enum raft_command_status status = cmd->status; - if (status != RAFT_CMD_INCOMPLETE) { - raft_command_unref(cmd); - } + raft_command_unref(cmd); return status; } -- GitLab From 732cb79fb867b99b697da4cd305a3903a680454a Mon Sep 17 00:00:00 2001 From: David Marchand Date: Tue, 28 Apr 2020 14:03:53 +0200 Subject: [PATCH 122/432] sparse: Fix typo in DPDK endian conversion macros. This header is duplicated from the DPDK generic header. Fix typo identified in DPDK [1]. While at it, RTE_EXEC_ENV_BSDAPP has been replaced with RTE_EXEC_ENV_FREEBSD in 19.05 [2]. 
1: https://git.dpdk.org/dpdk/commit/?id=a3e283ed904c 2: https://git.dpdk.org/dpdk/commit/?id=5fbc1d498f54 Signed-off-by: David Marchand Signed-off-by: Ian Stokes --- include/sparse/rte_byteorder.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/sparse/rte_byteorder.h b/include/sparse/rte_byteorder.h index d32b5e691..72cacac89 100644 --- a/include/sparse/rte_byteorder.h +++ b/include/sparse/rte_byteorder.h @@ -49,7 +49,7 @@ #include "openvswitch/types.h" #include -#ifdef RTE_EXEC_ENV_BSDAPP +#ifdef RTE_EXEC_ENV_FREEBSD #include #else #include @@ -127,9 +127,9 @@ #define RTE_BE16(v) (OVS_FORCE rte_be16_t)(RTE_STATIC_BSWAP16(v)) #define RTE_BE32(v) (OVS_FORCE rte_be32_t)(RTE_STATIC_BSWAP32(v)) #define RTE_BE64(v) (OVS_FORCE rte_be64_t)(RTE_STATIC_BSWAP64(v)) -#define RTE_LE16(v) (OVS_FORCE rte_be16_t)(v) -#define RTE_LE32(v) (OVS_FORCE rte_be32_t)(v) -#define RTE_LE64(v) (OVS_FORCE rte_be64_t)(v) +#define RTE_LE16(v) (OVS_FORCE rte_le16_t)(v) +#define RTE_LE32(v) (OVS_FORCE rte_le32_t)(v) +#define RTE_LE64(v) (OVS_FORCE rte_le64_t)(v) #else #error Unsupported endianness. #endif -- GitLab From 6187bd3aa00dff30b7db3414381e750ecf8ab777 Mon Sep 17 00:00:00 2001 From: William Tu Date: Sat, 2 May 2020 09:01:48 -0700 Subject: [PATCH 123/432] ovsdb-idlc: Fix memory leak reported by Coverity. Coverity shows the following memory leak in this code pattern: void ovsrec_ipfix_index_set_obs_domain_id(... { struct ovsdb_datum datum; // 1. alloc_fn: Storage is returned from allocation function xmalloc. // 2. var_assign: Assigning: key = storage returned from xmalloc(16UL). union ovsdb_atom *key = xmalloc(sizeof(union ovsdb_atom)); // 3. Condition n_obs_domain_id, taking false branch. if (n_obs_domain_id) { datum.n = 1; datum.keys = key; key->integer = *obs_domain_id; } else { datum.n = 0; datum.keys = NULL; } datum.values = NULL; ovsdb_idl_index_write(CONST_CAST(struct ovsdb_idl_row *, &row->head... 
// CID 1420891 (#1 of 1): Resource leak (RESOURCE_LEAK) Fixed it by moving the xmalloc to the true branch. Reviewed-by: Yifeng Sun Signed-off-by: William Tu --- ovsdb/ovsdb-idlc.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in index c285ee4b3..1d385e15c 100755 --- a/ovsdb/ovsdb-idlc.in +++ b/ovsdb/ovsdb-idlc.in @@ -1351,9 +1351,10 @@ struct %(s)s * print(" datum.values = NULL;") txn_write_func = "ovsdb_idl_index_write" elif type.is_optional_pointer(): - print(" union ovsdb_atom *key = xmalloc(sizeof (union ovsdb_atom));") + print(" union ovsdb_atom *key;") print() print(" if (%s) {" % keyVar) + print(" key = xmalloc(sizeof (union ovsdb_atom));") print(" datum.n = 1;") print(" datum.keys = key;") print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar)) @@ -1364,9 +1365,10 @@ struct %(s)s * print(" datum.values = NULL;") txn_write_func = "ovsdb_idl_index_write" elif type.n_max == 1: - print(" union ovsdb_atom *key = xmalloc(sizeof(union ovsdb_atom));") + print(" union ovsdb_atom *key;") print() print(" if (%s) {" % nVar) + print(" key = xmalloc(sizeof(union ovsdb_atom));") print(" datum.n = 1;") print(" datum.keys = key;") print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), "*" + keyVar)) -- GitLab From 11827c63e22e6f668379dc74260a84f68940275c Mon Sep 17 00:00:00 2001 From: William Tu Date: Sat, 2 May 2020 09:08:26 -0700 Subject: [PATCH 124/432] ovsdb-idlc: Fix memory leak reported by Coverity. An example pattern is shown below: void ovsrec_ct_zone_index_set_external_ids(const struct ovsrec_ct_zone... { // 1. alloc_fn: Storage is returned from allocation function xmalloc. // 2. var_assign: Assigning: datum = storage returned from xmalloc(24UL). struct ovsdb_datum *datum = xmalloc(sizeof(struct ovsdb_datum)); // 3. Condition external_ids, taking false branch. if (external_ids) { ... } else { // 4. 
noescape: Resource datum is not freed or pointed-to in ovsdb_datum_init_empty. ovsdb_datum_init_empty(datum); } // 5. noescape: Resource datum is not freed or pointed-to in ovsdb_idl_index_write. ovsdb_idl_index_write(CONST_CAST(struct ovsdb_idl_row *, &row->header_), &ovsrec_ct_zone_columns[OVSREC_CT_ZONE_COL_EXTERNAL_IDS], datum, &ovsrec_table_classes[OVSREC_TABLE_CT_ZONE]); // CID 1420856 (#1 of 1): Resource leak (RESOURCE_LEAK) // 6. leaked_storage: Variable datum going out of scope leaks the storage it points to. Fix it by freeing the datum. Reviewed-by: Yifeng Sun Signed-off-by: William Tu --- ovsdb/ovsdb-idlc.in | 1 + 1 file changed, 1 insertion(+) diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in index 1d385e15c..698fe25f3 100755 --- a/ovsdb/ovsdb-idlc.in +++ b/ovsdb/ovsdb-idlc.in @@ -1306,6 +1306,7 @@ struct %(s)s * &%(s)s_columns[%(S)s_COL_%(C)s], datum, &%(p)stable_classes[%(P)sTABLE_%(T)s]); + free(datum); } """ % {'t': tableName, 'p': prefix, -- GitLab From e398275024e815b52e796fcfe350fdd0d139ebba Mon Sep 17 00:00:00 2001 From: William Tu Date: Sat, 2 May 2020 09:24:30 -0700 Subject: [PATCH 125/432] ovsdb-idl: Fix NULL deref reported by Coverity. When 'datum.values' or 'datum.keys' is NULL, some code path calling into ovsdb_idl_txn_write__ triggers NULL deref. An example is below: ovsrec_open_vswitch_set_cur_cfg(const struct ovsrec_open_vswitch { struct ovsdb_datum datum; union ovsdb_atom key; datum.n = 1; datum.keys = &key; key.integer = cur_cfg; // 1. assign_zero: Assigning: datum.values = NULL. datum.values = NULL; // CID 1421356 (#1 of 1): Explicit null dereferenced (FORWARD_NULL) // 2. var_deref_model: Passing &datum to ovsdb_idl_txn_write_clone,\ // which dereferences null datum.values. ovsdb_idl_txn_write_clone(&row->header_, &ovsrec_open_vswitch_col } And with the following calls: ovsdb_idl_txn_write_clone ovsdb_idl_txn_write__ 6. 
deref_parm_in_call: Function ovsdb_datum_destroy dereferences datum->values ovsdb_datum_destroy And another possible NULL deref is at ovsdb_datum_equals(). Fix the two by adding additional checks. Reviewed-by: Yifeng Sun Signed-off-by: William Tu --- lib/ovsdb-data.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c index 4828624f6..c145f5ad9 100644 --- a/lib/ovsdb-data.c +++ b/lib/ovsdb-data.c @@ -1017,6 +1017,10 @@ static void free_data(enum ovsdb_atomic_type type, union ovsdb_atom *atoms, size_t n_atoms) { + if (!atoms) { + return; + } + if (ovsdb_atom_needs_destruction(type)) { unsigned int i; for (i = 0; i < n_atoms; i++) { -- GitLab From db5a066c17bdeaa7ecac08870331ae583f5ddfcc Mon Sep 17 00:00:00 2001 From: Zhen Wang Date: Mon, 30 Mar 2020 17:21:04 -0700 Subject: [PATCH 126/432] raft: Disable RAFT jsonrpc inactivity probe. With the scale test of 640 nodes k8s cluster, raft DB nodes' jsonrpc session got closed due to the timeout of default 5 seconds probe. It will cause disturbance of the raft cluster. Since we already have the heartbeat for RAFT, just disable the probe between the servers to avoid the unnecessary jsonrpc inactivity probe. Acked-by: Han Zhou Signed-off-by: Zhen Wang Signed-off-by: Ilya Maximets --- ovsdb/raft.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index e0af6bd62..18f29973e 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -938,6 +938,7 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js, &conn->sid); conn->incoming = incoming; conn->js_seqno = jsonrpc_session_get_seqno(conn->js); + jsonrpc_session_set_probe_interval(js, 0); } /* Starts the local server in an existing Raft cluster, using the local copy of -- GitLab From 43a6cf355455c70957893fb600b983c928173d5d Mon Sep 17 00:00:00 2001 From: Mark Michelson Date: Fri, 8 May 2020 17:00:27 -0400 Subject: [PATCH 127/432] RAFT: Add clarifying note for cluster/leave operation. 
We had a user express confusion about the state of a cluster after using cluster/leave. The user had a three server cluster and used cluster/leave to remove two servers from the cluster. The user expected that the single server left would not function since the quorum of two servers for a three server cluster was not met. In actuality, cluster/leave removes the server from the cluster and alters the cluster size in the process. Thus the single remaining server continued to function since quorum was reached. This documentation change makes it a bit more explicit that cluster/leave alters the size of the cluster and cites the three server down to one server case as an example. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1798158 Acked-by: Han Zhou Signed-off-by: Mark Michelson Signed-off-by: Ben Pfaff --- ovsdb/ovsdb-server.1.in | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in index 338f3bc29..6667553df 100644 --- a/ovsdb/ovsdb-server.1.in +++ b/ovsdb/ovsdb-server.1.in @@ -347,6 +347,11 @@ until the server has left the cluster. .IP Once a server leaves a cluster, it may never rejoin it. Instead, create a new server and join it to the cluster. +.IP +Note that removing the server from the cluster alters the total size +of the cluster. For example, if you remove two servers from a three +server cluster, then the "cluster" becomes a single functioning server. +This does not result in a three server cluster that lacks quorum. . 
.IP "\fBcluster/kick \fIdb server\fR" Start graceful removal of \fIserver\fR from \fIdb\fR's cluster, like -- GitLab From c101cd4171cfe04e214f858b4bbe089e56f13f9b Mon Sep 17 00:00:00 2001 From: Ansis Atteka Date: Wed, 13 May 2020 10:44:11 -0700 Subject: [PATCH 128/432] debian: Fix broken build after some man pages became generated from RST As far as I know, the official way to build debian packages is by invoking the following command: > fakeroot debian/rules binary However, that command started to fail with these errors: dh_installman --language=C dh_installman: Cannot find (any matches for) "utilities/ovs-appctl.8" (tried in .) dh_installman: Cannot find (any matches for) "utilities/ovs-l3ping.8" (tried in .) dh_installman: Cannot find (any matches for) "utilities/ovs-tcpdump.8" (tried in .) because the generated manpages are not part of the source tree anymore. This patch updates debian *.manpages files to point to the generated files. Fixes: 39b5e46312 ("Documentation: Convert multiple manpages to ReST.") CC: Ben Pfaff Signed-off-by: Ansis Atteka Acked-by: Ben Pfaff --- debian/openvswitch-common.manpages | 6 +++--- debian/openvswitch-switch.manpages | 6 +++--- debian/openvswitch-test.manpages | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/debian/openvswitch-common.manpages b/debian/openvswitch-common.manpages index 9ac6a1dd6..95004122c 100644 --- a/debian/openvswitch-common.manpages +++ b/debian/openvswitch-common.manpages @@ -1,7 +1,7 @@ ovsdb/ovsdb-client.1 ovsdb/ovsdb-tool.1 utilities/bugtool/ovs-bugtool.8 -utilities/ovs-appctl.8 +debian/tmp/usr/share/man/man8/ovs-appctl.8 utilities/ovs-ofctl.8 -utilities/ovs-parse-backtrace.8 -utilities/ovs-pki.8 +debian/tmp/usr/share/man/man8/ovs-parse-backtrace.8 +debian/tmp/usr/share/man/man8/ovs-pki.8 diff --git a/debian/openvswitch-switch.manpages b/debian/openvswitch-switch.manpages index 1161cfda7..7fd7bc55d 100644 --- a/debian/openvswitch-switch.manpages +++ b/debian/openvswitch-switch.manpages @@ 
-1,12 +1,12 @@ ovsdb/ovsdb-server.1 ovsdb/ovsdb-server.5 -utilities/ovs-ctl.8 +debian/tmp/usr/share/man/man8/ovs-ctl.8 utilities/ovs-dpctl-top.8 utilities/ovs-dpctl.8 utilities/ovs-kmod-ctl.8 utilities/ovs-pcap.1 -utilities/ovs-tcpdump.8 -utilities/ovs-tcpundump.1 +debian/tmp/usr/share/man/man8/ovs-tcpdump.8 +debian/tmp/usr/share/man/man1/ovs-tcpundump.1 utilities/ovs-vsctl.8 vswitchd/ovs-vswitchd.8 vswitchd/ovs-vswitchd.conf.db.5 diff --git a/debian/openvswitch-test.manpages b/debian/openvswitch-test.manpages index 3f7185869..eb3a561d0 100644 --- a/debian/openvswitch-test.manpages +++ b/debian/openvswitch-test.manpages @@ -1 +1 @@ -utilities/ovs-l3ping.8 +debian/tmp/usr/share/man/man8/ovs-l3ping.8 -- GitLab From 29bb3093eb8b387ada862ea502626a4cfbaa3358 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 24 Mar 2020 15:10:50 -0700 Subject: [PATCH 129/432] userspace: Enable TSO support for non-DPDK. This patch enables TSO support for non-DPDK use cases, and also adds check-system-tso testsuite. Before TSO, we have to disable checksum offload, allowing the kernel to calculate the TCP/UDP packet checksum. With TSO, we can skip the checksum validation by enabling checksum offload, and with large packet size, we see better performance. Consider container to container use cases: iperf3 -c (ns0) -> veth peer -> OVS -> veth peer -> iperf3 -s (ns1) And I got around 6Gbps, similar to TSO with DPDK-enabled. 
Acked-by: Flavio Leitner Acked-by: Ilya Maximets Signed-off-by: William Tu --- lib/dp-packet.c | 6 +- lib/dp-packet.h | 573 +++++++++++++++------------------- lib/userspace-tso.c | 5 - tests/.gitignore | 3 + tests/automake.mk | 21 ++ tests/system-tso-macros.at | 31 ++ tests/system-tso-testsuite.at | 26 ++ 7 files changed, 340 insertions(+), 325 deletions(-) create mode 100644 tests/system-tso-macros.at create mode 100644 tests/system-tso-testsuite.at diff --git a/lib/dp-packet.c b/lib/dp-packet.c index cd2623500..72f6d09ac 100644 --- a/lib/dp-packet.c +++ b/lib/dp-packet.c @@ -192,10 +192,8 @@ dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom) sizeof(struct dp_packet) - offsetof(struct dp_packet, l2_pad_size)); -#ifdef DPDK_NETDEV - new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags; - new_buffer->mbuf.ol_flags &= ~DPDK_MBUF_NON_OFFLOADING_FLAGS; -#endif + *dp_packet_ol_flags_ptr(new_buffer) = *dp_packet_ol_flags_ptr(buffer); + *dp_packet_ol_flags_ptr(new_buffer) &= DP_PACKET_OL_SUPPORTED_MASK; if (dp_packet_rss_valid(buffer)) { dp_packet_set_rss_hash(new_buffer, dp_packet_get_rss_hash(buffer)); diff --git a/lib/dp-packet.h b/lib/dp-packet.h index 9f8991faa..0430cca8e 100644 --- a/lib/dp-packet.h +++ b/lib/dp-packet.h @@ -48,18 +48,62 @@ enum OVS_PACKED_ENUM dp_packet_source { #define DP_PACKET_CONTEXT_SIZE 64 -#ifndef DPDK_NETDEV +#ifdef DPDK_NETDEV +#define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = DPDK_DEF +#else +#define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = GENERIC_DEF +#endif + /* Bit masks for the 'ol_flags' member of the 'dp_packet' structure. */ enum dp_packet_offload_mask { - DP_PACKET_OL_RSS_HASH_MASK = 0x1, /* Is the 'rss_hash' valid? */ - DP_PACKET_OL_FLOW_MARK_MASK = 0x2, /* Is the 'flow_mark' valid? */ + /* Value 0 is not used. */ + /* Is the 'rss_hash' valid? */ + DEF_OL_FLAG(DP_PACKET_OL_RSS_HASH, PKT_RX_RSS_HASH, 0x1), + /* Is the 'flow_mark' valid? 
*/ + DEF_OL_FLAG(DP_PACKET_OL_FLOW_MARK, PKT_RX_FDIR_ID, 0x2), + /* Bad L4 checksum in the packet. */ + DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, 0x4), + /* Bad IP checksum in the packet. */ + DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD, 0x8), + /* Valid L4 checksum in the packet. */ + DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_GOOD, 0x10), + /* Valid IP checksum in the packet. */ + DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_GOOD, 0x20), + /* TCP Segmentation Offload. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_SEG, PKT_TX_TCP_SEG, 0x40), + /* Offloaded packet is IPv4. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, PKT_TX_IPV4, 0x80), + /* Offloaded packet is IPv6. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, PKT_TX_IPV6, 0x100), + /* Offload TCP checksum. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CKSUM, PKT_TX_TCP_CKSUM, 0x200), + /* Offload UDP checksum. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM, PKT_TX_UDP_CKSUM, 0x400), + /* Offload SCTP checksum. */ + DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, PKT_TX_SCTP_CKSUM, 0x800), + /* Adding new field requires adding to DP_PACKET_OL_SUPPORTED_MASK. */ }; -#else -/* DPDK mbuf ol_flags that are not really an offload flags. These are mostly - * related to mbuf memory layout and OVS should not touch/clear them. 
*/ -#define DPDK_MBUF_NON_OFFLOADING_FLAGS (EXT_ATTACHED_MBUF | \ - IND_ATTACHED_MBUF) -#endif + +#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \ + DP_PACKET_OL_FLOW_MARK | \ + DP_PACKET_OL_RX_L4_CKSUM_BAD | \ + DP_PACKET_OL_RX_IP_CKSUM_BAD | \ + DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ + DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ + DP_PACKET_OL_TX_TCP_SEG | \ + DP_PACKET_OL_TX_IPV4 | \ + DP_PACKET_OL_TX_IPV6 | \ + DP_PACKET_OL_TX_TCP_CKSUM | \ + DP_PACKET_OL_TX_UDP_CKSUM | \ + DP_PACKET_OL_TX_SCTP_CKSUM) + +#define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \ + DP_PACKET_OL_TX_UDP_CKSUM | \ + DP_PACKET_OL_TX_SCTP_CKSUM) +#define DP_PACKET_OL_RX_IP_CKSUM_MASK (DP_PACKET_OL_RX_IP_CKSUM_GOOD | \ + DP_PACKET_OL_RX_IP_CKSUM_BAD) +#define DP_PACKET_OL_RX_L4_CKSUM_MASK (DP_PACKET_OL_RX_L4_CKSUM_GOOD | \ + DP_PACKET_OL_RX_L4_CKSUM_BAD) /* Buffer for holding packet data. A dp_packet is automatically reallocated * as necessary if it grows too large for the available memory. @@ -450,6 +494,45 @@ dp_packet_get_nd_payload(const struct dp_packet *b) ? 
(const char *)dp_packet_l4(b) + ND_MSG_LEN : NULL; } +#ifdef DPDK_NETDEV +static inline uint64_t * +dp_packet_ol_flags_ptr(const struct dp_packet *b) +{ + return CONST_CAST(uint64_t *, &b->mbuf.ol_flags); +} + +static inline uint32_t * +dp_packet_rss_ptr(const struct dp_packet *b) +{ + return CONST_CAST(uint32_t *, &b->mbuf.hash.rss); +} + +static inline uint32_t * +dp_packet_flow_mark_ptr(const struct dp_packet *b) +{ + return CONST_CAST(uint32_t *, &b->mbuf.hash.fdir.hi); +} + +#else +static inline uint32_t * +dp_packet_ol_flags_ptr(const struct dp_packet *b) +{ + return CONST_CAST(uint32_t *, &b->ol_flags); +} + +static inline uint32_t * +dp_packet_rss_ptr(const struct dp_packet *b) +{ + return CONST_CAST(uint32_t *, &b->rss_hash); +} + +static inline uint32_t * +dp_packet_flow_mark_ptr(const struct dp_packet *b) +{ + return CONST_CAST(uint32_t *, &b->flow_mark); +} +#endif + #ifdef DPDK_NETDEV BUILD_ASSERT_DECL(offsetof(struct dp_packet, mbuf) == 0); @@ -521,168 +604,6 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) b->mbuf.buf_len = s; } -/* Returns 'true' if packet 'b' is marked for TCP segmentation offloading. */ -static inline bool -dp_packet_hwol_is_tso(const struct dp_packet *b) -{ - return !!(b->mbuf.ol_flags & PKT_TX_TCP_SEG); -} - -/* Returns 'true' if packet 'b' is marked for IPv4 checksum offloading. */ -static inline bool -dp_packet_hwol_is_ipv4(const struct dp_packet *b) -{ - return !!(b->mbuf.ol_flags & PKT_TX_IPV4); -} - -/* Returns the L4 cksum offload bitmask. */ -static inline uint64_t -dp_packet_hwol_l4_mask(const struct dp_packet *b) -{ - return b->mbuf.ol_flags & PKT_TX_L4_MASK; -} - -/* Returns 'true' if packet 'b' is marked for TCP checksum offloading. */ -static inline bool -dp_packet_hwol_l4_is_tcp(const struct dp_packet *b) -{ - return (b->mbuf.ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM; -} - -/* Returns 'true' if packet 'b' is marked for UDP checksum offloading. 
*/ -static inline bool -dp_packet_hwol_l4_is_udp(struct dp_packet *b) -{ - return (b->mbuf.ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM; -} - -/* Returns 'true' if packet 'b' is marked for SCTP checksum offloading. */ -static inline bool -dp_packet_hwol_l4_is_sctp(struct dp_packet *b) -{ - return (b->mbuf.ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM; -} - -/* Mark packet 'b' for IPv4 checksum offloading. */ -static inline void -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b) -{ - b->mbuf.ol_flags |= PKT_TX_IPV4; -} - -/* Mark packet 'b' for IPv6 checksum offloading. */ -static inline void -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b) -{ - b->mbuf.ol_flags |= PKT_TX_IPV6; -} - -/* Mark packet 'b' for TCP checksum offloading. It implies that either - * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */ -static inline void -dp_packet_hwol_set_csum_tcp(struct dp_packet *b) -{ - b->mbuf.ol_flags |= PKT_TX_TCP_CKSUM; -} - -/* Mark packet 'b' for UDP checksum offloading. It implies that either - * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */ -static inline void -dp_packet_hwol_set_csum_udp(struct dp_packet *b) -{ - b->mbuf.ol_flags |= PKT_TX_UDP_CKSUM; -} - -/* Mark packet 'b' for SCTP checksum offloading. It implies that either - * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */ -static inline void -dp_packet_hwol_set_csum_sctp(struct dp_packet *b) -{ - b->mbuf.ol_flags |= PKT_TX_SCTP_CKSUM; -} - -/* Mark packet 'b' for TCP segmentation offloading. It implies that - * either the packet 'b' is marked for IPv4 or IPv6 checksum offloading - * and also for TCP checksum offloading. */ -static inline void -dp_packet_hwol_set_tcp_seg(struct dp_packet *b) -{ - b->mbuf.ol_flags |= PKT_TX_TCP_SEG; -} - -/* Returns the RSS hash of the packet 'p'. 
Note that the returned value is - * correct only if 'dp_packet_rss_valid(p)' returns true */ -static inline uint32_t -dp_packet_get_rss_hash(const struct dp_packet *p) -{ - return p->mbuf.hash.rss; -} - -static inline void -dp_packet_set_rss_hash(struct dp_packet *p, uint32_t hash) -{ - p->mbuf.hash.rss = hash; - p->mbuf.ol_flags |= PKT_RX_RSS_HASH; -} - -static inline bool -dp_packet_rss_valid(const struct dp_packet *p) -{ - return p->mbuf.ol_flags & PKT_RX_RSS_HASH; -} - -static inline void -dp_packet_reset_offload(struct dp_packet *p) -{ - p->mbuf.ol_flags &= DPDK_MBUF_NON_OFFLOADING_FLAGS; -} - -static inline bool -dp_packet_ip_checksum_valid(const struct dp_packet *p) -{ - return (p->mbuf.ol_flags & PKT_RX_IP_CKSUM_MASK) == - PKT_RX_IP_CKSUM_GOOD; -} - -static inline bool -dp_packet_ip_checksum_bad(const struct dp_packet *p) -{ - return (p->mbuf.ol_flags & PKT_RX_IP_CKSUM_MASK) == - PKT_RX_IP_CKSUM_BAD; -} - -static inline bool -dp_packet_l4_checksum_valid(const struct dp_packet *p) -{ - return (p->mbuf.ol_flags & PKT_RX_L4_CKSUM_MASK) == - PKT_RX_L4_CKSUM_GOOD; -} - -static inline bool -dp_packet_l4_checksum_bad(const struct dp_packet *p) -{ - return (p->mbuf.ol_flags & PKT_RX_L4_CKSUM_MASK) == - PKT_RX_L4_CKSUM_BAD; -} - -static inline bool -dp_packet_has_flow_mark(const struct dp_packet *p, uint32_t *mark) -{ - if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) { - *mark = p->mbuf.hash.fdir.hi; - return true; - } - - return false; -} - -static inline void -dp_packet_set_flow_mark(struct dp_packet *p, uint32_t mark) -{ - p->mbuf.hash.fdir.hi = mark; - p->mbuf.ol_flags |= PKT_RX_FDIR_ID; -} - #else /* DPDK_NETDEV */ static inline void @@ -739,151 +660,6 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s) b->allocated_ = s; } -/* There are no implementation when not DPDK enabled datapath. */ -static inline bool -dp_packet_hwol_is_tso(const struct dp_packet *b OVS_UNUSED) -{ - return false; -} - -/* There are no implementation when not DPDK enabled datapath. 
*/ -static inline bool -dp_packet_hwol_is_ipv4(const struct dp_packet *b OVS_UNUSED) -{ - return false; -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline uint64_t -dp_packet_hwol_l4_mask(const struct dp_packet *b OVS_UNUSED) -{ - return 0; -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline bool -dp_packet_hwol_l4_is_tcp(const struct dp_packet *b OVS_UNUSED) -{ - return false; -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline bool -dp_packet_hwol_l4_is_udp(const struct dp_packet *b OVS_UNUSED) -{ - return false; -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline bool -dp_packet_hwol_l4_is_sctp(const struct dp_packet *b OVS_UNUSED) -{ - return false; -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline void -dp_packet_hwol_set_tx_ipv4(struct dp_packet *b OVS_UNUSED) -{ -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline void -dp_packet_hwol_set_tx_ipv6(struct dp_packet *b OVS_UNUSED) -{ -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline void -dp_packet_hwol_set_csum_tcp(struct dp_packet *b OVS_UNUSED) -{ -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline void -dp_packet_hwol_set_csum_udp(struct dp_packet *b OVS_UNUSED) -{ -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline void -dp_packet_hwol_set_csum_sctp(struct dp_packet *b OVS_UNUSED) -{ -} - -/* There are no implementation when not DPDK enabled datapath. */ -static inline void -dp_packet_hwol_set_tcp_seg(struct dp_packet *b OVS_UNUSED) -{ -} - -/* Returns the RSS hash of the packet 'p'. 
Note that the returned value is - * correct only if 'dp_packet_rss_valid(p)' returns true */ -static inline uint32_t -dp_packet_get_rss_hash(const struct dp_packet *p) -{ - return p->rss_hash; -} - -static inline void -dp_packet_set_rss_hash(struct dp_packet *p, uint32_t hash) -{ - p->rss_hash = hash; - p->ol_flags |= DP_PACKET_OL_RSS_HASH_MASK; -} - -static inline bool -dp_packet_rss_valid(const struct dp_packet *p) -{ - return p->ol_flags & DP_PACKET_OL_RSS_HASH_MASK; -} - -static inline void -dp_packet_reset_offload(struct dp_packet *p) -{ - p->ol_flags = 0; -} - -static inline bool -dp_packet_ip_checksum_valid(const struct dp_packet *p OVS_UNUSED) -{ - return false; -} - -static inline bool -dp_packet_ip_checksum_bad(const struct dp_packet *p OVS_UNUSED) -{ - return false; -} - -static inline bool -dp_packet_l4_checksum_valid(const struct dp_packet *p OVS_UNUSED) -{ - return false; -} - -static inline bool -dp_packet_l4_checksum_bad(const struct dp_packet *p OVS_UNUSED) -{ - return false; -} - -static inline bool -dp_packet_has_flow_mark(const struct dp_packet *p, uint32_t *mark) -{ - if (p->ol_flags & DP_PACKET_OL_FLOW_MARK_MASK) { - *mark = p->flow_mark; - return true; - } - return false; -} - -static inline void -dp_packet_set_flow_mark(struct dp_packet *p, uint32_t mark) -{ - p->flow_mark = mark; - p->ol_flags |= DP_PACKET_OL_FLOW_MARK_MASK; -} #endif /* DPDK_NETDEV */ static inline void @@ -1112,6 +888,58 @@ dp_packet_batch_reset_cutlen(struct dp_packet_batch *batch) } } +/* Returns the RSS hash of the packet 'p'. Note that the returned value is + * correct only if 'dp_packet_rss_valid(p)' returns 'true'. 
*/ +static inline uint32_t +dp_packet_get_rss_hash(const struct dp_packet *p) +{ + return *dp_packet_rss_ptr(p); +} + +static inline void +dp_packet_set_rss_hash(struct dp_packet *p, uint32_t hash) +{ + *dp_packet_rss_ptr(p) = hash; + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RSS_HASH; +} + +static inline bool +dp_packet_rss_valid(const struct dp_packet *p) +{ + return *dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RSS_HASH; +} + +static inline void +dp_packet_reset_offload(struct dp_packet *p) +{ + *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_SUPPORTED_MASK; +} + +static inline bool +dp_packet_has_flow_mark(const struct dp_packet *p, uint32_t *mark) +{ + if (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_FLOW_MARK) { + *mark = *dp_packet_flow_mark_ptr(p); + return true; + } + + return false; +} + +static inline void +dp_packet_set_flow_mark(struct dp_packet *p, uint32_t mark) +{ + *dp_packet_flow_mark_ptr(p) = mark; + *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_FLOW_MARK; +} + +/* Returns the L4 cksum offload bitmask. */ +static inline uint64_t +dp_packet_hwol_l4_mask(const struct dp_packet *b) +{ + return *dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_L4_MASK; +} + /* Return true if the packet 'b' requested L4 checksum offload. */ static inline bool dp_packet_hwol_tx_l4_checksum(const struct dp_packet *b) @@ -1119,6 +947,119 @@ dp_packet_hwol_tx_l4_checksum(const struct dp_packet *b) return !!dp_packet_hwol_l4_mask(b); } +/* Returns 'true' if packet 'b' is marked for TCP segmentation offloading. */ +static inline bool +dp_packet_hwol_is_tso(const struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_TCP_SEG); +} + +/* Returns 'true' if packet 'b' is marked for IPv4 checksum offloading. */ +static inline bool +dp_packet_hwol_is_ipv4(const struct dp_packet *b) +{ + return !!(*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_IPV4); +} + +/* Returns 'true' if packet 'b' is marked for TCP checksum offloading. 
*/ +static inline bool +dp_packet_hwol_l4_is_tcp(const struct dp_packet *b) +{ + return (*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_L4_MASK) == + DP_PACKET_OL_TX_TCP_CKSUM; +} + +/* Returns 'true' if packet 'b' is marked for UDP checksum offloading. */ +static inline bool +dp_packet_hwol_l4_is_udp(struct dp_packet *b) +{ + return (*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_L4_MASK) == + DP_PACKET_OL_TX_UDP_CKSUM; +} + +/* Returns 'true' if packet 'b' is marked for SCTP checksum offloading. */ +static inline bool +dp_packet_hwol_l4_is_sctp(struct dp_packet *b) +{ + return (*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_L4_MASK) == + DP_PACKET_OL_TX_SCTP_CKSUM; +} + +/* Mark packet 'b' for IPv4 checksum offloading. */ +static inline void +dp_packet_hwol_set_tx_ipv4(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4; +} + +/* Mark packet 'b' for IPv6 checksum offloading. */ +static inline void +dp_packet_hwol_set_tx_ipv6(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6; +} + +/* Mark packet 'b' for TCP checksum offloading. It implies that either + * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */ +static inline void +dp_packet_hwol_set_csum_tcp(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_CKSUM; +} + +/* Mark packet 'b' for UDP checksum offloading. It implies that either + * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */ +static inline void +dp_packet_hwol_set_csum_udp(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_UDP_CKSUM; +} + +/* Mark packet 'b' for SCTP checksum offloading. It implies that either + * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */ +static inline void +dp_packet_hwol_set_csum_sctp(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_SCTP_CKSUM; +} + +/* Mark packet 'b' for TCP segmentation offloading. 
It implies that + * either the packet 'b' is marked for IPv4 or IPv6 checksum offloading + * and also for TCP checksum offloading. */ +static inline void +dp_packet_hwol_set_tcp_seg(struct dp_packet *b) +{ + *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG; +} + +static inline bool +dp_packet_ip_checksum_valid(const struct dp_packet *p) +{ + return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_IP_CKSUM_MASK) == + DP_PACKET_OL_RX_IP_CKSUM_GOOD; +} + +static inline bool +dp_packet_ip_checksum_bad(const struct dp_packet *p) +{ + return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_IP_CKSUM_MASK) == + DP_PACKET_OL_RX_IP_CKSUM_BAD; +} + +static inline bool +dp_packet_l4_checksum_valid(const struct dp_packet *p) +{ + return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) == + DP_PACKET_OL_RX_L4_CKSUM_GOOD; +} + +static inline bool +dp_packet_l4_checksum_bad(const struct dp_packet *p) +{ + return (*dp_packet_ol_flags_ptr(p) & DP_PACKET_OL_RX_L4_CKSUM_MASK) == + DP_PACKET_OL_RX_L4_CKSUM_BAD; +} + #ifdef __cplusplus } #endif diff --git a/lib/userspace-tso.c b/lib/userspace-tso.c index 6a4a0149b..f843c2a76 100644 --- a/lib/userspace-tso.c +++ b/lib/userspace-tso.c @@ -34,13 +34,8 @@ userspace_tso_init(const struct smap *ovs_other_config) static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; if (ovsthread_once_start(&once)) { -#ifdef DPDK_NETDEV VLOG_INFO("Userspace TCP Segmentation Offloading support enabled"); userspace_tso = true; -#else - VLOG_WARN("Userspace TCP Segmentation Offloading can not be enabled" - "since OVS is built without DPDK support."); -#endif ovsthread_once_done(&once); } } diff --git a/tests/.gitignore b/tests/.gitignore index 99fdf70d5..45b4f67b2 100644 --- a/tests/.gitignore +++ b/tests/.gitignore @@ -24,6 +24,9 @@ /system-userspace-testsuite /system-userspace-testsuite.dir/ /system-userspace-testsuite.log +/system-tso-testsuite +/system-tso-testsuite.dir/ +/system-tso-testsuite.log /system-offloads-testsuite 
/system-offloads-testsuite.dir/ /system-offloads-testsuite.log diff --git a/tests/automake.mk b/tests/automake.mk index 81eb2a9b8..66859d537 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -4,6 +4,7 @@ EXTRA_DIST += \ $(SYSTEM_TESTSUITE_AT) \ $(SYSTEM_KMOD_TESTSUITE_AT) \ $(SYSTEM_USERSPACE_TESTSUITE_AT) \ + $(SYSTEM_TSO_TESTSUITE_AT) \ $(SYSTEM_AFXDP_TESTSUITE_AT) \ $(SYSTEM_OFFLOADS_TESTSUITE_AT) \ $(SYSTEM_DPDK_TESTSUITE_AT) \ @@ -11,6 +12,7 @@ EXTRA_DIST += \ $(TESTSUITE) \ $(SYSTEM_KMOD_TESTSUITE) \ $(SYSTEM_USERSPACE_TESTSUITE) \ + $(SYSTEM_TSO_TESTSUITE) \ $(SYSTEM_AFXDP_TESTSUITE) \ $(SYSTEM_OFFLOADS_TESTSUITE) \ $(SYSTEM_DPDK_TESTSUITE) \ @@ -154,6 +156,10 @@ SYSTEM_USERSPACE_TESTSUITE_AT = \ tests/system-userspace-macros.at \ tests/system-userspace-packet-type-aware.at +SYSTEM_TSO_TESTSUITE_AT = \ + tests/system-tso-testsuite.at \ + tests/system-tso-macros.at + SYSTEM_AFXDP_TESTSUITE_AT = \ tests/system-userspace-macros.at \ tests/system-afxdp-testsuite.at \ @@ -183,6 +189,7 @@ TESTSUITE = $(srcdir)/tests/testsuite TESTSUITE_PATCH = $(srcdir)/tests/testsuite.patch SYSTEM_KMOD_TESTSUITE = $(srcdir)/tests/system-kmod-testsuite SYSTEM_USERSPACE_TESTSUITE = $(srcdir)/tests/system-userspace-testsuite +SYSTEM_TSO_TESTSUITE = $(srcdir)/tests/system-tso-testsuite SYSTEM_AFXDP_TESTSUITE = $(srcdir)/tests/system-afxdp-testsuite SYSTEM_OFFLOADS_TESTSUITE = $(srcdir)/tests/system-offloads-testsuite SYSTEM_DPDK_TESTSUITE = $(srcdir)/tests/system-dpdk-testsuite @@ -296,6 +303,12 @@ check-offloads-valgrind: all $(valgrind_wrappers) $(check_DATA) @echo '----------------------------------------------------------------------' @echo 'Valgrind output can be found in tests/system-offloads-testsuite.dir/*/valgrind.*' @echo '----------------------------------------------------------------------' +check-tso-valgrind: all $(valgrind_wrappers) $(check_DATA) + $(SHELL) '$(SYSTEM_TSO_TESTSUITE)' -C tests VALGRIND='$(VALGRIND)' 
AUTOTEST_PATH='tests/valgrind:$(AUTOTEST_PATH)' -d $(TESTSUITEFLAGS) -j1 + @echo + @echo '----------------------------------------------------------------------' + @echo 'Valgrind output can be found in tests/system-tso-testsuite.dir/*/valgrind.*' + @echo '----------------------------------------------------------------------' check-helgrind: all $(valgrind_wrappers) $(check_DATA) -$(SHELL) '$(TESTSUITE)' -C tests CHECK_VALGRIND=true VALGRIND='$(HELGRIND)' AUTOTEST_PATH='tests/valgrind:$(AUTOTEST_PATH)' -d $(TESTSUITEFLAGS) @@ -326,6 +339,10 @@ check-system-userspace: all set $(SHELL) '$(SYSTEM_USERSPACE_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \ "$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) +check-system-tso: all + set $(SHELL) '$(SYSTEM_TSO_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \ + "$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) + check-afxdp: all set $(SHELL) '$(SYSTEM_AFXDP_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)' $(TESTSUITEFLAGS) -j1; \ "$$@" || (test X'$(RECHECK)' = Xyes && "$$@" --recheck) @@ -367,6 +384,10 @@ $(SYSTEM_USERSPACE_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_USERSP $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at $(AM_V_at)mv $@.tmp $@ +$(SYSTEM_TSO_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_TSO_TESTSUITE_AT) $(COMMON_MACROS_AT) + $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at + $(AM_V_at)mv $@.tmp $@ + $(SYSTEM_AFXDP_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_AFXDP_TESTSUITE_AT) $(COMMON_MACROS_AT) $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at $(AM_V_at)mv $@.tmp $@ diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at new file mode 100644 index 000000000..406334f3e --- /dev/null +++ b/tests/system-tso-macros.at @@ -0,0 +1,31 @@ +# _ADD_BR([name]) +# +# Expands into the proper ovs-vsctl commands to create a bridge with the +# appropriate type and properties 
+m4_define([_ADD_BR], [[add-br $1 -- set Bridge $1 datapath_type="netdev" protocols=OpenFlow10,OpenFlow11,OpenFlow12,OpenFlow13,OpenFlow14,OpenFlow15 fail-mode=secure ]]) + +# OVS_TRAFFIC_VSWITCHD_START([vsctl-args], [vsctl-output], [=override]) +# +# Creates a database and starts ovsdb-server, starts ovs-vswitchd +# connected to that database, calls ovs-vsctl to create a bridge named +# br0 with predictable settings, passing 'vsctl-args' as additional +# commands to ovs-vsctl. If 'vsctl-args' causes ovs-vsctl to provide +# output (e.g. because it includes "create" commands) then 'vsctl-output' +# specifies the expected output after filtering through uuidfilt. +m4_define([OVS_TRAFFIC_VSWITCHD_START], + [ + OVS_WAIT_WHILE([ip link show ovs-netdev]) + _OVS_VSWITCHD_START([--disable-system]) + dnl Add bridges, ports, etc. + OVS_WAIT_WHILE([ip link show br0]) + AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:userspace-tso-enable=true]) + AT_CHECK([ovs-vsctl -- _ADD_BR([br0]) -- $1 m4_if([$2], [], [], [| uuidfilt])], [0], [$2]) +]) + +# CONFIGURE_VETH_OFFLOADS([VETH]) +# +# Enable TCP segmentation offload and scatter-gather for veths. +m4_define([CONFIGURE_VETH_OFFLOADS], + [AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])] + [AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])] +) diff --git a/tests/system-tso-testsuite.at b/tests/system-tso-testsuite.at new file mode 100644 index 000000000..99d748006 --- /dev/null +++ b/tests/system-tso-testsuite.at @@ -0,0 +1,26 @@ +AT_INIT + +AT_COPYRIGHT([Copyright (c) 2020 VMware, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at: + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License.]) + +m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) + +m4_include([tests/ovs-macros.at]) +m4_include([tests/ovsdb-macros.at]) +m4_include([tests/ofproto-macros.at]) +m4_include([tests/system-common-macros.at]) +m4_include([tests/system-userspace-macros.at]) +m4_include([tests/system-tso-macros.at]) + +m4_include([tests/system-traffic.at]) -- GitLab From 5f01a9019f0d5ad18bd7e2e2093299ad48895ea5 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 24 Mar 2020 15:10:51 -0700 Subject: [PATCH 130/432] tests: Add tests using tap device. Similar to using veth across namespaces, this patch creates tap devices, assigns to namespaces, and allows traffic to go through different test cases. Acked-by: Flavio Leitner Signed-off-by: William Tu --- tests/automake.mk | 1 + tests/system-tap.at | 34 ++++++++++++++++++++++++++++++++++ tests/system-tso-testsuite.at | 1 + 3 files changed, 36 insertions(+) create mode 100644 tests/system-tap.at diff --git a/tests/automake.mk b/tests/automake.mk index 66859d537..cbba5b170 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -158,6 +158,7 @@ SYSTEM_USERSPACE_TESTSUITE_AT = \ SYSTEM_TSO_TESTSUITE_AT = \ tests/system-tso-testsuite.at \ + tests/system-tap.at \ tests/system-tso-macros.at SYSTEM_AFXDP_TESTSUITE_AT = \ diff --git a/tests/system-tap.at b/tests/system-tap.at new file mode 100644 index 000000000..871a3bda4 --- /dev/null +++ b/tests/system-tap.at @@ -0,0 +1,34 @@ +AT_SETUP([traffic between namespaces using tap]) +AT_KEYWORDS([http_tap]) +OVS_TRAFFIC_VSWITCHD_START() +AT_SKIP_IF([test $HAVE_TUNCTL = no]) + +AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"]) + +ADD_NAMESPACES(at_ns0, at_ns1) + +AT_CHECK([ip tuntap add tap0 mode tap]) +on_exit 'ip tuntap del tap0 mode tap' +AT_CHECK([ip tuntap add tap1 mode tap]) +on_exit 'ip tuntap del tap1 mode tap' + +AT_CHECK([ovs-vsctl add-port br0 tap0 -- set int tap0 type=tap]) 
+AT_CHECK([ovs-vsctl add-port br0 tap1 -- set int tap1 type=tap]) +AT_CHECK([ip link set tap0 netns at_ns0]) +AT_CHECK([ip link set tap1 netns at_ns1]) + +AT_CHECK([ip netns exec at_ns0 ip link set dev tap0 up]) +AT_CHECK([ip netns exec at_ns1 ip link set dev tap1 up]) +AT_CHECK([ip netns exec at_ns0 ip addr add 10.1.1.1/24 dev tap0]) +AT_CHECK([ip netns exec at_ns1 ip addr add 10.1.1.2/24 dev tap1]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +OVS_START_L7([at_ns1], [http]) +NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log]) + +OVS_TRAFFIC_VSWITCHD_STOP(["/.*ethtool command ETHTOOL_G.*/d"]) + +AT_CLEANUP diff --git a/tests/system-tso-testsuite.at b/tests/system-tso-testsuite.at index 99d748006..594d1a6fd 100644 --- a/tests/system-tso-testsuite.at +++ b/tests/system-tso-testsuite.at @@ -23,4 +23,5 @@ m4_include([tests/system-common-macros.at]) m4_include([tests/system-userspace-macros.at]) m4_include([tests/system-tso-macros.at]) +m4_include([tests/system-tap.at]) m4_include([tests/system-traffic.at]) -- GitLab From 1740aaf49dad6f533705dc3dce8d955a1840052a Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Wed, 13 May 2020 13:11:17 -0700 Subject: [PATCH 131/432] metaflow: Fix maskable conntrack orig tuple fields From man ovs-fields(7), the conntrack origin tuple fields ct_nw_src/dst, ct_ipv6_src/dst, and ct_tp_src/dst are supposed to be bitwise maskable, but they are not. This patch enables those fields to be maskable, and adds a regression test. 
Fixes: daf4d3c18da4 ("odp: Support conntrack orig tuple key.") Reported-by: Wenying Dong Signed-off-by: Yi-Hung Wei Signed-off-by: William Tu --- lib/meta-flow.c | 30 ++++++++++++++++++----- tests/ofproto-dpif.at | 56 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 6 deletions(-) diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 9ab82460b..c808d205d 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -2328,12 +2328,6 @@ mf_set(const struct mf_field *mf, switch (mf->id) { case MFF_CT_ZONE: case MFF_CT_NW_PROTO: - case MFF_CT_NW_SRC: - case MFF_CT_NW_DST: - case MFF_CT_IPV6_SRC: - case MFF_CT_IPV6_DST: - case MFF_CT_TP_SRC: - case MFF_CT_TP_DST: case MFF_RECIRC_ID: case MFF_PACKET_TYPE: case MFF_CONJ_ID: @@ -2457,6 +2451,30 @@ mf_set(const struct mf_field *mf, ntoh128(mask->be128)); break; + case MFF_CT_NW_SRC: + match_set_ct_nw_src_masked(match, value->be32, mask->be32); + break; + + case MFF_CT_NW_DST: + match_set_ct_nw_dst_masked(match, value->be32, mask->be32); + break; + + case MFF_CT_IPV6_SRC: + match_set_ct_ipv6_src_masked(match, &value->ipv6, &mask->ipv6); + break; + + case MFF_CT_IPV6_DST: + match_set_ct_ipv6_dst_masked(match, &value->ipv6, &mask->ipv6); + break; + + case MFF_CT_TP_SRC: + match_set_ct_tp_src_masked(match, value->be16, mask->be16); + break; + + case MFF_CT_TP_DST: + match_set_ct_tp_dst_masked(match, value->be16, mask->be16); + break; + case MFF_ETH_DST: match_set_dl_dst_masked(match, value->mac, mask->mac); break; diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index d444cf084..41164d735 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -10570,6 +10570,62 @@ udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10. 
OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ofproto-dpif - conntrack - match masked ct fields]) +OVS_VSWITCHD_START + +add_of_ports br0 1 2 + +AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info]) + +dnl Allow new connections on p1->p2. Allow only established connections p2->p1 +AT_DATA([flows.txt], [dnl +table=0,arp,action=normal +table=0,ip,in_port=1,udp,nw_src=10.1.2.1/24,action=ct(commit) +table=0,ip,in_port=1,udp6,ipv6_dst=2001:db8::1/64,action=ct(commit) +table=0,ip,in_port=1,udp,tp_src=3/0x1,action=ct(commit) +table=0,ip,in_port=2,actions=ct(table=1) +table=0,ip6,in_port=2,actions=ct(table=1) +table=1,priority=10,udp,ct_state=+trk+rpl,ct_nw_src=10.1.2.1/24,actions=controller +table=1,priority=10,udp6,ct_state=+trk+rpl,ct_ipv6_dst=2001:db8::1/64,actions=controller +table=1,priority=10,udp,ct_state=+trk+rpl,ct_tp_src=3/0x1,actions=controller +table=1,priority=1,action=drop +]) + +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) + +AT_CAPTURE_FILE([ofctl_monitor.log]) +AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log]) + +dnl Match ct_nw_src=10.1.2.1/24 +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.2.100,dst=10.1.2.200,proto=17,tos=0,ttl=64,frag=no),udp(src=6,dst=6)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.2.200,dst=10.1.2.100,proto=17,tos=0,ttl=64,frag=no),udp(src=6,dst=6)']) + +dnl Match ct_ipv6_dst=2001:db8::1/64 +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=2001:db8::2,label=0,proto=17,tclass=0x70,hlimit=128,frag=no),udp(src=1,dst=2)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p2 
'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x86dd),ipv6(src=2001:db8::2,dst=2001:db8::1,label=0,proto=17,tclass=0x70,hlimit=128,frag=no),udp(src=2,dst=1)']) + +dnl Match ct_tp_src=3/0x1 +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1,dst=2)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.1.2,dst=10.1.1.1,proto=17,tos=0,ttl=64,frag=no),udp(src=2,dst=1)']) + +OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6]) +OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit]) + +dnl Check this output. +AT_CHECK([cat ofctl_monitor.log], [0], [dnl +NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=106 ct_state=est|rpl|trk,ct_nw_src=10.1.2.100,ct_nw_dst=10.1.2.200,ct_nw_proto=17,ct_tp_src=6,ct_tp_dst=6,ip,in_port=2 (via action) data_len=106 (unbuffered) +udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.2.200,nw_dst=10.1.2.100,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=6,tp_dst=6 udp_csum:221 +dnl +NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=126 ct_state=est|rpl|trk,ct_ipv6_src=2001:db8::1,ct_ipv6_dst=2001:db8::2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ipv6,in_port=2 (via action) data_len=126 (unbuffered) +udp6,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,ipv6_src=2001:db8::2,ipv6_dst=2001:db8::1,ipv6_label=0x00000,nw_tos=112,nw_ecn=0,nw_ttl=128,tp_src=2,tp_dst=1 udp_csum:bfe2 +dnl +NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=106 ct_state=est|rpl|trk,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ip,in_port=2 (via action) data_len=106 (unbuffered) +udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=2,tp_dst=1 udp_csum:553 +]) + 
+OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ofproto-dpif - conntrack - ofproto/trace]) OVS_VSWITCHD_START -- GitLab From fae687c85ef56d5d3d680aa23587fcc083ddeb7f Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 12 May 2020 08:22:31 -0700 Subject: [PATCH 132/432] oss-fuzz: Fix miniflow_target.c. Clang reports: tests/oss-fuzz/miniflow_target.c:209:26: error: suggest braces around \ initialization of subobject [-Werror,-Wmissing-braces] struct flow flow2 = {0}; Fix it by using memset. Cc: Bhargava Shastry Reviewed-by: Yifeng Sun Signed-off-by: William Tu --- tests/oss-fuzz/miniflow_target.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/oss-fuzz/miniflow_target.c b/tests/oss-fuzz/miniflow_target.c index 393443061..50b8b0e64 100644 --- a/tests/oss-fuzz/miniflow_target.c +++ b/tests/oss-fuzz/miniflow_target.c @@ -206,8 +206,9 @@ test_minimask_combine(struct flow *flow) struct minimask minicombined; uint64_t storage[FLOW_U64S]; } m; - struct flow flow2 = {0}; + struct flow flow2; + memset(&flow2, 0, sizeof flow2); mask.masks = *flow; minimask = minimask_create(&mask); -- GitLab From 3c18bb0fe9f23308061217f72e2245f0e311b20b Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Thu, 14 May 2020 16:25:10 +0300 Subject: [PATCH 133/432] debian: Fix package dependencies In python2 package was python-twisted-conch but it looks like for python3 it's just python3-twisted. For zope interface the python3 package name is python3-zope.interface. 
Fixes: 1ca0323e7c29 ("Require Python 3 and remove support for Python 2.") Signed-off-by: Roi Dayan Acked-by: Ansis Atteka --- debian/control | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/control b/debian/control index a50e97249..e47767d75 100644 --- a/debian/control +++ b/debian/control @@ -14,8 +14,8 @@ Build-Depends: graphviz, openssl, procps, python3-all, - python3-twisted-conch, - python3-zopeinterface, + python3-twisted, + python3-zope.interface, libunbound-dev, libunwind-dev Standards-Version: 3.9.3 -- GitLab From c36bba351ba7af8d48501dfa062a772f357536ba Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 14 May 2020 20:20:56 +0200 Subject: [PATCH 134/432] ofproto: Fix statistics of removed flow. 'fr' is a new variable on the stack. '+=' here adds the real statistics to a random stack memory. Fixes: 164413156cf9 ("Add offload packets statistics") Acked-by: Roi Dayan Signed-off-by: Ilya Maximets --- ofproto/ofproto.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 0fbd6c380..59f06aa94 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -6085,8 +6085,8 @@ ofproto_rule_send_removed(struct rule *rule) fr.hard_timeout = rule->hard_timeout; ovs_mutex_unlock(&rule->mutex); rule->ofproto->ofproto_class->rule_get_stats(rule, &stats, &used); - fr.packet_count += stats.n_packets; - fr.byte_count += stats.n_bytes; + fr.packet_count = stats.n_packets; + fr.byte_count = stats.n_bytes; connmgr_send_flow_removed(connmgr, &fr); ovs_mutex_unlock(&ofproto_mutex); } -- GitLab From fe175ac17352ceb2dbc9958112b4b1bc114d82f0 Mon Sep 17 00:00:00 2001 From: Ansis Atteka Date: Fri, 15 May 2020 12:08:13 -0700 Subject: [PATCH 135/432] debian: Add python3-sphinx to ovs build dependencies python3-sphinx has become mandatory build dependency since patch 39b5e46 ("Documentation: Convert multiple manpages to ReST."), because, otherwise, without this dependency installed, packaging of OVS 
debian packages fails with an error that generated man pages can't be found. Fixes: 39b5e46312 ("Documentation: Convert multiple manpages to ReST.") CC: Ben Pfaff Signed-off-by: Ansis Atteka Reported-by: Artem Teleshev Acked-by: Greg Rose --- debian/control | 1 + 1 file changed, 1 insertion(+) diff --git a/debian/control b/debian/control index e47767d75..0646b22a1 100644 --- a/debian/control +++ b/debian/control @@ -14,6 +14,7 @@ Build-Depends: graphviz, openssl, procps, python3-all, + python3-sphinx, python3-twisted, python3-zope.interface, libunbound-dev, -- GitLab From 7a076a53716394742d0ae44652451501ae17335d Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Fri, 15 May 2020 16:36:18 -0400 Subject: [PATCH 136/432] netdev-linux: Update LAG in all cases. In some cases, when processing a netlink change event, it's possible for an alternate part of OvS (like the IPv6 endpoint processing) to hold an active netdev interface. This creates a race-condition, where sometimes the OvS change processing will take the normal path. This doesn't work because the netdev device object won't actually be enslaved to the ovs-system (for instance, a linux bond) and ingress qdisc entries will be missing. To address this, we update the LAG information in ALL cases where LAG information could come in. 
Fixes: d22f8927c3c9 ("netdev-linux: monitor and offload LAG slaves to TC") Cc: Marcelo Leitner Cc: John Hurley Acked-by: Roi Dayan Signed-off-by: Aaron Conole Signed-off-by: Ilya Maximets --- lib/netdev-linux.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index b52071e92..6269c24ac 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -655,10 +655,6 @@ netdev_linux_update_lag(struct rtnetlink_change *change) { struct linux_lag_slave *lag; - if (!rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) { - return; - } - if (change->slave && netdev_linux_kind_is_lag(change->slave)) { lag = shash_find_data(&lag_shash, change->ifname); @@ -756,8 +752,11 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) netdev_linux_update(netdev, nsid, &change); ovs_mutex_unlock(&netdev->mutex); } - else if (!netdev_ && change.ifname) { - /* Netdev is not present in OvS but its master could be. */ + + if (change.ifname && + rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) { + + /* Need to try updating the LAG information. */ ovs_mutex_lock(&lag_mutex); netdev_linux_update_lag(&change); ovs_mutex_unlock(&lag_mutex); -- GitLab From 8508a57228560e154963c542823d36d8098e6610 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Fri, 15 May 2020 16:36:19 -0400 Subject: [PATCH 137/432] netdev-offload-tc: Re-fetch block ID after probing. It's possible that block_id could change after the probe for block support. Therefore, fetch the block_id again after the probe. 
Fixes: edc2055a2bf7 ("netdev-offload-tc: Flush rules on ingress block when init tc flow api") Cc: Dmytro Linkin Acked-by: Roi Dayan Co-authored-by: Marcelo Leitner Signed-off-by: Marcelo Leitner Signed-off-by: Aaron Conole Signed-off-by: Ilya Maximets --- lib/netdev-offload-tc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 875ebef71..e188e63e5 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1931,6 +1931,8 @@ netdev_tc_init_flow_api(struct netdev *netdev) if (ovsthread_once_start(&block_once)) { probe_tc_block_support(ifindex); + /* Need to re-fetch block id as it depends on feature availability. */ + block_id = get_block_id_from_netdev(netdev); ovsthread_once_done(&block_once); } -- GitLab From dff2a6e97c9fefcb6b07b4ca9611cf188477d601 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 19 May 2020 15:01:46 -0700 Subject: [PATCH 138/432] Documentation: Fix kernel support matrix The documentation matrix for OVS branches and which kernels they support is out of date. Update it to show that since 2.10 the lowest kernel that we test and support is Linux 3.16. RHEL and CentOS kernels based upon the original 3.10 kernel are still supported. Reported-by: Han Zhou Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-May/370742.html Acked-by: Han Zhou Signed-off-by: Greg Rose Signed-off-by: William Tu --- Documentation/faq/releases.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index 3903e5922..e5cef3915 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -67,10 +67,10 @@ Q: What Linux kernel versions does each Open vSwitch release work with? 
2.7.x 3.10 to 4.9 2.8.x 3.10 to 4.12 2.9.x 3.10 to 4.13 - 2.10.x 3.10 to 4.17 - 2.11.x 3.10 to 4.18 - 2.12.x 3.10 to 5.0 - 2.14.x 3.10 to 5.5 + 2.10.x 3.16 to 4.17 + 2.11.x 3.16 to 4.18 + 2.12.x 3.16 to 5.0 + 2.14.x 3.16 to 5.5 ============ ============== Open vSwitch userspace should also work with the Linux kernel module built @@ -79,6 +79,10 @@ Q: What Linux kernel versions does each Open vSwitch release work with? Open vSwitch userspace is not sensitive to the Linux kernel version. It should build against almost any kernel, certainly against 2.6.32 and later. + Open vSwitch branches 2.10 through 2.14 will still compile against the + RHEL and CentOS 7 3.10 based kernels since they have diverged from the + Linux kernel.org 3.10 kernels. + Q: Are all features available with all datapaths? A: Open vSwitch supports different datapaths on different platforms. Each -- GitLab From c6e9348ed488ea88d8945b96d35433093d2b835a Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 14 May 2020 07:02:43 -0700 Subject: [PATCH 139/432] ovs-bugtool: Add -m option to dump-flows. This patch adds 'ovs-appctl dpctl/dump-flows -m' to bugtool, the output will include wildcarded fields and the miniflow bits, such as 'dp-extra-info:miniflow_bits(4,1)'. 
Cc: Emma Finn Acked-by: Greg Rose Signed-off-by: William Tu --- utilities/bugtool/plugins/network-status/openvswitch.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/utilities/bugtool/plugins/network-status/openvswitch.xml b/utilities/bugtool/plugins/network-status/openvswitch.xml index e6fa4fd15..56e091feb 100644 --- a/utilities/bugtool/plugins/network-status/openvswitch.xml +++ b/utilities/bugtool/plugins/network-status/openvswitch.xml @@ -32,6 +32,7 @@ /usr/share/openvswitch/scripts/ovs-bugtool-ovs-appctl-dpif ovs-appctl -t ovsdb-server ovsdb-server/list-dbs ovs-appctl dpctl/dump-flows netdev@ovs-netdev + ovs-appctl dpctl/dump-flows -m netdev@ovs-netdev ovs-appctl dpctl/dump-flows system@ovs-system ovs-appctl dpctl/show -s /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "show" -- GitLab From 68bc6f88a3a36549fcd3b6248c25c5e2e6deb8f3 Mon Sep 17 00:00:00 2001 From: William Tu Date: Fri, 15 May 2020 06:46:55 -0700 Subject: [PATCH 140/432] ovsdb-idl: Fix NULL deref reported by Coverity. When 'datum.values' or 'datum.keys' is NULL, some code path calling into ovsdb_idl_txn_write__ triggers NULL deref. An example: ovsrec_open_vswitch_set_cur_cfg(const struct ovsrec_open_vswitch { struct ovsdb_datum datum; union ovsdb_atom key; datum.n = 1; datum.keys = &key; key.integer = cur_cfg; // 1. assign_zero: Assigning: datum.values = NULL. datum.values = NULL; // CID 1421356 (#1 of 1): Explicit null dereferenced (FORWARD_NULL) // 2. var_deref_model: Passing &datum to ovsdb_idl_txn_write_clone,\ // which dereferences null datum.values. ovsdb_idl_txn_write_clone(&row->header_, &ovsrec_open_vswitch_col } And with the following calls: ovsdb_idl_txn_write_clone ovsdb_idl_txn_write__ 6. 
deref_parm_in_call: Function ovsdb_datum_destroy dereferences datum->values ovsdb_datum_destroy Reviewed-by: Yifeng Sun Signed-off-by: William Tu --- lib/ovsdb-idl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 1535ad7b5..6614ea161 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -4449,7 +4449,8 @@ ovsdb_idl_txn_write__(const struct ovsdb_idl_row *row_, * transaction only does writes of existing values, without making any real * changes, we will drop the whole transaction later in * ovsdb_idl_txn_commit().) */ - if (write_only && ovsdb_datum_equals(ovsdb_idl_read(row, column), + if (datum->keys && datum->values && + write_only && ovsdb_datum_equals(ovsdb_idl_read(row, column), datum, &column->type)) { goto discard_datum; } -- GitLab From 3822ea067f18d784c92adf72880548e9b8a7197d Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 21 May 2020 14:16:48 -0700 Subject: [PATCH 141/432] netdev-vport: Fix typo in log message. Acked-by: Greg Rose Signed-off-by: Ben Pfaff --- lib/netdev-vport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 8efd1eee8..0252b61de 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -754,7 +754,7 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp) enum tunnel_layers layers = tunnel_supported_layers(type, &tnl_cfg); const char *full_type = (strcmp(type, "vxlan") ? type : (tnl_cfg.exts & (1 << OVS_VXLAN_EXT_GPE) - ? "VXLAN-GPE" : "VXLAN (without GPE")); + ? 
"VXLAN-GPE" : "VXLAN (without GPE)")); const char *packet_type = smap_get(args, "packet_type"); if (!packet_type) { tnl_cfg.pt_mode = default_pt_mode(layers); -- GitLab From 33f9c873b19a4993183e0c29a76a114646ca2977 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Thu, 21 May 2020 14:54:03 -0700 Subject: [PATCH 142/432] compat: Backport ipv6_stub change A patch backported to the Linux stable 4.14 tree and present in the latest stable 4.14.181 kernel breaks ipv6_stub usage. The commit is 8ab8786f78c3 ("net ipv6_stub: use ip6_dst_lookup_flow instead of ip6_dst_lookup"). Create the compat layer define to check for it and fixup usage in vxlan and geneve modules. Passes Travis here: https://travis-ci.org/github/gvrose8192/ovs-experimental/builds/689798733 Signed-off-by: Greg Rose Signed-off-by: William Tu --- acinclude.m4 | 2 ++ datapath/linux/compat/geneve.c | 11 ++++++++++- datapath/linux/compat/vxlan.c | 18 +++++++++++++++++- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index dabbffd01..3b0eea020 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -587,6 +587,8 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_GREP_IFELSE([$KSRC/include/net/ip6_fib.h], [rt6_get_cookie], [OVS_DEFINE([HAVE_RT6_GET_COOKIE])]) + OVS_FIND_FIELD_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_stub], + [dst_entry]) OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup.*net], [OVS_DEFINE([HAVE_IPV6_DST_LOOKUP_NET])]) OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup_flow.*net], diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c index 7bfc6d882..02c6403e6 100644 --- a/datapath/linux/compat/geneve.c +++ b/datapath/linux/compat/geneve.c @@ -962,7 +962,16 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, return dst; } -#if defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET) +#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW) +#ifdef HAVE_IPV6_DST_LOOKUP_FLOW_NET + dst = 
ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6, + NULL); +#else + dst = ipv6_stub->ipv6_dst_lookup_flow(gs6->sock->sk, fl6, + NULL); +#endif + if (IS_ERR(dst)) { +#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET) if (ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, &dst, fl6)) { #elif defined(HAVE_IPV6_DST_LOOKUP_FLOW) diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c index b334870b7..e65d955e9 100644 --- a/datapath/linux/compat/vxlan.c +++ b/datapath/linux/compat/vxlan.c @@ -967,7 +967,10 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, bool use_cache = (dst_cache && ip_tunnel_dst_cache_usable(skb, info)); struct dst_entry *ndst; struct flowi6 fl6; +#if !defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) || \ + !defined(HAVE_IPV6_DST_LOOKUP_FLOW) int err; +#endif if (!sock6) return ERR_PTR(-EIO); @@ -990,7 +993,15 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, fl6.fl6_dport = dport; fl6.fl6_sport = sport; -#if defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET) +#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW) +#ifdef HAVE_IPV6_DST_LOOKUP_FLOW_NET + ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk, + &fl6, NULL); +#else + ndst = ipv6_stub->ipv6_dst_lookup_flow(sock6->sock->sk, &fl6, NULL); +#endif + if (unlikely(IS_ERR(ndst))) { +#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET) err = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk, &ndst, &fl6); #elif defined(HAVE_IPV6_DST_LOOKUP_FLOW) @@ -1004,8 +1015,13 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, #else err = ip6_dst_lookup(vxlan->vn6_sock->sock->sk, &ndst, &fl6); #endif +#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW) + return ERR_PTR(-ENETUNREACH); + } +#else if (err < 0) return ERR_PTR(err); +#endif *saddr = fl6.saddr; if (use_cache) -- GitLab From 3423cd97f88fe6a8de8b649d79fe6ac83bce94d1 Mon Sep 17 00:00:00 2001 From: Ilya 
Maximets Date: Fri, 22 May 2020 18:31:19 +0200 Subject: [PATCH 143/432] ovsdb: Add raft memory usage to memory report. Memory reports could be found in logs or by calling 'memory/show' appctl command. For ovsdb-server it includes information about db cells, monitor connections with their backlog size, etc. But it doesn't contain any information about memory consumed by raft. Backlogs of raft connections could be insanely large because of snapshot installation requests that simply contains the whole database. In not that healthy clusters where one of ovsdb servers is not able to timely handle all the incoming raft traffic, backlog on a sender's side could cause significant memory consumption issues. Adding new 'raft-connections' and 'raft-backlog' counters to the memory report to better track such conditions. Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- ovsdb/ovsdb.c | 4 ++++ ovsdb/raft.c | 16 ++++++++++++++++ ovsdb/raft.h | 2 ++ ovsdb/storage.c | 10 ++++++++++ ovsdb/storage.h | 3 +++ 5 files changed, 35 insertions(+) diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c index 7e683e681..2da117cb3 100644 --- a/ovsdb/ovsdb.c +++ b/ovsdb/ovsdb.c @@ -502,6 +502,10 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage) } simap_increase(usage, "cells", cells); + + if (db->storage) { + ovsdb_storage_get_memory_usage(db->storage, usage); + } } struct ovsdb_table * diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 18f29973e..515eadab3 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -36,6 +36,7 @@ #include "ovsdb/log.h" #include "raft-rpc.h" #include "random.h" +#include "simap.h" #include "socket-util.h" #include "stream.h" #include "timeval.h" @@ -1014,6 +1015,21 @@ raft_get_sid(const struct raft *raft) return &raft->sid; } +/* Adds memory consumption info to 'usage' for later use by memory_report(). 
*/ +void +raft_get_memory_usage(const struct raft *raft, struct simap *usage) +{ + struct raft_conn *conn; + int cnt = 0; + + LIST_FOR_EACH (conn, list_node, &raft->conns) { + simap_increase(usage, "raft-backlog", + jsonrpc_session_get_backlog(conn->js)); + cnt++; + } + simap_increase(usage, "raft-connections", cnt); +} + /* Returns true if 'raft' has completed joining its cluster, has not left or * initiated leaving the cluster, does not have failed disk storage, and is * apparently connected to the leader in a healthy way (or is itself the diff --git a/ovsdb/raft.h b/ovsdb/raft.h index 3d448995a..99d5307e5 100644 --- a/ovsdb/raft.h +++ b/ovsdb/raft.h @@ -67,6 +67,7 @@ struct json; struct ovsdb_log; struct raft; +struct simap; struct sset; #define RAFT_MAGIC "CLUSTER" @@ -113,6 +114,7 @@ const struct uuid *raft_get_cid(const struct raft *); const struct uuid *raft_get_sid(const struct raft *); bool raft_is_connected(const struct raft *); bool raft_is_leader(const struct raft *); +void raft_get_memory_usage(const struct raft *, struct simap *usage); /* Joining a cluster. */ bool raft_is_joining(const struct raft *); diff --git a/ovsdb/storage.c b/ovsdb/storage.c index e26252b06..7b4ad16f6 100644 --- a/ovsdb/storage.c +++ b/ovsdb/storage.c @@ -26,6 +26,7 @@ #include "ovsdb.h" #include "raft.h" #include "random.h" +#include "simap.h" #include "timeval.h" #include "util.h" @@ -188,6 +189,15 @@ ovsdb_storage_get_applied_index(const struct ovsdb_storage *storage) return storage->raft ? 
raft_get_applied_index(storage->raft) : 0; } +void +ovsdb_storage_get_memory_usage(const struct ovsdb_storage *storage, + struct simap *usage) +{ + if (storage->raft) { + raft_get_memory_usage(storage->raft, usage); + } +} + void ovsdb_storage_run(struct ovsdb_storage *storage) { diff --git a/ovsdb/storage.h b/ovsdb/storage.h index 8a9bbab70..a22396891 100644 --- a/ovsdb/storage.h +++ b/ovsdb/storage.h @@ -23,6 +23,7 @@ struct json; struct ovsdb_schema; struct ovsdb_storage; +struct simap; struct uuid; struct ovsdb_error *ovsdb_storage_open(const char *filename, bool rw, @@ -39,6 +40,8 @@ bool ovsdb_storage_is_leader(const struct ovsdb_storage *); const struct uuid *ovsdb_storage_get_cid(const struct ovsdb_storage *); const struct uuid *ovsdb_storage_get_sid(const struct ovsdb_storage *); uint64_t ovsdb_storage_get_applied_index(const struct ovsdb_storage *); +void ovsdb_storage_get_memory_usage(const struct ovsdb_storage *, + struct simap *usage); void ovsdb_storage_run(struct ovsdb_storage *); void ovsdb_storage_wait(struct ovsdb_storage *); -- GitLab From ede446678f5da3ef4be5e3f6de0702e62342b833 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 27 May 2020 12:11:32 -0700 Subject: [PATCH 144/432] pvector: Document that multiple elements with a given priority are allowed. Acked-by: Greg Rose Signed-off-by: Ben Pfaff --- lib/pvector.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/pvector.h b/lib/pvector.h index 0d3290dc3..6da8c5b63 100644 --- a/lib/pvector.h +++ b/lib/pvector.h @@ -26,10 +26,12 @@ /* Concurrent Priority Vector * ========================== * - * Concurrent priority vector holds non-NULL pointers to objects in an - * increasing priority order and allows readers to traverse the vector without - * being concerned about writers modifying the vector as they are traversing - * it. 
+ * Concurrent priority vector holds non-NULL pointers to objects in a + * nondecreasing priority order and allows readers to traverse the vector + * without being concerned about writers modifying the vector as they are + * traversing it. + * + * Multiple elements of a given priority are allowed. * * The priority order is maintained as a linear vector of elements to allow * for efficient memory prefetching. -- GitLab From 16e3a80cf646f6c53d22ef98599d5aecb8310414 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 14 May 2020 22:10:45 +0200 Subject: [PATCH 145/432] ovsdb-server: Fix schema leak while reading db. parse_txn() function doesn't always take ownership of the 'schema' passed. So, if the schema of the clustered db has same version as the one that already in use, parse_txn() will not use it, resulting with a memory leak: 7,827 (56 direct, 7,771 indirect) bytes in 1 blocks are definitely lost at 0x483BB1A: calloc (vg_replace_malloc.c:762) by 0x44AD02: xcalloc (util.c:121) by 0x40E70E: ovsdb_schema_create (ovsdb.c:41) by 0x40EA6D: ovsdb_schema_from_json (ovsdb.c:217) by 0x415EDD: ovsdb_storage_read (storage.c:280) by 0x408968: read_db (ovsdb-server.c:607) by 0x40733D: main_loop (ovsdb-server.c:227) by 0x40733D: main (ovsdb-server.c:469) While we could put ovsdb_schema_destroy() in a few places inside 'parse_txn()', from the users' point of view it seems better to have a constant argument and just clone the 'schema' if needed. The caller will be responsible for destroying the 'schema' it owns. 
Fixes: 1b1d2e6daa56 ("ovsdb: Introduce experimental support for clustered databases.") Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- ovsdb/ovsdb-server.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index d416f1b60..ef4e996df 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -540,7 +540,7 @@ close_db(struct server_config *config, struct db *db, char *comment) static struct ovsdb_error * OVS_WARN_UNUSED_RESULT parse_txn(struct server_config *config, struct db *db, - struct ovsdb_schema *schema, const struct json *txn_json, + const struct ovsdb_schema *schema, const struct json *txn_json, const struct uuid *txnid) { if (schema && (!db->db->schema || strcmp(schema->version, @@ -565,7 +565,7 @@ parse_txn(struct server_config *config, struct db *db, ? xasprintf("database %s schema changed", db->db->name) : xasprintf("database %s connected to storage", db->db->name))); - ovsdb_replace(db->db, ovsdb_create(schema, NULL)); + ovsdb_replace(db->db, ovsdb_create(ovsdb_schema_clone(schema), NULL)); /* Force update to schema in _Server database. */ db->row_uuid = UUID_ZERO; @@ -614,6 +614,7 @@ read_db(struct server_config *config, struct db *db) } else { error = parse_txn(config, db, schema, txn_json, &txnid); json_destroy(txn_json); + ovsdb_schema_destroy(schema); if (error) { break; } -- GitLab From 8c2c503bdb0da1ce6044a53d462f905fd4f8acf5 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 22 May 2020 22:36:27 +0200 Subject: [PATCH 146/432] raft: Avoid sending equal snapshots. Snapshots are huge. In some cases we could receive several outdated append replies from the remote server. This could happen in high scale cases if the remote server is overloaded and not able to process all the raft requests in time. As an action to each outdated append reply we're sending full database snapshot. 
While remote server is already overloaded those snapshots will stuck in jsonrpc backlog for a long time making it grow up to few GB. Since remote server wasn't able to timely process incoming messages it will likely not able to process snapshots leading to the same situation with low chances to recover. Remote server will likely stuck in 'candidate' state, other servers will grow their memory consumption due to growing jsonrpc backlogs: jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:192.16.0.3:6644, num of msgs: 3795, backlog: 8838994624. This patch is trying to avoid that situation by avoiding sending of equal snapshot install requests. This helps maintain reasonable memory consumption and allows the cluster to recover on a larger scale. Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- ovsdb/raft-private.c | 1 + ovsdb/raft-private.h | 4 ++++ ovsdb/raft.c | 39 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c index 26d39a087..9468fdaf4 100644 --- a/ovsdb/raft-private.c +++ b/ovsdb/raft-private.c @@ -137,6 +137,7 @@ raft_server_destroy(struct raft_server *s) if (s) { free(s->address); free(s->nickname); + free(s->last_install_snapshot_request); free(s); } } diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h index ac8656d42..1f366b4ab 100644 --- a/ovsdb/raft-private.h +++ b/ovsdb/raft-private.h @@ -27,6 +27,7 @@ struct ds; struct ovsdb_parser; +struct raft_install_snapshot_request; /* Formatting server IDs and cluster IDs for use in human-readable logs. Do * not use these in cases where the whole server or cluster ID is needed; use @@ -83,6 +84,9 @@ struct raft_server { bool replied; /* Reply to append_request was received from this node during current election_timeout interval. */ + /* Copy of the last install_snapshot_request sent to this server. 
*/ + struct raft_install_snapshot_request *last_install_snapshot_request; + /* For use in adding and removing servers: */ struct uuid requester_sid; /* Nonzero if requested via RPC. */ struct unixctl_conn *requester_conn; /* Only if requested via unixctl. */ diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 515eadab3..708b0624c 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -1421,8 +1421,20 @@ raft_conn_run(struct raft *raft, struct raft_conn *conn) jsonrpc_session_run(conn->js); unsigned int new_seqno = jsonrpc_session_get_seqno(conn->js); - bool just_connected = (new_seqno != conn->js_seqno + bool reconnected = new_seqno != conn->js_seqno; + bool just_connected = (reconnected && jsonrpc_session_is_connected(conn->js)); + + if (reconnected) { + /* Clear 'last_install_snapshot_request' since it might not reach the + * destination or server was restarted. */ + struct raft_server *server = raft_find_server(raft, &conn->sid); + if (server) { + free(server->last_install_snapshot_request); + server->last_install_snapshot_request = NULL; + } + } + conn->js_seqno = new_seqno; if (just_connected) { if (raft->joining) { @@ -3296,6 +3308,31 @@ raft_send_install_snapshot_request(struct raft *raft, .election_timer = raft->election_timer, /* use latest value */ } }; + + if (s->last_install_snapshot_request) { + struct raft_install_snapshot_request *old, *new; + + old = s->last_install_snapshot_request; + new = &rpc.install_snapshot_request; + if ( old->term == new->term + && old->last_index == new->last_index + && old->last_term == new->last_term + && old->last_servers == new->last_servers + && old->data == new->data + && old->election_timer == new->election_timer + && uuid_equals(&old->last_eid, &new->last_eid)) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + + VLOG_WARN_RL(&rl, "not sending exact same install_snapshot_request" + " to server %s again", s->nickname); + return; + } + } + free(s->last_install_snapshot_request); + CONST_CAST(struct raft_server 
*, s)->last_install_snapshot_request + = xmemdup(&rpc.install_snapshot_request, + sizeof rpc.install_snapshot_request); + raft_send(raft, &rpc); } -- GitLab From a6117059904bb692039c926221964dd6d49b3bfd Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Wed, 27 May 2020 11:13:34 +0900 Subject: [PATCH 147/432] classifier: Prevent tries vs n_tries race leading to NULL dereference. Currently classifier tries and n_tries can be updated non-atomically, so there is a race condition which can lead to NULL dereference. The race can happen when main thread updates a classifier tries and n_tries in classifier_set_prefix_fields() and at the same time revalidator or handler thread tries to look them up in classifier_lookup__(). Such race can be triggered when user changes prefixes of flow_table. Race(user changes flow_table prefixes: ip_dst,ip_src => none): [main thread] [revalidator/handler thread] =========================================================== /* cls->n_tries == 2 */ for (int i = 0; i < cls->n_tries; i++) { trie_init(cls, i, NULL); /* n_tries == 0 */ cls->n_tries = n_tries; /* cls->tries[i]->field is NULL */ trie_ctx_init(&trie_ctx[i],&cls->tries[i]); /* trie->field is NULL */ ctx->be32ofs = trie->field->flow_be32ofs; To prevent the race, instead of re-introducing internal mutex implemented in the commit fccd7c092e09 ("classifier: Remove internal mutex."), this patch makes trie field RCU protected and checks it after read.
Fixes: fccd7c092e09 ("classifier: Remove internal mutex.") Signed-off-by: Eiichi Tsukata Signed-off-by: Ilya Maximets --- lib/classifier.c | 45 +++++++++++++++++++++++++---------------- lib/classifier.h | 6 ++++-- tests/test-classifier.c | 5 +++-- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/lib/classifier.c b/lib/classifier.c index 0fad95321..f2c3497c2 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -393,7 +393,9 @@ classifier_set_prefix_fields(struct classifier *cls, bitmap_set1(fields.bm, trie_fields[i]); new_fields[n_tries] = NULL; - if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) { + const struct mf_field *cls_field + = ovsrcu_get(struct mf_field *, &cls->tries[n_tries].field); + if (n_tries >= cls->n_tries || field != cls_field) { new_fields[n_tries] = field; changed = true; } @@ -454,7 +456,7 @@ trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field) } else { ovsrcu_set_hidden(&trie->root, NULL); } - trie->field = field; + ovsrcu_set_hidden(&trie->field, CONST_CAST(struct mf_field *, field)); /* Add existing rules to the new trie. */ CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) { @@ -839,7 +841,6 @@ classifier_remove_assert(struct classifier *cls, struct trie_ctx { const struct cls_trie *trie; bool lookup_done; /* Status of the lookup. */ - uint8_t be32ofs; /* U32 offset of the field in question. */ unsigned int maskbits; /* Prefix length needed to avoid false matches. */ union trie_prefix match_plens; /* Bitmask of prefix lengths with possible * matches. 
*/ @@ -849,7 +850,6 @@ static void trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) { ctx->trie = trie; - ctx->be32ofs = trie->field->flow_be32ofs; ctx->lookup_done = false; } @@ -1531,8 +1531,10 @@ insert_subtable(struct classifier *cls, const struct minimask *mask) *CONST_CAST(uint8_t *, &subtable->n_indices) = index; for (i = 0; i < cls->n_tries; i++) { - subtable->trie_plen[i] = minimask_get_prefix_len(mask, - cls->tries[i].field); + const struct mf_field *field + = ovsrcu_get(struct mf_field *, &cls->tries[i].field); + subtable->trie_plen[i] + = field ? minimask_get_prefix_len(mask, field) : 0; } /* Ports trie. */ @@ -1575,11 +1577,17 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, * fields using the prefix tries. The trie checks are done only as * needed to avoid folding in additional bits to the wildcards mask. */ for (j = 0; j < n_tries; j++) { - /* Is the trie field relevant for this subtable, and - is the trie field within the current range of fields? */ - if (field_plen[j] && - flowmap_is_set(&range_map, trie_ctx[j].be32ofs / 2)) { + /* Is the trie field relevant for this subtable? */ + if (field_plen[j]) { struct trie_ctx *ctx = &trie_ctx[j]; + const struct mf_field *ctx_field + = ovsrcu_get(struct mf_field *, &ctx->trie->field); + + /* Is the trie field within the current range of fields? */ + if (!ctx_field + || !flowmap_is_set(&range_map, ctx_field->flow_be32ofs / 2)) { + continue; + } /* On-demand trie lookup. */ if (!ctx->lookup_done) { @@ -1601,14 +1609,16 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries, * than this subtable would otherwise. */ if (ctx->maskbits <= field_plen[j]) { /* Unwildcard the bits and skip the rest. 
*/ - mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits); + mask_set_prefix_bits(wc, ctx_field->flow_be32ofs, + ctx->maskbits); /* Note: Prerequisite already unwildcarded, as the only * prerequisite of the supported trie lookup fields is * the ethertype, which is always unwildcarded. */ return true; } /* Can skip if the field is already unwildcarded. */ - if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) { + if (mask_prefix_bits_set(wc, ctx_field->flow_be32ofs, + ctx->maskbits)) { return true; } } @@ -2001,12 +2011,12 @@ static unsigned int trie_lookup(const struct cls_trie *trie, const struct flow *flow, union trie_prefix *plens) { - const struct mf_field *mf = trie->field; + const struct mf_field *mf = ovsrcu_get(struct mf_field *, &trie->field); /* Check that current flow matches the prerequisites for the trie * field. Some match fields are used for multiple purposes, so we * must check that the trie is relevant for this flow. */ - if (mf_are_prereqs_ok(mf, flow, NULL)) { + if (mf && mf_are_prereqs_ok(mf, flow, NULL)) { return trie_lookup_value(&trie->root, &((ovs_be32 *)flow)[mf->flow_be32ofs], &plens->be32, mf->n_bits); @@ -2053,8 +2063,9 @@ minimask_get_prefix_len(const struct minimask *minimask, * happened to be zeros. 
*/ static const ovs_be32 * -minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf) +minimatch_get_prefix(const struct minimatch *match, rcu_field_ptr *field) { + struct mf_field *mf = ovsrcu_get_protected(struct mf_field *, field); size_t u64_ofs = mf->flow_be32ofs / 2; return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs) @@ -2068,7 +2079,7 @@ static void trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) { trie_insert_prefix(&trie->root, - minimatch_get_prefix(&rule->match, trie->field), mlen); + minimatch_get_prefix(&rule->match, &trie->field), mlen); } static void @@ -2123,7 +2134,7 @@ static void trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) { trie_remove_prefix(&trie->root, - minimatch_get_prefix(&rule->match, trie->field), mlen); + minimatch_get_prefix(&rule->match, &trie->field), mlen); } /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask diff --git a/lib/classifier.h b/lib/classifier.h index d1bd4aa12..f646a8f74 100644 --- a/lib/classifier.h +++ b/lib/classifier.h @@ -314,13 +314,15 @@ extern "C" { struct cls_subtable; struct cls_match; +struct mf_field; +typedef OVSRCU_TYPE(struct mf_field *) rcu_field_ptr; struct trie_node; typedef OVSRCU_TYPE(struct trie_node *) rcu_trie_ptr; /* Prefix trie for a 'field' */ struct cls_trie { - const struct mf_field *field; /* Trie field, or NULL. */ - rcu_trie_ptr root; /* NULL if none. */ + rcu_field_ptr field; /* Trie field, or NULL. */ + rcu_trie_ptr root; /* NULL if none. 
*/ }; enum { diff --git a/tests/test-classifier.c b/tests/test-classifier.c index 6d53d016d..2d98fad48 100644 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@ -512,8 +512,9 @@ verify_tries(struct classifier *cls) int i; for (i = 0; i < cls->n_tries; i++) { - n_rules += trie_verify(&cls->tries[i].root, 0, - cls->tries[i].field->n_bits); + const struct mf_field * cls_field + = ovsrcu_get(struct mf_field *, &cls->tries[i].field); + n_rules += trie_verify(&cls->tries[i].root, 0, cls_field->n_bits); } assert(n_rules <= cls->n_rules); } -- GitLab From 21ad0088bedbd9b4070f4c5e3e6b29c7d9349beb Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 25 May 2020 14:09:42 +0200 Subject: [PATCH 148/432] AUTHORS: Add Eiichi Tsukata. Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 3d805412d..3f7eee54f 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -128,6 +128,7 @@ Ed Maste emaste@freebsd.org Ed Swierk eswierk@skyportsystems.com Edouard Bourguignon madko@linuxed.net Eelco Chaudron echaudro@redhat.com +Eiichi Tsukata eiichi.tsukata@nutanix.com Eli Britstein elibr@mellanox.com Emma Finn emma.finn@intel.com Eric Lapointe elapointe@corsa.com -- GitLab From 9ba57fc7cccca85a753bc3d5c12271defb5619c1 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Mon, 25 May 2020 22:35:44 -0700 Subject: [PATCH 149/432] datapath: Add hash info to upcall. This patch backports below upstream patches, and add __skb_set_hash to compat for older kernels. commit b5ab1f1be6180a2e975eede18731804b5164a05d Author: Jakub Kicinski Date: Mon Mar 2 21:05:18 2020 -0800 openvswitch: add missing attribute validation for hash Add missing attribute validation for OVS_PACKET_ATTR_HASH to the netlink policy. Fixes: bd1903b7c459 ("net: openvswitch: add hash info to upcall") Signed-off-by: Jakub Kicinski Reviewed-by: Greg Rose Signed-off-by: David S. 
Miller commit bd1903b7c4596ba6f7677d0dfefd05ba5876707d Author: Tonghao Zhang Date: Wed Nov 13 23:04:49 2019 +0800 net: openvswitch: add hash info to upcall When using the kernel datapath, the upcall don't include skb hash info relatived. That will introduce some problem, because the hash of skb is important in kernel stack. For example, VXLAN module uses it to select UDP src port. The tx queue selection may also use the hash in stack. Hash is computed in different ways. Hash is random for a TCP socket, and hash may be computed in hardware, or software stack. Recalculation hash is not easy. Hash of TCP socket is computed: tcp_v4_connect -> sk_set_txhash (is random) __tcp_transmit_skb -> skb_set_hash_from_sk There will be one upcall, without information of skb hash, to ovs-vswitchd, for the first packet of a TCP session. The rest packets will be processed in Open vSwitch modules, hash kept. If this tcp session is forward to VXLAN module, then the UDP src port of first tcp packet is different from rest packets. TCP packets may come from the host or dockers, to Open vSwitch. To fix it, we store the hash info to upcall, and restore hash when packets sent back. +---------------+ +-------------------------+ | Docker/VMs | | ovs-vswitchd | +----+----------+ +-+--------------------+--+ | ^ | | | | | | upcall v restore packet hash (not recalculate) | +-+--------------------+--+ | tap netdev | | vxlan module +---------------> +--> Open vSwitch ko +--> or internal type | | +-------------------------+ Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2019-October/364062.html Signed-off-by: Tonghao Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Tested-by: Aliasgar Ginwala Acked-by: Tonghao Zhang Signed-off-by: Han Zhou Signed-off-by: Ilya Maximets --- acinclude.m4 | 4 +++ datapath/datapath.c | 33 +++++++++++++++++++- datapath/datapath.h | 12 +++++++ datapath/linux/compat/include/linux/skbuff.h | 31 ++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) diff --git a/acinclude.m4 b/acinclude.m4 index 3b0eea020..8847b8145 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -1103,6 +1103,10 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_FIND_OP_PARAM_IFELSE([$KSRC/include/net/rtnetlink.h], [validate], [extack], [OVS_DEFINE([HAVE_RTNLOP_VALIDATE_WITH_EXTACK])]) + OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], + [__skb_set_hash]) + OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [sw_hash]) + OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_get_hash_raw]) if cmp -s datapath/linux/kcompat.h.new \ datapath/linux/kcompat.h >/dev/null 2>&1; then diff --git a/datapath/datapath.c b/datapath/datapath.c index a7af7849a..05c1e4274 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -371,7 +371,8 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */ - + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */ + + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */ + + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */ /* OVS_PACKET_ATTR_USERDATA */ if (upcall_info->userdata) @@ -414,6 +415,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, size_t len; unsigned int hlen; int err, dp_ifindex; + u64 hash; dp_ifindex = get_dpifindex(dp); if (!dp_ifindex) @@ -523,6 +525,25 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, pad_packet(dp, user_skb); } + /* Add OVS_PACKET_ATTR_HASH */ + hash = skb_get_hash_raw(skb); +#ifdef 
HAVE_SW_HASH + if (skb->sw_hash) + hash |= OVS_PACKET_HASH_SW_BIT; +#endif + +#ifdef HAVE_L4_RXHASH + if (skb->l4_rxhash) +#else + if (skb->l4_hash) +#endif + hash |= OVS_PACKET_HASH_L4_BIT; + + if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) { + err = -ENOBUFS; + goto out; + } + /* Only reserve room for attribute header, packet data is added * in skb_zerocopy() */ @@ -563,6 +584,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct vport *input_vport; u16 mru = 0; + u64 hash; int len; int err; bool log = !a[OVS_PACKET_ATTR_PROBE]; @@ -588,6 +610,14 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) } OVS_CB(packet)->mru = mru; + if (a[OVS_PACKET_ATTR_HASH]) { + hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]); + + __skb_set_hash(packet, hash & 0xFFFFFFFFULL, + !!(hash & OVS_PACKET_HASH_SW_BIT), + !!(hash & OVS_PACKET_HASH_L4_BIT)); + } + /* Build an sw_flow for sending this packet. */ flow = ovs_flow_alloc(); err = PTR_ERR(flow); @@ -649,6 +679,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = { [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG }, [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 }, + [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 }, }; static struct genl_ops dp_packet_genl_ops[] = { diff --git a/datapath/datapath.h b/datapath/datapath.h index 3bffa1dcb..f99db1fde 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -159,6 +159,18 @@ struct ovs_net { #endif }; +/** + * enum ovs_pkt_hash_types - hash info to include with a packet + * to send to userspace. + * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack. + * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash + * over transport ports. 
+ */ +enum ovs_pkt_hash_types { + OVS_PACKET_HASH_SW_BIT = (1ULL << 32), + OVS_PACKET_HASH_L4_BIT = (1ULL << 33), +}; + extern unsigned int ovs_net_id; void ovs_lock(void); void ovs_unlock(void); diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h index 63972891b..6d248b3ed 100644 --- a/datapath/linux/compat/include/linux/skbuff.h +++ b/datapath/linux/compat/include/linux/skbuff.h @@ -456,4 +456,35 @@ static inline void skb_set_inner_ipproto(struct sk_buff *skb, #define nf_reset_ct nf_reset #endif +#ifndef HAVE___SKB_SET_HASH +static inline void +__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4) +{ +#ifdef HAVE_RXHASH + skb->rxhash = hash; +#else + skb->hash = hash; +#endif +#if defined(HAVE_L4_RXHASH) + skb->l4_rxhash = is_l4; +#else + skb->l4_hash = is_l4; +#endif +#ifdef HAVE_SW_HASH + skb->sw_hash = is_sw; +#endif +} +#endif + +#ifndef HAVE_SKB_GET_HASH_RAW +static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) +{ +#ifdef HAVE_RXHASH + return skb->rxhash; +#else + return skb->hash; +#endif +} +#endif + #endif -- GitLab From 89b522aee379f7ebd21ec67ffb622118af7e9db1 Mon Sep 17 00:00:00 2001 From: Mark Michelson Date: Fri, 1 May 2020 15:13:08 -0400 Subject: [PATCH 150/432] ovsdb-idl: Add function to reset min_index. If an administrator removes all of the databases in a cluster from disk, then ovsdb IDL clients will have a problem. The databases will all reset their stored indexes to 0, so the IDL client's min_index will be higher than the indexes of all databases in the cluster. This results in the client constantly connecting to databases, detecting the data as "stale", and then attempting to connect to another. This function provides a way to reset the IDL to an initial state with min_index of 0. This way, the client will not wrongly detect the database data as stale and will recover properly. Notice that this function is not actually used anywhere in this patch.
This will be used by OVN, though, since OVN is the primary user of clustered OVSDB. Signed-off-by: Mark Michelson Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 10 ++++++++++ lib/ovsdb-idl.h | 1 + 2 files changed, 11 insertions(+) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 6614ea161..f54e360e3 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -553,6 +553,16 @@ ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *idl, bool shuffle) idl->shuffle_remotes = shuffle; } +/* Reset min_index to 0. This prevents a situation where the client + * thinks all databases have stale data, when they actually have all + * been destroyed and rebuilt from scratch. + */ +void +ovsdb_idl_reset_min_index(struct ovsdb_idl *idl) +{ + idl->min_index = 0; +} + static void ovsdb_idl_db_destroy(struct ovsdb_idl_db *db) { diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h index 9f12ce320..c56cd19b1 100644 --- a/lib/ovsdb-idl.h +++ b/lib/ovsdb-idl.h @@ -64,6 +64,7 @@ struct ovsdb_idl *ovsdb_idl_create_unconnected( const struct ovsdb_idl_class *, bool monitor_everything_by_default); void ovsdb_idl_set_remote(struct ovsdb_idl *, const char *, bool); void ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *, bool); +void ovsdb_idl_reset_min_index(struct ovsdb_idl *); void ovsdb_idl_destroy(struct ovsdb_idl *); void ovsdb_idl_set_leader_only(struct ovsdb_idl *, bool leader_only); -- GitLab From ace0310f3fcb3fa17378711972f9696f7d667c63 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 25 May 2020 18:21:39 +0200 Subject: [PATCH 151/432] ovsdb: Fix timeout type for wait operation. According to RFC 7047, 'timeout' is an integer field: 5.2.6. Wait The "wait" object contains the following members: "op": "wait" required "timeout": optional ... For some reason initial implementation treated it as a real number. 
This causes a build issue with clang that complains that LLONG_MAX could not be represented as double: ovsdb/execution.c:733:32: error: implicit conversion from 'long long' to 'double' changes value from 9223372036854775807 to 9223372036854775808 timeout_msec = MIN(LLONG_MAX, json_real(timeout)); ~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/include/sys/limits.h:69:19: note: expanded from macro 'LLONG_MAX' #define LLONG_MAX __LLONG_MAX /* max for a long long */ ^~~~~~~~~~~ /usr/include/x86/_limits.h:74:21: note: expanded from macro '__LLONG_MAX' #define __LLONG_MAX 0x7fffffffffffffffLL /* max value for a long long */ ^~~~~~~~~~~~~~~~~~~~ ./lib/util.h:90:21: note: expanded from macro 'MIN' #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) ^ ~ Fix that by changing parser to treat 'timeout' as integer. Fixes clang build on FreeBSD 12.1 in CirrusCI. Fixes: f85f8ebbfac9 ("Initial implementation of OVSDB.") Acked-by: Han Zhou Acked-by: Numan Siddique Signed-off-by: Ilya Maximets --- ovsdb/execution.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ovsdb/execution.c b/ovsdb/execution.c index e45f3d679..3a0dad5d0 100644 --- a/ovsdb/execution.c +++ b/ovsdb/execution.c @@ -712,7 +712,7 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser, long long int timeout_msec = 0; size_t i; - timeout = ovsdb_parser_member(parser, "timeout", OP_NUMBER | OP_OPTIONAL); + timeout = ovsdb_parser_member(parser, "timeout", OP_INTEGER | OP_OPTIONAL); where = ovsdb_parser_member(parser, "where", OP_ARRAY); columns_json = ovsdb_parser_member(parser, "columns", OP_ARRAY | OP_OPTIONAL); @@ -730,7 +730,7 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser, } if (!error) { if (timeout) { - timeout_msec = MIN(LLONG_MAX, json_real(timeout)); + timeout_msec = json_integer(timeout); if (timeout_msec < 0) { error = ovsdb_syntax_error(timeout, NULL, "timeout must be nonnegative"); -- GitLab From e61984e781e6c7d621568428788cb87c11be8f1f Mon Sep 17 
00:00:00 2001 From: Tonghao Zhang Date: Tue, 2 Jun 2020 21:50:22 +0800 Subject: [PATCH 152/432] dpif-netlink: Generate ufids for installing TC flowers To support installing the TC flowers to HW, via "ovs-appctl dpctl/add-flow" command, there should be an ufid. This patch will check whether ufid exists, if not, generate an ufid. Should to know that when processing upcall packets, ufid is generated in parse_odp_packet for kernel datapath. Configuring the max-idle/max-revalidator, may help testing this patch. Signed-off-by: Tonghao Zhang Acked-by: Roi Dayan Signed-off-by: Simon Horman --- lib/dpif-netlink.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index dc642100f..a19ed7e53 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2231,12 +2231,55 @@ dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops, } } +static void +dpif_netlink_try_update_ufid__(struct dpif_op *op, ovs_u128 *ufid) +{ + switch (op->type) { + case DPIF_OP_FLOW_PUT: + if (!op->flow_put.ufid) { + odp_flow_key_hash(op->flow_put.key, op->flow_put.key_len, + ufid); + op->flow_put.ufid = ufid; + } + break; + case DPIF_OP_FLOW_DEL: + if (!op->flow_del.ufid) { + odp_flow_key_hash(op->flow_del.key, op->flow_del.key_len, + ufid); + op->flow_del.ufid = ufid; + } + break; + case DPIF_OP_FLOW_GET: + if (!op->flow_get.ufid) { + odp_flow_key_hash(op->flow_get.key, op->flow_get.key_len, + ufid); + op->flow_get.ufid = ufid; + } + break; + case DPIF_OP_EXECUTE: + default: + break; + } +} + +static void +dpif_netlink_try_update_ufid(struct dpif_op **ops, ovs_u128 *ufid, + size_t n_ops) +{ + int i; + + for (i = 0; i < n_ops; i++) { + dpif_netlink_try_update_ufid__(ops[i], &ufid[i]); + } +} + static void dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops, enum dpif_offload_type offload_type) { struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); struct dpif_op 
*new_ops[OPERATE_MAX_OPS]; + ovs_u128 ufids[OPERATE_MAX_OPS]; int count = 0; int i = 0; int err = 0; @@ -2246,6 +2289,8 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops, return; } + dpif_netlink_try_update_ufid(ops, ufids, n_ops); + if (offload_type != DPIF_OFFLOAD_NEVER && netdev_is_flow_api_enabled()) { while (n_ops > 0) { count = 0; -- GitLab From 4f4be08e4713941285ecb71555c9a64c46cdb8ae Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 2 Jun 2020 21:50:23 +0800 Subject: [PATCH 153/432] netdev-offload-tc: Use ipv6_addr_is_set instead of is_all_zeros Not bugfix, make the codes more readable. Signed-off-by: Tonghao Zhang Acked-by: Roi Dayan Signed-off-by: Simon Horman --- lib/netdev-offload-tc.c | 6 ++---- lib/tc.c | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index e188e63e5..ba97ae9cf 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -734,13 +734,11 @@ parse_tc_flower_to_match(struct tc_flower *flower, nl_msg_put_be32(buf, OVS_TUNNEL_KEY_ATTR_IPV4_DST, action->encap.ipv4.ipv4_dst); } - if (!is_all_zeros(&action->encap.ipv6.ipv6_src, - sizeof action->encap.ipv6.ipv6_src)) { + if (ipv6_addr_is_set(&action->encap.ipv6.ipv6_src)) { nl_msg_put_in6_addr(buf, OVS_TUNNEL_KEY_ATTR_IPV6_SRC, &action->encap.ipv6.ipv6_src); } - if (!is_all_zeros(&action->encap.ipv6.ipv6_dst, - sizeof action->encap.ipv6.ipv6_dst)) { + if (ipv6_addr_is_set(&action->encap.ipv6.ipv6_dst)) { nl_msg_put_in6_addr(buf, OVS_TUNNEL_KEY_ATTR_IPV6_DST, &action->encap.ipv6.ipv6_dst); } diff --git a/lib/tc.c b/lib/tc.c index 12af0192b..a6297445c 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -2038,7 +2038,7 @@ nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, bool id_present, if (ipv4_dst) { nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_SRC, ipv4_src); nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_DST, ipv4_dst); - } else if (!is_all_zeros(ipv6_dst, sizeof *ipv6_dst)) { 
+ } else if (ipv6_addr_is_set(ipv6_dst)) { nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_DST, ipv6_dst); nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_SRC, @@ -2135,12 +2135,10 @@ nl_msg_put_act_ct(struct ofpbuf *request, struct tc_action *action) action->ct.range.ipv4.max); } } else if (action->ct.range.ip_family == AF_INET6) { - size_t ipv6_sz = sizeof(action->ct.range.ipv6.max); nl_msg_put_in6_addr(request, TCA_CT_NAT_IPV6_MIN, &action->ct.range.ipv6.min); - if (!is_all_zeros(&action->ct.range.ipv6.max, - ipv6_sz)) { + if (ipv6_addr_is_set(&action->ct.range.ipv6.max)) { nl_msg_put_in6_addr(request, TCA_CT_NAT_IPV6_MAX, &action->ct.range.ipv6.max); } -- GitLab From 5f568d049130fb481e41fdf9a290b6d1e3a7f23a Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 2 Jun 2020 21:50:24 +0800 Subject: [PATCH 154/432] netdev-offload-tc: Allow to match the IP and port mask of tunnel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch allows users to offload the TC flower rules with tunnel mask. This patch allows masked match of the following, where previously only an exact match was supported: * Remote (dst) tunnel endpoint address * Local (src) tunnel endpoint address * Remote (dst) tunnel endpoint UDP port And also allows masked match of the following, where previously no match was supported: * Local (src) tunnel endpoint UDP port In some cases, a mask is useful as a wildcard. For example, during a DDoS attack, we don't want to allow the specified host IPs or source ports to access the targeted host. For example: $ ovs-appctl dpctl/add-flow "tunnel(dst=2.2.2.100,src=2.2.2.0/255.255.255.0,tp_dst=4789),\ recirc_id(0),in_port(3),eth(),eth_type(0x0800),ipv4()" "" $ tc filter show dev vxlan_sys_4789 ingress ... eth_type ipv4 enc_dst_ip 2.2.2.100 enc_src_ip 2.2.2.0/24 enc_dst_port 4789 enc_ttl 64 in_hw in_hw_count 2 action order 1: gact action drop ... 
Signed-off-by: Tonghao Zhang Acked-by: Roi Dayan Signed-off-by: Simon Horman --- NEWS | 5 ++++ include/openvswitch/match.h | 3 ++ lib/match.c | 13 +++++++++ lib/netdev-offload-tc.c | 38 +++++++++++++++++++------ lib/tc.c | 57 +++++++++++++++++++++++++++++++++---- tests/tunnel.at | 22 ++++++++++++++ 6 files changed, 123 insertions(+), 15 deletions(-) diff --git a/NEWS b/NEWS index 3dbd8ec0e..88b273a0a 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,11 @@ Post-v2.13.0 by enabling interrupt mode. - Userspace datapath: * Add support for conntrack zone-based timeout policy. + - Tunnels: TC Flower offload + * Tunnel Local endpoint address masked match is supported. + * Tunnel Remote endpoint address masked match is supported. + * Tunnel Local endpoint ports masked match is supported. + * Tunnel Remote endpoint ports masked match is supported. v2.13.0 - 14 Feb 2020 diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h index 8af3b74ed..3b196c7fa 100644 --- a/include/openvswitch/match.h +++ b/include/openvswitch/match.h @@ -105,6 +105,9 @@ void match_set_tun_flags(struct match *match, uint16_t flags); void match_set_tun_flags_masked(struct match *match, uint16_t flags, uint16_t mask); void match_set_tun_tp_dst(struct match *match, ovs_be16 tp_dst); void match_set_tun_tp_dst_masked(struct match *match, ovs_be16 port, ovs_be16 mask); +void match_set_tun_tp_src(struct match *match, ovs_be16 tp_src); +void match_set_tun_tp_src_masked(struct match *match, + ovs_be16 port, ovs_be16 mask); void match_set_tun_gbp_id_masked(struct match *match, ovs_be16 gbp_id, ovs_be16 mask); void match_set_tun_gbp_id(struct match *match, ovs_be16 gbp_id); void match_set_tun_gbp_flags_masked(struct match *match, uint8_t flags, uint8_t mask); diff --git a/lib/match.c b/lib/match.c index 25c277cc6..29b25a73b 100644 --- a/lib/match.c +++ b/lib/match.c @@ -293,6 +293,19 @@ match_set_tun_tp_dst_masked(struct match *match, ovs_be16 port, ovs_be16 mask) match->flow.tunnel.tp_dst = port & 
mask; } +void +match_set_tun_tp_src(struct match *match, ovs_be16 tp_src) +{ + match_set_tun_tp_src_masked(match, tp_src, OVS_BE16_MAX); +} + +void +match_set_tun_tp_src_masked(struct match *match, ovs_be16 port, ovs_be16 mask) +{ + match->wc.masks.tunnel.tp_src = mask; + match->flow.tunnel.tp_src = port & mask; +} + void match_set_tun_gbp_id_masked(struct match *match, ovs_be16 gbp_id, ovs_be16 mask) { diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index ba97ae9cf..fcb331c25 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -633,13 +633,20 @@ parse_tc_flower_to_match(struct tc_flower *flower, match_set_tun_id(match, flower->key.tunnel.id); match->flow.tunnel.flags |= FLOW_TNL_F_KEY; } - if (flower->key.tunnel.ipv4.ipv4_dst) { - match_set_tun_src(match, flower->key.tunnel.ipv4.ipv4_src); - match_set_tun_dst(match, flower->key.tunnel.ipv4.ipv4_dst); - } else if (!is_all_zeros(&flower->key.tunnel.ipv6.ipv6_dst, - sizeof flower->key.tunnel.ipv6.ipv6_dst)) { - match_set_tun_ipv6_src(match, &flower->key.tunnel.ipv6.ipv6_src); - match_set_tun_ipv6_dst(match, &flower->key.tunnel.ipv6.ipv6_dst); + if (flower->mask.tunnel.ipv4.ipv4_dst) { + match_set_tun_dst_masked(match, + flower->key.tunnel.ipv4.ipv4_dst, + flower->mask.tunnel.ipv4.ipv4_dst); + match_set_tun_src_masked(match, + flower->key.tunnel.ipv4.ipv4_src, + flower->mask.tunnel.ipv4.ipv4_src); + } else if (ipv6_addr_is_set(&flower->mask.tunnel.ipv6.ipv6_dst)) { + match_set_tun_ipv6_dst_masked(match, + &flower->key.tunnel.ipv6.ipv6_dst, + &flower->mask.tunnel.ipv6.ipv6_dst); + match_set_tun_ipv6_src_masked(match, + &flower->key.tunnel.ipv6.ipv6_src, + &flower->mask.tunnel.ipv6.ipv6_src); } if (flower->key.tunnel.tos) { match_set_tun_tos_masked(match, flower->key.tunnel.tos, @@ -649,8 +656,15 @@ parse_tc_flower_to_match(struct tc_flower *flower, match_set_tun_ttl_masked(match, flower->key.tunnel.ttl, flower->mask.tunnel.ttl); } - if (flower->key.tunnel.tp_dst) { - 
match_set_tun_tp_dst(match, flower->key.tunnel.tp_dst); + if (flower->mask.tunnel.tp_dst) { + match_set_tun_tp_dst_masked(match, + flower->key.tunnel.tp_dst, + flower->mask.tunnel.tp_dst); + } + if (flower->mask.tunnel.tp_src) { + match_set_tun_tp_src_masked(match, + flower->key.tunnel.tp_src, + flower->mask.tunnel.tp_src); } if (flower->key.tunnel.metadata.present.len) { flower_tun_opt_to_match(match, flower); @@ -1402,8 +1416,14 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, flower.key.tunnel.ttl = tnl->ip_ttl; flower.key.tunnel.tp_src = tnl->tp_src; flower.key.tunnel.tp_dst = tnl->tp_dst; + flower.mask.tunnel.ipv4.ipv4_src = tnl_mask->ip_src; + flower.mask.tunnel.ipv4.ipv4_dst = tnl_mask->ip_dst; + flower.mask.tunnel.ipv6.ipv6_src = tnl_mask->ipv6_src; + flower.mask.tunnel.ipv6.ipv6_dst = tnl_mask->ipv6_dst; flower.mask.tunnel.tos = tnl_mask->ip_tos; flower.mask.tunnel.ttl = tnl_mask->ip_ttl; + flower.mask.tunnel.tp_src = tnl_mask->tp_src; + flower.mask.tunnel.tp_dst = tnl_mask->tp_dst; flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY) ? 
tnl_mask->tun_id : 0; flower_match_to_tun_opt(&flower, tnl, tnl_mask); flower.tunnel = true; diff --git a/lib/tc.c b/lib/tc.c index a6297445c..ac5ecc2b7 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -372,6 +372,12 @@ static const struct nl_policy tca_flower_policy[] = { .optional = true, }, [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NL_A_U16, .optional = true, }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT] = { .type = NL_A_U16, + .optional = true, }, + [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NL_A_U16, + .optional = true, }, + [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NL_A_U16, + .optional = true, }, [TCA_FLOWER_KEY_FLAGS] = { .type = NL_A_BE32, .optional = true, }, [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NL_A_BE32, .optional = true, }, [TCA_FLOWER_KEY_IP_TTL] = { .type = NL_A_U8, @@ -650,22 +656,38 @@ nl_parse_flower_tunnel(struct nlattr **attrs, struct tc_flower *flower) flower->mask.tunnel.id = OVS_BE64_MAX; } if (attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK]) { + flower->mask.tunnel.ipv4.ipv4_src = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK]); flower->key.tunnel.ipv4.ipv4_src = nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC]); } if (attrs[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK]) { + flower->mask.tunnel.ipv4.ipv4_dst = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK]); flower->key.tunnel.ipv4.ipv4_dst = nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_DST]); } if (attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK]) { + flower->mask.tunnel.ipv6.ipv6_src = + nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK]); flower->key.tunnel.ipv6.ipv6_src = nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC]); } if (attrs[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK]) { + flower->mask.tunnel.ipv6.ipv6_dst = + nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK]); flower->key.tunnel.ipv6.ipv6_dst = nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_DST]); } - if (attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]) { + if (attrs[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]) { 
+ flower->mask.tunnel.tp_src = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]); + flower->key.tunnel.tp_src = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT]); + } + if (attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]) { + flower->mask.tunnel.tp_dst = + nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]); flower->key.tunnel.tp_dst = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]); } @@ -2592,11 +2614,18 @@ nl_msg_put_flower_tunnel_opts(struct ofpbuf *request, uint16_t type, static void nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower) { + ovs_be32 ipv4_src_mask = flower->mask.tunnel.ipv4.ipv4_src; + ovs_be32 ipv4_dst_mask = flower->mask.tunnel.ipv4.ipv4_dst; ovs_be32 ipv4_src = flower->key.tunnel.ipv4.ipv4_src; ovs_be32 ipv4_dst = flower->key.tunnel.ipv4.ipv4_dst; + struct in6_addr *ipv6_src_mask = &flower->mask.tunnel.ipv6.ipv6_src; + struct in6_addr *ipv6_dst_mask = &flower->mask.tunnel.ipv6.ipv6_dst; struct in6_addr *ipv6_src = &flower->key.tunnel.ipv6.ipv6_src; struct in6_addr *ipv6_dst = &flower->key.tunnel.ipv6.ipv6_dst; + ovs_be16 tp_dst_mask = flower->mask.tunnel.tp_dst; + ovs_be16 tp_src_mask = flower->mask.tunnel.tp_src; ovs_be16 tp_dst = flower->key.tunnel.tp_dst; + ovs_be16 tp_src = flower->key.tunnel.tp_src; ovs_be32 id = be64_to_be32(flower->key.tunnel.id); uint8_t tos = flower->key.tunnel.tos; uint8_t ttl = flower->key.tunnel.ttl; @@ -2604,12 +2633,21 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower) uint8_t ttl_mask = flower->mask.tunnel.ttl; ovs_be64 id_mask = flower->mask.tunnel.id; - if (ipv4_dst) { - nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC, ipv4_src); + if (ipv4_dst_mask || ipv4_src_mask) { + nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, + ipv4_dst_mask); + nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + ipv4_src_mask); nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST, ipv4_dst); - } else if 
(!is_all_zeros(ipv6_dst, sizeof *ipv6_dst)) { - nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC, ipv6_src); + nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC, ipv4_src); + } else if (ipv6_addr_is_set(ipv6_dst_mask) || + ipv6_addr_is_set(ipv6_src_mask)) { + nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_DST_MASK, + ipv6_dst_mask); + nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + ipv6_src_mask); nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_DST, ipv6_dst); + nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC, ipv6_src); } if (tos_mask) { nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TOS, tos); @@ -2619,9 +2657,16 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower) nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TTL, ttl); nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TTL_MASK, ttl_mask); } - if (tp_dst) { + if (tp_dst_mask) { + nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, + tp_dst_mask); nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, tp_dst); } + if (tp_src_mask) { + nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, + tp_src_mask); + nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, tp_src); + } if (id_mask) { nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, id); } diff --git a/tests/tunnel.at b/tests/tunnel.at index b3764aed8..a74a67aa8 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -110,6 +110,28 @@ Datapath actions: drop OVS_VSWITCHD_STOP(["/dropping tunnel packet marked ECN CE but is not ECN capable/d"]) AT_CLEANUP +AT_SETUP([tunnel - input with matching tunnel mask]) +OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ + options:remote_ip=1.1.1.1 \ + ofport_request=1 \ + -- add-port br0 p2 -- set Interface p2 type=dummy \ + ofport_request=2]) + +AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl + br0 65534/100: (dummy-internal) + p1 1/1: (gre: remote_ip=1.1.1.1) + p2 2/2: (dummy) +]) + +AT_CHECK([ovs-appctl 
dpctl/add-flow "tunnel(dst=1.1.1.1,src=3.3.3.200/255.255.255.0,tp_dst=123,tp_src=1/0xf,ttl=64),recirc_id(0),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"]) + +AT_CHECK([ovs-appctl dpctl/dump-flows | tail -1], [0], [dnl +tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1/0xf,tp_dst=123),recirc_id(0),in_port(1),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([tunnel - output]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ options:remote_ip=1.1.1.1 options:local_ip=2.2.2.2 \ -- GitLab From 3f82ac1fe36d6d8ad9b21750e7d878394f031147 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 2 Jun 2020 21:50:25 +0800 Subject: [PATCH 155/432] netdev-offload-tc: Expand tunnel source IPs masked match To support more use cases, for example DDoS mitigation, where packets should be dropped in hardware, this patch allows users to match only the tunnel source IPs with a masked value. $ ovs-appctl dpctl/add-flow "tunnel(src=2.2.2.0/255.255.255.0,tp_dst=4789,ttl=64),\ recirc_id(2),in_port(3),eth(),eth_type(0x0800),ipv4()" "" $ ovs-appctl dpctl/dump-flows tunnel(src=2.2.2.0/255.255.255.0,ttl=64,tp_dst=4789) ... actions:drop $ tc filter show dev vxlan_sys_4789 ingress ... eth_type ipv4 enc_src_ip 2.2.2.0/24 enc_dst_port 4789 enc_ttl 64 in_hw in_hw_count 2 action order 1: gact action drop ... 
Signed-off-by: Tonghao Zhang Acked-by: Roi Dayan Signed-off-by: Simon Horman --- lib/netdev-offload-tc.c | 9 ++++++--- lib/odp-util.c | 3 ++- lib/packets.h | 6 ++++++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index fcb331c25..8b43be52e 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -633,14 +633,16 @@ parse_tc_flower_to_match(struct tc_flower *flower, match_set_tun_id(match, flower->key.tunnel.id); match->flow.tunnel.flags |= FLOW_TNL_F_KEY; } - if (flower->mask.tunnel.ipv4.ipv4_dst) { + if (flower->mask.tunnel.ipv4.ipv4_dst || + flower->mask.tunnel.ipv4.ipv4_src) { match_set_tun_dst_masked(match, flower->key.tunnel.ipv4.ipv4_dst, flower->mask.tunnel.ipv4.ipv4_dst); match_set_tun_src_masked(match, flower->key.tunnel.ipv4.ipv4_src, flower->mask.tunnel.ipv4.ipv4_src); - } else if (ipv6_addr_is_set(&flower->mask.tunnel.ipv6.ipv6_dst)) { + } else if (ipv6_addr_is_set(&flower->mask.tunnel.ipv6.ipv6_dst) || + ipv6_addr_is_set(&flower->mask.tunnel.ipv6.ipv6_src)) { match_set_tun_ipv6_dst_masked(match, &flower->key.tunnel.ipv6.ipv6_dst, &flower->mask.tunnel.ipv6.ipv6_dst); @@ -1400,7 +1402,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, chain = key->recirc_id; mask->recirc_id = 0; - if (flow_tnl_dst_is_set(&key->tunnel)) { + if (flow_tnl_dst_is_set(&key->tunnel) || + flow_tnl_src_is_set(&key->tunnel)) { VLOG_DBG_RL(&rl, "tunnel: id %#" PRIx64 " src " IP_FMT " dst " IP_FMT " tp_src %d tp_dst %d", diff --git a/lib/odp-util.c b/lib/odp-util.c index b66d266cc..72601dc6b 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -6125,7 +6125,8 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms, nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); - if (flow_tnl_dst_is_set(&flow->tunnel) || export_mask) { + if (flow_tnl_dst_is_set(&flow->tunnel) || + flow_tnl_src_is_set(&flow->tunnel) || export_mask) { tun_key_to_attr(buf, &data->tunnel, 
&parms->flow->tunnel, parms->key_buf, NULL); } diff --git a/lib/packets.h b/lib/packets.h index 447e6f6fa..395bc869e 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -52,6 +52,12 @@ flow_tnl_dst_is_set(const struct flow_tnl *tnl) return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst); } +static inline bool +flow_tnl_src_is_set(const struct flow_tnl *tnl) +{ + return tnl->ip_src || ipv6_addr_is_set(&tnl->ipv6_src); +} + struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl); struct in6_addr flow_tnl_src(const struct flow_tnl *tnl); -- GitLab From 191536574e3bd90fd30208ceb02305bd1ce13d11 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 4 Jun 2020 13:47:00 +0300 Subject: [PATCH 156/432] netdev-offload: Implement terse dump support In order to improve revalidator performance by minimizing unnecessary copying of data, extend netdev-offloads to support terse dump mode. Extend netdev_flow_api->flow_dump_create() with 'terse' bool argument. Implement support for terse dump in functions that convert netlink to flower and flower to match. Set flow stats "used" value based on difference in number of flow packets because lastuse timestamp is not included in TC terse dump. Kernel API support is implemented in following patch. 
Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Simon Horman --- lib/dpif-netlink.c | 70 ++++++++++++++++++----------------- lib/netdev-offload-provider.h | 3 +- lib/netdev-offload-tc.c | 67 +++++++++++++++++++++++++-------- lib/netdev-offload.c | 10 +++-- lib/netdev-offload.h | 6 ++- ofproto/ofproto-dpif-upcall.c | 24 +++++++++++- 6 files changed, 122 insertions(+), 58 deletions(-) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index a19ed7e53..8e08b3c1c 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -1445,7 +1445,8 @@ start_netdev_dump(const struct dpif *dpif_, dump->netdev_current_dump = 0; dump->netdev_dumps = netdev_ports_flow_dump_create(dpif_->dpif_class, - &dump->netdev_dumps_num); + &dump->netdev_dumps_num, + dump->up.terse); ovs_mutex_unlock(&dump->netdev_lock); } @@ -1640,41 +1641,42 @@ dpif_netlink_netdev_match_to_dpif_flow(struct match *match, struct dpif_flow_attrs *attrs, ovs_u128 *ufid, struct dpif_flow *flow, - bool terse OVS_UNUSED) -{ - - struct odp_flow_key_parms odp_parms = { - .flow = &match->flow, - .mask = &match->wc.masks, - .support = { - .max_vlan_headers = 2, - .recirc = true, - .ct_state = true, - .ct_zone = true, - .ct_mark = true, - .ct_label = true, - }, - }; - size_t offset; - + bool terse) +{ memset(flow, 0, sizeof *flow); - /* Key */ - offset = key_buf->size; - flow->key = ofpbuf_tail(key_buf); - odp_flow_key_from_flow(&odp_parms, key_buf); - flow->key_len = key_buf->size - offset; - - /* Mask */ - offset = mask_buf->size; - flow->mask = ofpbuf_tail(mask_buf); - odp_parms.key_buf = key_buf; - odp_flow_key_from_mask(&odp_parms, mask_buf); - flow->mask_len = mask_buf->size - offset; - - /* Actions */ - flow->actions = nl_attr_get(actions); - flow->actions_len = nl_attr_get_size(actions); + if (!terse) { + struct odp_flow_key_parms odp_parms = { + .flow = &match->flow, + .mask = &match->wc.masks, + .support = { + .max_vlan_headers = 2, + .recirc = true, + .ct_state = true, + .ct_zone = true, + 
.ct_mark = true, + .ct_label = true, + }, + }; + size_t offset; + + /* Key */ + offset = key_buf->size; + flow->key = ofpbuf_tail(key_buf); + odp_flow_key_from_flow(&odp_parms, key_buf); + flow->key_len = key_buf->size - offset; + + /* Mask */ + offset = mask_buf->size; + flow->mask = ofpbuf_tail(mask_buf); + odp_parms.key_buf = key_buf; + odp_flow_key_from_mask(&odp_parms, mask_buf); + flow->mask_len = mask_buf->size - offset; + + /* Actions */ + flow->actions = nl_attr_get(actions); + flow->actions_len = nl_attr_get_size(actions); + } /* Stats */ memcpy(&flow->stats, stats, sizeof *stats); diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h index 5a809c0cd..0bed7bf61 100644 --- a/lib/netdev-offload-provider.h +++ b/lib/netdev-offload-provider.h @@ -42,7 +42,8 @@ struct netdev_flow_api { * * On success returns 0 and allocates data, on failure returns * positive errno. */ - int (*flow_dump_create)(struct netdev *, struct netdev_flow_dump **dump); + int (*flow_dump_create)(struct netdev *, struct netdev_flow_dump **dump, + bool terse); int (*flow_dump_destroy)(struct netdev_flow_dump *); /* Returns true if there are more flows to dump. 
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 8b43be52e..18e1ec835 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -366,7 +366,8 @@ netdev_tc_flow_flush(struct netdev *netdev) static int netdev_tc_flow_dump_create(struct netdev *netdev, - struct netdev_flow_dump **dump_out) + struct netdev_flow_dump **dump_out, + bool terse) { enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev); struct netdev_flow_dump *dump; @@ -386,6 +387,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev, dump = xzalloc(sizeof *dump); dump->nl_dump = xzalloc(sizeof *dump->nl_dump); dump->netdev = netdev_ref(netdev); + dump->terse = terse; id = tc_make_tcf_id(ifindex, block_id, prio, hook); tc_dump_flower_start(&id, dump->nl_dump); @@ -502,13 +504,53 @@ flower_tun_opt_to_match(struct match *match, struct tc_flower *flower) match->wc.masks.tunnel.flags |= FLOW_TNL_F_UDPIF; } +static void +parse_tc_flower_to_stats(struct tc_flower *flower, + struct dpif_flow_stats *stats) +{ + if (!stats) { + return; + } + + memset(stats, 0, sizeof *stats); + stats->n_packets = get_32aligned_u64(&flower->stats.n_packets); + stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes); + stats->used = flower->lastused; +} + +static void +parse_tc_flower_to_attrs(struct tc_flower *flower, + struct dpif_flow_attrs *attrs) +{ + attrs->offloaded = (flower->offloaded_state == TC_OFFLOADED_STATE_IN_HW || + flower->offloaded_state == + TC_OFFLOADED_STATE_UNDEFINED); + attrs->dp_layer = "tc"; + attrs->dp_extra_info = NULL; +} + +static int +parse_tc_flower_terse_to_match(struct tc_flower *flower, + struct match *match, + struct dpif_flow_stats *stats, + struct dpif_flow_attrs *attrs) +{ + match_init_catchall(match); + + parse_tc_flower_to_stats(flower, stats); + parse_tc_flower_to_attrs(flower, attrs); + + return 0; +} + static int parse_tc_flower_to_match(struct tc_flower *flower, struct match *match, struct nlattr **actions, struct dpif_flow_stats *stats, struct 
dpif_flow_attrs *attrs, - struct ofpbuf *buf) + struct ofpbuf *buf, + bool terse) { size_t act_off; struct tc_flower_key *key = &flower->key; @@ -517,6 +559,10 @@ parse_tc_flower_to_match(struct tc_flower *flower, struct tc_action *action; int i; + if (terse) { + return parse_tc_flower_terse_to_match(flower, match, stats, attrs); + } + ofpbuf_clear(buf); match_init_catchall(match); @@ -877,17 +923,8 @@ parse_tc_flower_to_match(struct tc_flower *flower, *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr)); - if (stats) { - memset(stats, 0, sizeof *stats); - stats->n_packets = get_32aligned_u64(&flower->stats.n_packets); - stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes); - stats->used = flower->lastused; - } - - attrs->offloaded = (flower->offloaded_state == TC_OFFLOADED_STATE_IN_HW) - || (flower->offloaded_state == TC_OFFLOADED_STATE_UNDEFINED); - attrs->dp_layer = "tc"; - attrs->dp_extra_info = NULL; + parse_tc_flower_to_stats(flower, stats); + parse_tc_flower_to_attrs(flower, attrs); return 0; } @@ -919,7 +956,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump, } if (parse_tc_flower_to_match(&flower, match, actions, stats, attrs, - wbuffer)) { + wbuffer, dump->terse)) { continue; } @@ -1805,7 +1842,7 @@ netdev_tc_flow_get(struct netdev *netdev, } in_port = netdev_ifindex_to_odp_port(id.ifindex); - parse_tc_flower_to_match(&flower, match, actions, stats, attrs, buf); + parse_tc_flower_to_match(&flower, match, actions, stats, attrs, buf, false); match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX); match->flow.in_port.odp_port = in_port; diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c index 32eab5910..ab97a292e 100644 --- a/lib/netdev-offload.c +++ b/lib/netdev-offload.c @@ -201,13 +201,14 @@ netdev_flow_flush(struct netdev *netdev) } int -netdev_flow_dump_create(struct netdev *netdev, struct netdev_flow_dump **dump) +netdev_flow_dump_create(struct netdev *netdev, struct netdev_flow_dump **dump, + bool terse) { const 
struct netdev_flow_api *flow_api = ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api); return (flow_api && flow_api->flow_dump_create) - ? flow_api->flow_dump_create(netdev, dump) + ? flow_api->flow_dump_create(netdev, dump, terse) : EOPNOTSUPP; } @@ -436,7 +437,8 @@ netdev_ports_flow_flush(const struct dpif_class *dpif_class) } struct netdev_flow_dump ** -netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports) +netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports, + bool terse) { struct port_to_netdev_data *data; struct netdev_flow_dump **dumps; @@ -454,7 +456,7 @@ netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports) HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { if (data->dpif_class == dpif_class) { - if (netdev_flow_dump_create(data->netdev, &dumps[i])) { + if (netdev_flow_dump_create(data->netdev, &dumps[i], terse)) { continue; } diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h index b4b882a56..87f5852c8 100644 --- a/lib/netdev-offload.h +++ b/lib/netdev-offload.h @@ -80,7 +80,8 @@ struct offload_info { }; int netdev_flow_flush(struct netdev *); -int netdev_flow_dump_create(struct netdev *, struct netdev_flow_dump **dump); +int netdev_flow_dump_create(struct netdev *, struct netdev_flow_dump **dump, + bool terse); int netdev_flow_dump_destroy(struct netdev_flow_dump *); bool netdev_flow_dump_next(struct netdev_flow_dump *, struct match *, struct nlattr **actions, struct dpif_flow_stats *, @@ -114,7 +115,8 @@ odp_port_t netdev_ifindex_to_odp_port(int ifindex); struct netdev_flow_dump **netdev_ports_flow_dump_create( const struct dpif_class *, - int *ports); + int *ports, + bool terse); void netdev_ports_flow_flush(const struct dpif_class *); int netdev_ports_flow_del(const struct dpif_class *, const ovs_u128 *ufid, struct dpif_flow_stats *stats); diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 5e08ef10d..920f29a6f 100644 --- 
a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -2576,6 +2576,25 @@ udpif_update_flow_pps(struct udpif *udpif, struct udpif_key *ukey, ukey->flow_time = udpif->dpif->current_ms; } +static long long int +udpif_update_used(struct udpif *udpif, struct udpif_key *ukey, + struct dpif_flow_stats *stats) + OVS_REQUIRES(ukey->mutex) +{ + if (!udpif->dump->terse) { + return ukey->created; + } + + if (stats->n_packets > ukey->stats.n_packets) { + stats->used = udpif->dpif->current_ms; + } else if (ukey->stats.used) { + stats->used = ukey->stats.used; + } else { + stats->used = ukey->created; + } + return stats->used; +} + static void revalidate(struct revalidator *revalidator) { @@ -2631,6 +2650,7 @@ revalidate(struct revalidator *revalidator) for (f = flows; f < &flows[n_dumped]; f++) { long long int used = f->stats.used; struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER; + struct dpif_flow_stats stats = f->stats; enum reval_result result; struct udpif_key *ukey; bool already_dumped; @@ -2675,12 +2695,12 @@ revalidate(struct revalidator *revalidator) } if (!used) { - used = ukey->created; + used = udpif_update_used(udpif, ukey, &stats); } if (kill_them_all || (used && used < now - max_idle)) { result = UKEY_DELETE; } else { - result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions, + result = revalidate_ukey(udpif, ukey, &stats, &odp_actions, reval_seq, &recircs, f->attrs.offloaded); } -- GitLab From 5db012c4ac3630ec99fa6c64bbae38cbbcd0544e Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Thu, 4 Jun 2020 13:47:01 +0300 Subject: [PATCH 157/432] tc: Support new terse dump kernel API When dumping flows in terse mode set TCA_DUMP_FLAGS attribute to TCA_DUMP_FLAGS_TERSE flag to prevent unnecessary copying of data between kernel and user spaces. Only expect kernel to provide cookie, stats and flags when dumping filters in terse mode. 
Signed-off-by: Vlad Buslov Reviewed-by: Roi Dayan Signed-off-by: Simon Horman --- lib/netdev-offload-tc.c | 4 +-- lib/tc.c | 59 ++++++++++++++++++++++++++++++++--------- lib/tc.h | 5 ++-- 3 files changed, 51 insertions(+), 17 deletions(-) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 18e1ec835..19295573f 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -390,7 +390,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev, dump->terse = terse; id = tc_make_tcf_id(ifindex, block_id, prio, hook); - tc_dump_flower_start(&id, dump->nl_dump); + tc_dump_flower_start(&id, dump->nl_dump, terse); *dump_out = dump; @@ -951,7 +951,7 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump, while (nl_dump_next(dump->nl_dump, &nl_flow, rbuffer)) { struct tc_flower flower; - if (parse_netlink_to_tc_flower(&nl_flow, &id, &flower)) { + if (parse_netlink_to_tc_flower(&nl_flow, &id, &flower, dump->terse)) { continue; } diff --git a/lib/tc.c b/lib/tc.c index ac5ecc2b7..9e51ac155 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -51,9 +51,14 @@ #define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU) #endif -#if TCA_MAX < 14 +#ifndef TCA_DUMP_FLAGS_TERSE +#define TCA_DUMP_FLAGS_TERSE (1 << 0) +#endif + +#if TCA_MAX < 15 #define TCA_CHAIN 11 #define TCA_INGRESS_BLOCK 13 +#define TCA_DUMP_FLAGS 15 #endif VLOG_DEFINE_THIS_MODULE(tc); @@ -417,6 +422,11 @@ static const struct nl_policy tca_flower_policy[] = { .optional = true, }, }; +static const struct nl_policy tca_flower_terse_policy[] = { + [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, }, + [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, }, +}; + static void nl_parse_flower_eth(struct nlattr **attrs, struct tc_flower *flower) { @@ -1595,7 +1605,7 @@ nl_parse_act_csum(struct nlattr *options, struct tc_flower *flower) static const struct nl_policy act_policy[] = { [TCA_ACT_KIND] = { .type = NL_A_STRING, .optional = false, }, [TCA_ACT_COOKIE] = { .type = NL_A_UNSPEC, .optional = true, }, - 
[TCA_ACT_OPTIONS] = { .type = NL_A_NESTED, .optional = false, }, + [TCA_ACT_OPTIONS] = { .type = NL_A_NESTED, .optional = true, }, [TCA_ACT_STATS] = { .type = NL_A_NESTED, .optional = false, }, }; @@ -1606,7 +1616,8 @@ static const struct nl_policy stats_policy[] = { }; static int -nl_parse_single_action(struct nlattr *action, struct tc_flower *flower) +nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, + bool terse) { struct nlattr *act_options; struct nlattr *act_stats; @@ -1619,7 +1630,8 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower) int err = 0; if (!nl_parse_nested(action, act_policy, action_attrs, - ARRAY_SIZE(act_policy))) { + ARRAY_SIZE(act_policy)) || + (!terse && !action_attrs[TCA_ACT_OPTIONS])) { VLOG_ERR_RL(&error_rl, "failed to parse single action options"); return EPROTO; } @@ -1628,7 +1640,9 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower) act_options = action_attrs[TCA_ACT_OPTIONS]; act_cookie = action_attrs[TCA_ACT_COOKIE]; - if (!strcmp(act_kind, "gact")) { + if (terse) { + /* Terse dump doesn't provide act options attribute. 
*/ + } else if (!strcmp(act_kind, "gact")) { err = nl_parse_act_gact(act_options, flower); } else if (!strcmp(act_kind, "mirred")) { err = nl_parse_act_mirred(act_options, flower); @@ -1678,7 +1692,8 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower) #define TCA_ACT_MIN_PRIO 1 static int -nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower) +nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower, + bool terse) { const struct nlattr *actions = attrs[TCA_FLOWER_ACT]; static struct nl_policy actions_orders_policy[TCA_ACT_MAX_NUM + 1] = {}; @@ -1704,7 +1719,7 @@ nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower) VLOG_DBG_RL(&error_rl, "Can only support %d actions", TCA_ACT_MAX_NUM); return EOPNOTSUPP; } - err = nl_parse_single_action(actions_orders[i], flower); + err = nl_parse_single_action(actions_orders[i], flower, terse); if (err) { return err; @@ -1723,11 +1738,21 @@ nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower) } static int -nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower) +nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower, + bool terse) { struct nlattr *attrs[ARRAY_SIZE(tca_flower_policy)]; int err; + if (terse) { + if (!nl_parse_nested(nl_options, tca_flower_terse_policy, + attrs, ARRAY_SIZE(tca_flower_terse_policy))) { + VLOG_ERR_RL(&error_rl, "failed to parse flower classifier terse options"); + return EPROTO; + } + goto skip_flower_opts; + } + if (!nl_parse_nested(nl_options, tca_flower_policy, attrs, ARRAY_SIZE(tca_flower_policy))) { VLOG_ERR_RL(&error_rl, "failed to parse flower classifier options"); @@ -1743,13 +1768,14 @@ nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower) return err; } +skip_flower_opts: nl_parse_flower_flags(attrs, flower); - return nl_parse_flower_actions(attrs, flower); + return nl_parse_flower_actions(attrs, flower, terse); } int 
parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id, - struct tc_flower *flower) + struct tc_flower *flower, bool terse) { struct tcmsg *tc; struct nlattr *ta[ARRAY_SIZE(tca_policy)]; @@ -1792,15 +1818,22 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id, return EPROTO; } - return nl_parse_flower_options(ta[TCA_OPTIONS], flower); + return nl_parse_flower_options(ta[TCA_OPTIONS], flower, terse); } int -tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump) +tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump, bool terse) { struct ofpbuf request; request_from_tcf_id(id, 0, RTM_GETTFILTER, NLM_F_DUMP, &request); + if (terse) { + struct nla_bitfield32 dump_flags = { TCA_DUMP_FLAGS_TERSE, + TCA_DUMP_FLAGS_TERSE }; + + nl_msg_put_unspec(&request, TCA_DUMP_FLAGS, &dump_flags, + sizeof dump_flags); + } nl_dump_start(dump, NETLINK_ROUTE, &request); ofpbuf_uninit(&request); @@ -1829,7 +1862,7 @@ tc_get_flower(struct tcf_id *id, struct tc_flower *flower) return error; } - error = parse_netlink_to_tc_flower(reply, id, flower); + error = parse_netlink_to_tc_flower(reply, id, flower, false); ofpbuf_delete(reply); return error; } diff --git a/lib/tc.h b/lib/tc.h index 24a4994fd..11f3231f9 100644 --- a/lib/tc.h +++ b/lib/tc.h @@ -341,10 +341,11 @@ BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite) int tc_replace_flower(struct tcf_id *id, struct tc_flower *flower); int tc_del_filter(struct tcf_id *id); int tc_get_flower(struct tcf_id *id, struct tc_flower *flower); -int tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump); +int tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump, bool terse); int parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id, - struct tc_flower *flower); + struct tc_flower *flower, + bool terse); void tc_set_policy(const char *policy); #endif /* tc.h */ -- GitLab From a3db6e473d9fb6558d0dc065bc4b3e4e1c2f9455 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Fri, 5 Jun 2020 
21:17:29 +0800 Subject: [PATCH 158/432] netdev-offload-tc: Allow installing arp rules to TC dp. This patch allows to install arp rules to tc dp. In the future, arp will be offloaded to hardware to be processed. So OvS enable this now. $ ovs-appctl dpctl/add-flow 'recirc_id(0),in_port(3),eth(),\ eth_type(0x0806),arp(op=2,tha=00:50:56:e1:4b:ab,tip=10.255.1.116)' 2 $ ovs-appctl dpctl/dump-flows ... arp(tip=10.255.1.116,op=2,tha=00:50:56:e1:4b:ab) ... $ tc filter show dev ingress ... eth_type arp arp_tip 10.255.1.116 arp_op reply arp_tha 00:50:56:e1:4b:ab not_in_hw action order 1: mirred (Egress Redirect to device ) stolen ... Signed-off-by: Tonghao Zhang Signed-off-by: Simon Horman --- include/openvswitch/match.h | 11 +++++++ lib/match.c | 32 ++++++++++++++++++ lib/netdev-offload-tc.c | 27 +++++++++++++++ lib/odp-util.c | 3 +- lib/tc.c | 66 +++++++++++++++++++++++++++++++++++++ lib/tc.h | 8 +++++ 6 files changed, 146 insertions(+), 1 deletion(-) diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h index 3b196c7fa..9e480318e 100644 --- a/include/openvswitch/match.h +++ b/include/openvswitch/match.h @@ -197,6 +197,8 @@ void match_set_tp_dst_masked(struct match *, ovs_be16 port, ovs_be16 mask); void match_set_tcp_flags(struct match *, ovs_be16); void match_set_tcp_flags_masked(struct match *, ovs_be16 flags, ovs_be16 mask); void match_set_nw_proto(struct match *, uint8_t); +void match_set_nw_proto_masked(struct match *match, + const uint8_t nw_proto, const uint8_t mask); void match_set_nw_src(struct match *, ovs_be32); void match_set_nw_src_masked(struct match *, ovs_be32 ip, ovs_be32 mask); void match_set_nw_dst(struct match *, ovs_be32); @@ -210,6 +212,9 @@ void match_set_nw_frag(struct match *, uint8_t nw_frag); void match_set_nw_frag_masked(struct match *, uint8_t nw_frag, uint8_t mask); void match_set_icmp_type(struct match *, uint8_t); void match_set_icmp_code(struct match *, uint8_t); +void match_set_arp_opcode_masked(struct match *match, + const 
uint8_t opcode, + const uint8_t mask); void match_set_arp_sha(struct match *, const struct eth_addr); void match_set_arp_sha_masked(struct match *, const struct eth_addr arp_sha, @@ -218,6 +223,12 @@ void match_set_arp_tha(struct match *, const struct eth_addr); void match_set_arp_tha_masked(struct match *, const struct eth_addr arp_tha, const struct eth_addr mask); +void match_set_arp_spa_masked(struct match *match, + const ovs_be32 arp_spa, + const ovs_be32 mask); +void match_set_arp_tpa_masked(struct match *match, + const ovs_be32 arp_tpa, + const ovs_be32 mask); void match_set_ipv6_src(struct match *, const struct in6_addr *); void match_set_ipv6_src_masked(struct match *, const struct in6_addr *, const struct in6_addr *); diff --git a/lib/match.c b/lib/match.c index 29b25a73b..a77554851 100644 --- a/lib/match.c +++ b/lib/match.c @@ -940,6 +940,14 @@ match_set_nw_proto(struct match *match, uint8_t nw_proto) match->wc.masks.nw_proto = UINT8_MAX; } +void +match_set_nw_proto_masked(struct match *match, + const uint8_t nw_proto, const uint8_t mask) +{ + match->flow.nw_proto = nw_proto; + match->wc.masks.nw_proto = mask; +} + void match_set_nw_src(struct match *match, ovs_be32 nw_src) { @@ -1033,6 +1041,30 @@ match_set_icmp_code(struct match *match, uint8_t icmp_code) match_set_tp_dst(match, htons(icmp_code)); } +void +match_set_arp_opcode_masked(struct match *match, + const uint8_t opcode, + const uint8_t mask) +{ + match_set_nw_proto_masked(match, opcode, mask); +} + +void +match_set_arp_spa_masked(struct match *match, + const ovs_be32 arp_spa, + const ovs_be32 mask) +{ + match_set_nw_src_masked(match, arp_spa, mask); +} + +void +match_set_arp_tpa_masked(struct match *match, + const ovs_be32 arp_tpa, + const ovs_be32 mask) +{ + match_set_nw_dst_masked(match, arp_tpa, mask); +} + void match_set_arp_sha(struct match *match, const struct eth_addr sha) { diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 19295573f..aa6d22e74 100644 --- 
a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -589,6 +589,14 @@ parse_tc_flower_to_match(struct tc_flower *flower, match->flow.mpls_lse[0] = key->mpls_lse & mask->mpls_lse; match->wc.masks.mpls_lse[0] = mask->mpls_lse; match_set_dl_type(match, key->encap_eth_type[0]); + } else if (key->eth_type == htons(ETH_TYPE_ARP)) { + match_set_arp_sha_masked(match, key->arp.sha, mask->arp.sha); + match_set_arp_tha_masked(match, key->arp.tha, mask->arp.tha); + match_set_arp_spa_masked(match, key->arp.spa, mask->arp.spa); + match_set_arp_tpa_masked(match, key->arp.tpa, mask->arp.tpa); + match_set_arp_opcode_masked(match, key->arp.opcode, + mask->arp.opcode); + match_set_dl_type(match, key->eth_type); } else { match_set_dl_type(match, key->eth_type); } @@ -1558,6 +1566,25 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, mask->dl_type = 0; mask->in_port.odp_port = 0; + if (key->dl_type == htons(ETH_P_ARP)) { + flower.key.arp.spa = key->nw_src; + flower.key.arp.tpa = key->nw_dst; + flower.key.arp.sha = key->arp_sha; + flower.key.arp.tha = key->arp_tha; + flower.key.arp.opcode = key->nw_proto; + flower.mask.arp.spa = mask->nw_src; + flower.mask.arp.tpa = mask->nw_dst; + flower.mask.arp.sha = mask->arp_sha; + flower.mask.arp.tha = mask->arp_tha; + flower.mask.arp.opcode = mask->nw_proto; + + mask->nw_src = 0; + mask->nw_dst = 0; + mask->nw_proto = 0; + memset(&mask->arp_sha, 0, sizeof mask->arp_sha); + memset(&mask->arp_tha, 0, sizeof mask->arp_tha); + } + if (is_ip_any(key)) { flower.key.ip_proto = key->nw_proto; flower.mask.ip_proto = mask->nw_proto; diff --git a/lib/odp-util.c b/lib/odp-util.c index 72601dc6b..9b31244c2 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -7993,7 +7993,8 @@ get_arp_key(const struct flow *flow, struct ovs_key_arp *arp) arp->arp_sip = flow->nw_src; arp->arp_tip = flow->nw_dst; - arp->arp_op = htons(flow->nw_proto); + arp->arp_op = flow->nw_proto == UINT8_MAX ? 
+ OVS_BE16_MAX : htons(flow->nw_proto); arp->arp_sha = flow->arp_sha; arp->arp_tha = flow->arp_tha; } diff --git a/lib/tc.c b/lib/tc.c index 9e51ac155..29b4328d8 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -318,6 +318,24 @@ static const struct nl_policy tca_flower_policy[] = { .min_len = ETH_ALEN, .optional = true, }, [TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, }, + [TCA_FLOWER_KEY_ARP_SIP] = { .type = NL_A_U32, .optional = true, }, + [TCA_FLOWER_KEY_ARP_TIP] = { .type = NL_A_U32, .optional = true, }, + [TCA_FLOWER_KEY_ARP_SHA] = { .type = NL_A_UNSPEC, + .min_len = ETH_ALEN, + .optional = true, }, + [TCA_FLOWER_KEY_ARP_THA] = { .type = NL_A_UNSPEC, + .min_len = ETH_ALEN, + .optional = true, }, + [TCA_FLOWER_KEY_ARP_OP] = { .type = NL_A_U8, .optional = true, }, + [TCA_FLOWER_KEY_ARP_SIP_MASK] = { .type = NL_A_U32, .optional = true, }, + [TCA_FLOWER_KEY_ARP_TIP_MASK] = { .type = NL_A_U32, .optional = true, }, + [TCA_FLOWER_KEY_ARP_SHA_MASK] = { .type = NL_A_UNSPEC, + .min_len = ETH_ALEN, + .optional = true, }, + [TCA_FLOWER_KEY_ARP_THA_MASK] = { .type = NL_A_UNSPEC, + .min_len = ETH_ALEN, + .optional = true, }, + [TCA_FLOWER_KEY_ARP_OP_MASK] = { .type = NL_A_U8, .optional = true, }, [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, }, [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, }, [TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, }, @@ -427,6 +445,45 @@ static const struct nl_policy tca_flower_terse_policy[] = { [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, }, }; +static void +nl_parse_flower_arp(struct nlattr **attrs, struct tc_flower *flower) +{ + const struct eth_addr *eth; + + if (attrs[TCA_FLOWER_KEY_ARP_SIP_MASK]) { + flower->key.arp.spa = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_SIP]); + flower->mask.arp.spa = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_SIP_MASK]); + } + if (attrs[TCA_FLOWER_KEY_ARP_TIP_MASK]) { + flower->key.arp.tpa = + 
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_TIP]); + flower->mask.arp.tpa = + nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_TIP_MASK]); + } + if (attrs[TCA_FLOWER_KEY_ARP_SHA_MASK]) { + eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_SHA], ETH_ALEN); + memcpy(&flower->key.arp.sha, eth, sizeof flower->key.arp.sha); + + eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_SHA_MASK], ETH_ALEN); + memcpy(&flower->mask.arp.sha, eth, sizeof flower->mask.arp.sha); + } + if (attrs[TCA_FLOWER_KEY_ARP_THA_MASK]) { + eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_THA], ETH_ALEN); + memcpy(&flower->key.arp.tha, eth, sizeof flower->key.arp.tha); + + eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_THA_MASK], ETH_ALEN); + memcpy(&flower->mask.arp.tha, eth, sizeof flower->mask.arp.tha); + } + if (attrs[TCA_FLOWER_KEY_ARP_OP_MASK]) { + flower->key.arp.opcode = + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ARP_OP]); + flower->mask.arp.opcode = + nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ARP_OP_MASK]); + } +} + static void nl_parse_flower_eth(struct nlattr **attrs, struct tc_flower *flower) { @@ -1760,6 +1817,7 @@ nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower, } nl_parse_flower_eth(attrs, flower); + nl_parse_flower_arp(attrs, flower); nl_parse_flower_mpls(attrs, flower); nl_parse_flower_vlan(attrs, flower); nl_parse_flower_ip(attrs, flower); @@ -2745,6 +2803,14 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) FLOWER_PUT_MASKED_VALUE(dst_mac, TCA_FLOWER_KEY_ETH_DST); FLOWER_PUT_MASKED_VALUE(src_mac, TCA_FLOWER_KEY_ETH_SRC); + if (host_eth_type == ETH_P_ARP) { + FLOWER_PUT_MASKED_VALUE(arp.spa, TCA_FLOWER_KEY_ARP_SIP); + FLOWER_PUT_MASKED_VALUE(arp.tpa, TCA_FLOWER_KEY_ARP_TIP); + FLOWER_PUT_MASKED_VALUE(arp.sha, TCA_FLOWER_KEY_ARP_SHA); + FLOWER_PUT_MASKED_VALUE(arp.tha, TCA_FLOWER_KEY_ARP_THA); + FLOWER_PUT_MASKED_VALUE(arp.opcode, TCA_FLOWER_KEY_ARP_OP); + } + if (host_eth_type == ETH_P_IP || host_eth_type == ETH_P_IPV6) { 
FLOWER_PUT_MASKED_VALUE(ip_ttl, TCA_FLOWER_KEY_IP_TTL); FLOWER_PUT_MASKED_VALUE(ip_tos, TCA_FLOWER_KEY_IP_TOS); diff --git a/lib/tc.h b/lib/tc.h index 11f3231f9..028eed5d0 100644 --- a/lib/tc.h +++ b/lib/tc.h @@ -121,6 +121,14 @@ struct tc_flower_key { uint32_t ct_mark; ovs_u128 ct_label; + struct { + ovs_be32 spa; + ovs_be32 tpa; + struct eth_addr sha; + struct eth_addr tha; + uint8_t opcode; + } arp; + struct { ovs_be32 ipv4_src; ovs_be32 ipv4_dst; -- GitLab From b9de30d350879533ce3ffd6f8fb946a1c9c9f6cd Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Fri, 29 May 2020 15:09:12 +0530 Subject: [PATCH 159/432] ovs-actions.xml: Fix a typo in the description of check_pkt_larger. Signed-off-by: Numan Siddique Acked-by: William Tu Signed-off-by: Ilya Maximets --- lib/ovs-actions.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml index 060a079b4..7169b15c0 100644 --- a/lib/ovs-actions.xml +++ b/lib/ovs-actions.xml @@ -1529,7 +1529,7 @@ for i in [1,n_slaves]:

    - The packet length to check againt the argument pkt_len + The packet length to check against the argument pkt_len includes the L2 header and L2 payload of the packet, but not the VLAN tag (if present).

    -- GitLab From 5042815d8fe58e19b58e77d4d87b59951f6fdeb8 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 3 Jun 2020 16:58:16 +0200 Subject: [PATCH 160/432] ovs-rcu: Avoid flushing callbacks during postponing. ovsrcu_flush_cbset() call during ovsrcu_postpone() could cause use after free in case the caller sets new pointer only after postponing free for the old one: ------------------ ------------------ ------------------- Thread 1 Thread 2 RCU Thread ------------------ ------------------ ------------------- pointer = A ovsrcu_quiesce(): thread->seqno = 30 global_seqno = 31 quiesced read pointer A postpone(free(A)): flush cbset pop flushed_cbsets ovsrcu_synchronize: target_seqno = 31 ovsrcu_quiesce(): thread->seqno = 31 global_seqno = 32 quiesced read pointer A use pointer A ovsrcu_quiesce(): thread->seqno = 32 global_seqno = 33 quiesced read pointer A pointer = B ovsrcu_quiesce(): thread->seqno = 33 global_seqno = 34 quiesced target_seqno exceeded by all threads call cbs to free A use pointer A (use after free) ----------------------------------------------------------- Fix that by using dynamically re-allocated array without flushing to the global flushed_cbsets until writer enters quiescent state. 
Fixes: 0f2ea84841e1 ("ovs-rcu: New library.") Reported-by: Linhaifeng Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-June/371265.html Acked-by: Ben Pfaff Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + lib/ovs-rcu.c | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 3f7eee54f..7a3b12610 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -563,6 +563,7 @@ Krishna Miriyala miriyalak@vmware.com Krishna Mohan Elluru elluru.kri.mohan@hpe.com László Sürü laszlo.suru@ericsson.com Len Gao leng@vmware.com +Linhaifeng haifeng.lin@huawei.com Logan Rosen logatronico@gmail.com Luca Falavigna dktrkranz@debian.org Luiz Henrique Ozaki luiz.ozaki@gmail.com diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c index ebc8120f0..cde1e925b 100644 --- a/lib/ovs-rcu.c +++ b/lib/ovs-rcu.c @@ -30,6 +30,8 @@ VLOG_DEFINE_THIS_MODULE(ovs_rcu); +#define MIN_CBS 16 + struct ovsrcu_cb { void (*function)(void *aux); void *aux; @@ -37,7 +39,8 @@ struct ovsrcu_cb { struct ovsrcu_cbset { struct ovs_list list_node; - struct ovsrcu_cb cbs[16]; + struct ovsrcu_cb *cbs; + size_t n_allocated; int n_cbs; }; @@ -310,16 +313,19 @@ ovsrcu_postpone__(void (*function)(void *aux), void *aux) cbset = perthread->cbset; if (!cbset) { cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset); + cbset->cbs = xmalloc(MIN_CBS * sizeof *cbset->cbs); + cbset->n_allocated = MIN_CBS; cbset->n_cbs = 0; } + if (cbset->n_cbs == cbset->n_allocated) { + cbset->cbs = x2nrealloc(cbset->cbs, &cbset->n_allocated, + sizeof *cbset->cbs); + } + cb = &cbset->cbs[cbset->n_cbs++]; cb->function = function; cb->aux = aux; - - if (cbset->n_cbs >= ARRAY_SIZE(cbset->cbs)) { - ovsrcu_flush_cbset(perthread); - } } static bool @@ -341,6 +347,7 @@ ovsrcu_call_postponed(void) for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) { cb->function(cb->aux); } + free(cbset->cbs); free(cbset); } -- GitLab From f05d17e2789c30ad38bd3c502cdbe4719f9832fe Mon Sep 17 00:00:00 
2001 From: Dumitru Ceara Date: Thu, 28 May 2020 14:32:17 +0200 Subject: [PATCH 161/432] ovsdb-server.7: Mention update3 as replies to monitor_cond_change. Monitor_cond_change might trigger updates to be sent to clients as results to condition changes. These updates can be either update2 (for monitor_cond monitors) or update3 (for monitor_cond_since monitors). The documentation used to mention only update2. Signed-off-by: Dumitru Ceara Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- Documentation/ref/ovsdb-server.7.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/ref/ovsdb-server.7.rst b/Documentation/ref/ovsdb-server.7.rst index 967761bdf..04414350a 100644 --- a/Documentation/ref/ovsdb-server.7.rst +++ b/Documentation/ref/ovsdb-server.7.rst @@ -316,9 +316,9 @@ monitor request, will contain any matched rows by old condition and not matched by the new condition. Changes according to the new conditions are automatically sent to the client -using the ``update2`` monitor notification. An update, if any, as a result of -a condition change, will be sent to the client before the reply to the -``monitor_cond_change`` request. +using the ``update2`` or ``update3`` monitor notification depending on the +monitor method. An update, if any, as a result of a condition change, will +be sent to the client before the reply to the ``monitor_cond_change`` request. 4.1.14 Update2 notification --------------------------- -- GitLab From ae25f8c8fff80a58cd0a15e2d3ae7ab1b4994e48 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Thu, 28 May 2020 14:32:31 +0200 Subject: [PATCH 162/432] ovsdb-idl: Avoid inconsistent IDL state with OVSDB_MONITOR_V3. Assuming an ovsdb client connected to a database using OVSDB_MONITOR_V3 (i.e., "monitor_cond_since" method) with the initial monitor condition MC1. 
Assuming the following two transactions are executed on the ovsdb-server: TXN1: "insert record R1 in table T1" TXN2: "insert record R2 in table T2" If the client's monitor condition MC1 for table T2 matches R2 then the client will receive the following update3 message: method="update3", "insert record R2 in table T2", last-txn-id=TXN2 At this point, if the presence of the new record R2 in the IDL triggers the client to update its monitor condition to MC2 and add a clause for table T1 which matches R1, a monitor_cond_change message is sent to the server: method="monitor_cond_change", "clauses from MC2" In normal operation the ovsdb-server will reply with a new update3 message of the form: method="update3", "insert record R1 in table T1", last-txn-id=TXN2 However, if the connection drops in the meantime, this last update might get lost. It might happen that during the reconnect a new transaction happens that modifies the original record R1: TXN3: "modify record R1 in table T1" When the client reconnects, it will try to perform a fast resync by sending: method="monitor_cond_since", "clauses from MC2", last-txn-id=TXN2 Because TXN2 is still in the ovsdb-server transaction history, the server replies with the changes from the most recent transactions only, i.e., TXN3: result="true", last-txn-id=TXN3, "modify record R1 in table T1" This causes the IDL on the client to end up in an inconsistent state because it has never seen the update that created R1. Such a scenario is described in: https://bugzilla.redhat.com/show_bug.cgi?id=1808580#c22 To avoid this issue, the IDL will now maintain (up to) 3 different types of conditions for each DB table: - new_cond: condition that has been set by the IDL client but has not yet been sent to the server through monitor_cond_change. - req_cond: condition that has been sent to the server but the reply acknowledging the change hasn't been received yet. - ack_cond: condition that has been acknowledged by the server.
Whenever the IDL FSM is restarted (e.g., voluntary or involuntary disconnect): - if there is a known last_id txn-id the code ensures that new_cond will contain the most recent condition set by the IDL client (either req_cond if there was a request in flight, or new_cond if the IDL client set a condition while the IDL was disconnected) - if there is no known last_id txn-id the code ensures that ack_cond will contain the most recent conditions set by the IDL client regardless whether they were acked by the server or not. When monitor_cond_since/monitor_cond requests are sent they will always include ack_cond and if new_cond is not NULL a follow up monitor_cond_change will be generated afterwards. On the other hand ovsdb_idl_db_set_condition() will always modify new_cond. This ensures that updates of type "insert" that happened before the last transaction known by the IDL but didn't match old monitor conditions are sent upon reconnect if the monitor condition has changed to include them in the meantime. Fixes: 403a6a0cb003 ("ovsdb-idl: Fast resync from server when connection reset.") Signed-off-by: Dumitru Ceara Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- lib/ovsdb-idl-provider.h | 8 +- lib/ovsdb-idl.c | 167 +++++++++++++++++++++++++++++++++------ tests/ovsdb-idl.at | 56 +++++++++++++ 3 files changed, 206 insertions(+), 25 deletions(-) diff --git a/lib/ovsdb-idl-provider.h b/lib/ovsdb-idl-provider.h index 30d1d08eb..00497d940 100644 --- a/lib/ovsdb-idl-provider.h +++ b/lib/ovsdb-idl-provider.h @@ -122,8 +122,12 @@ struct ovsdb_idl_table { unsigned int change_seqno[OVSDB_IDL_CHANGE_MAX]; struct ovs_list indexes; /* Contains "struct ovsdb_idl_index"s */ struct ovs_list track_list; /* Tracked rows (ovsdb_idl_row.track_node). */ - struct ovsdb_idl_condition condition; - bool cond_changed; + struct ovsdb_idl_condition *ack_cond; /* Last condition acked by the + * server. */ + struct ovsdb_idl_condition *req_cond; /* Last condition requested to the + * server. 
*/ + struct ovsdb_idl_condition *new_cond; /* Latest condition set by the IDL + * client. */ }; struct ovsdb_idl_class { diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index f54e360e3..c4371e988 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -240,6 +240,10 @@ static void ovsdb_idl_send_monitor_request(struct ovsdb_idl *, struct ovsdb_idl_db *, enum ovsdb_idl_monitor_method); static void ovsdb_idl_db_clear(struct ovsdb_idl_db *db); +static void ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db); +static void ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db); +static void ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst, + struct ovsdb_idl_condition **src); struct ovsdb_idl { struct ovsdb_idl_db server; @@ -422,9 +426,11 @@ ovsdb_idl_db_init(struct ovsdb_idl_db *db, const struct ovsdb_idl_class *class, = table->change_seqno[OVSDB_IDL_CHANGE_MODIFY] = table->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0; table->db = db; - ovsdb_idl_condition_init(&table->condition); - ovsdb_idl_condition_add_clause_true(&table->condition); - table->cond_changed = false; + table->ack_cond = NULL; + table->req_cond = NULL; + table->new_cond = xmalloc(sizeof *table->new_cond); + ovsdb_idl_condition_init(table->new_cond); + ovsdb_idl_condition_add_clause_true(table->new_cond); } db->monitor_id = json_array_create_2(json_string_create("monid"), json_string_create(class->database)); @@ -566,12 +572,15 @@ ovsdb_idl_reset_min_index(struct ovsdb_idl *idl) static void ovsdb_idl_db_destroy(struct ovsdb_idl_db *db) { + struct ovsdb_idl_condition *null_cond = NULL; ovs_assert(!db->txn); ovsdb_idl_db_txn_abort_all(db); ovsdb_idl_db_clear(db); for (size_t i = 0; i < db->class_->n_tables; i++) { struct ovsdb_idl_table *table = &db->tables[i]; - ovsdb_idl_condition_destroy(&table->condition); + ovsdb_idl_condition_move(&table->ack_cond, &null_cond); + ovsdb_idl_condition_move(&table->req_cond, &null_cond); + ovsdb_idl_condition_move(&table->new_cond, &null_cond); 
ovsdb_idl_destroy_indexes(table); shash_destroy(&table->columns); hmap_destroy(&table->rows); @@ -700,6 +709,12 @@ ovsdb_idl_send_request(struct ovsdb_idl *idl, struct jsonrpc_msg *request) static void ovsdb_idl_restart_fsm(struct ovsdb_idl *idl) { + /* Resync data DB table conditions to avoid missing updates due to + * conditions that were in flight or changed locally while the connection + * was down. + */ + ovsdb_idl_db_sync_condition(&idl->data); + ovsdb_idl_send_schema_request(idl, &idl->server); ovsdb_idl_transition(idl, IDL_S_SERVER_SCHEMA_REQUESTED); idl->data.monitoring = OVSDB_IDL_NOT_MONITORING; @@ -807,7 +822,9 @@ ovsdb_idl_process_response(struct ovsdb_idl *idl, struct jsonrpc_msg *msg) * do, it's a "monitor_cond_change", which means that the conditional * monitor clauses were updated. * - * If further condition changes were pending, send them now. */ + * Mark the last requested conditions as acked and if further + * condition changes were pending, send them now. */ + ovsdb_idl_db_ack_condition(&idl->data); ovsdb_idl_send_cond_change(idl); idl->data.cond_seqno++; break; @@ -1503,30 +1520,60 @@ ovsdb_idl_condition_equals(const struct ovsdb_idl_condition *a, } static void -ovsdb_idl_condition_clone(struct ovsdb_idl_condition *dst, +ovsdb_idl_condition_clone(struct ovsdb_idl_condition **dst, const struct ovsdb_idl_condition *src) { - ovsdb_idl_condition_init(dst); + if (*dst) { + ovsdb_idl_condition_destroy(*dst); + } else { + *dst = xmalloc(sizeof **dst); + } + ovsdb_idl_condition_init(*dst); - dst->is_true = src->is_true; + (*dst)->is_true = src->is_true; const struct ovsdb_idl_clause *clause; HMAP_FOR_EACH (clause, hmap_node, &src->clauses) { - ovsdb_idl_condition_add_clause__(dst, clause, clause->hmap_node.hash); + ovsdb_idl_condition_add_clause__(*dst, clause, clause->hmap_node.hash); } } +static void +ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst, + struct ovsdb_idl_condition **src) +{ + if (*dst) { + ovsdb_idl_condition_destroy(*dst); + 
free(*dst); + } + *dst = *src; + *src = NULL; +} + static unsigned int ovsdb_idl_db_set_condition(struct ovsdb_idl_db *db, const struct ovsdb_idl_table_class *tc, const struct ovsdb_idl_condition *condition) { + struct ovsdb_idl_condition *table_cond; struct ovsdb_idl_table *table = ovsdb_idl_db_table_from_class(db, tc); unsigned int seqno = db->cond_seqno; - if (!ovsdb_idl_condition_equals(condition, &table->condition)) { - ovsdb_idl_condition_destroy(&table->condition); - ovsdb_idl_condition_clone(&table->condition, condition); - db->cond_changed = table->cond_changed = true; + + /* Compare the new condition to the last known condition which can be + * either "new" (not sent yet), "requested" or "acked", in this order. + */ + if (table->new_cond) { + table_cond = table->new_cond; + } else if (table->req_cond) { + table_cond = table->req_cond; + } else { + table_cond = table->ack_cond; + } + ovs_assert(table_cond); + + if (!ovsdb_idl_condition_equals(condition, table_cond)) { + ovsdb_idl_condition_clone(&table->new_cond, condition); + db->cond_changed = true; poll_immediate_wake(); return seqno + 1; } @@ -1571,9 +1618,8 @@ ovsdb_idl_condition_to_json(const struct ovsdb_idl_condition *cnd) } static struct json * -ovsdb_idl_create_cond_change_req(struct ovsdb_idl_table *table) +ovsdb_idl_create_cond_change_req(const struct ovsdb_idl_condition *cond) { - const struct ovsdb_idl_condition *cond = &table->condition; struct json *monitor_cond_change_request = json_object_create(); struct json *cond_json = ovsdb_idl_condition_to_json(cond); @@ -1593,8 +1639,12 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db) for (size_t i = 0; i < db->class_->n_tables; i++) { struct ovsdb_idl_table *table = &db->tables[i]; - if (table->cond_changed) { - struct json *req = ovsdb_idl_create_cond_change_req(table); + /* Always use the most recent conditions set by the IDL client when + * requesting monitor_cond_change, i.e., table->new_cond. 
+ */ + if (table->new_cond) { + struct json *req = + ovsdb_idl_create_cond_change_req(table->new_cond); if (req) { if (!monitor_cond_change_requests) { monitor_cond_change_requests = json_object_create(); @@ -1603,7 +1653,11 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db) table->class_->name, json_array_create_1(req)); } - table->cond_changed = false; + /* Mark the new condition as requested by moving it to req_cond. + * If there's already requested condition that's a bug. + */ + ovs_assert(table->req_cond == NULL); + ovsdb_idl_condition_move(&table->req_cond, &table->new_cond); } } @@ -1618,6 +1672,73 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db) return jsonrpc_create_request("monitor_cond_change", params, NULL); } +/* Marks all requested table conditions in 'db' as acked by the server. + * It should be called when the server replies to monitor_cond_change + * requests. + */ +static void +ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db) +{ + for (size_t i = 0; i < db->class_->n_tables; i++) { + struct ovsdb_idl_table *table = &db->tables[i]; + + if (table->req_cond) { + ovsdb_idl_condition_move(&table->ack_cond, &table->req_cond); + } + } +} + +/* Should be called when the IDL fsm is restarted and resyncs table conditions + * based on the state the DB is in: + * - if a non-zero last_id is available for the DB then upon reconnect + * the IDL should first request acked conditions to avoid missing updates + * about records that were added before the transaction with + * txn-id == last_id. If there were requested condition changes in flight + * (i.e., req_cond not NULL) and the IDL client didn't set new conditions + * (i.e., new_cond is NULL) then move req_cond to new_cond to trigger a + * follow up monitor_cond_change request. + * - if there's no last_id available for the DB then it's safe to use the + * latest conditions set by the IDL client even if they weren't acked yet. 
+ */ +static void +ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db) +{ + bool ack_all = uuid_is_zero(&db->last_id); + + db->cond_changed = false; + for (size_t i = 0; i < db->class_->n_tables; i++) { + struct ovsdb_idl_table *table = &db->tables[i]; + + /* When monitor_cond_since requests will be issued, the + * table->ack_cond condition will be added to the "where" clause". + * Follow up monitor_cond_change requests will use table->new_cond. + */ + if (ack_all) { + if (table->new_cond) { + ovsdb_idl_condition_move(&table->req_cond, &table->new_cond); + } + + if (table->req_cond) { + ovsdb_idl_condition_move(&table->ack_cond, &table->req_cond); + } + } else { + /* If there was no "unsent" condition but instead a + * monitor_cond_change request was in flight, move table->req_cond + * to table->new_cond and set db->cond_changed to trigger a new + * monitor_cond_change request. + * + * However, if a new condition has been set by the IDL client, + * monitor_cond_change will be sent anyway and will use the most + * recent table->new_cond so there's no need to update it here. + */ + if (table->req_cond && !table->new_cond) { + ovsdb_idl_condition_move(&table->new_cond, &table->req_cond); + db->cond_changed = true; + } + } + } +} + static void ovsdb_idl_send_cond_change(struct ovsdb_idl *idl) { @@ -2072,13 +2193,15 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db, monitor_request = json_object_create(); json_object_put(monitor_request, "columns", columns); - const struct ovsdb_idl_condition *cond = &table->condition; + /* Always use acked conditions when requesting + * monitor_cond/monitor_cond_since. 
+ */ + const struct ovsdb_idl_condition *cond = table->ack_cond; if ((monitor_method == OVSDB_IDL_MM_MONITOR_COND || monitor_method == OVSDB_IDL_MM_MONITOR_COND_SINCE) && - !ovsdb_idl_condition_is_true(cond)) { + cond && !ovsdb_idl_condition_is_true(cond)) { json_object_put(monitor_request, "where", ovsdb_idl_condition_to_json(cond)); - table->cond_changed = false; } json_object_put(monitor_requests, tc->name, json_array_create_1(monitor_request)); @@ -2086,8 +2209,6 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db, } free_schema(schema); - db->cond_changed = false; - struct json *params = json_array_create_3( json_string_create(db->class_->database), json_clone(db->monitor_id), diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index b5cbee7d9..4efed88e4 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -1828,3 +1828,59 @@ m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY], OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL connects to leader], 3, ['remote']) OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL reconnects to leader], 3, ['remote' '+remotestop' 'remote']) + +# same as OVSDB_CHECK_IDL but uses C IDL implementation with tcp +# with multiple remotes. 
+m4_define([OVSDB_CHECK_CLUSTER_IDL_C], + [AT_SETUP([$1 - C - tcp]) + AT_KEYWORDS([ovsdb server idl positive tcp socket $5]) + m4_define([LPBK],[127.0.0.1]) + AT_CHECK([ovsdb_cluster_start_idltest $2 "ptcp:0:"LPBK]) + PARSE_LISTENING_PORT([s1.log], [TCP_PORT_1]) + PARSE_LISTENING_PORT([s2.log], [TCP_PORT_2]) + PARSE_LISTENING_PORT([s3.log], [TCP_PORT_3]) + remotes=tcp:LPBK:$TCP_PORT_1,tcp:LPBK:$TCP_PORT_2,tcp:LPBK:$TCP_PORT_3 + + m4_if([$3], [], [], + [AT_CHECK([ovsdb-client transact $remotes $3], [0], [ignore], [ignore])]) + AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl tcp:LPBK:$TCP_PORT_1 $4], + [0], [stdout], [ignore]) + AT_CHECK([sort stdout | uuidfilt]m4_if([$7],,, [[| $7]]), + [0], [$5]) + AT_CLEANUP]) + +# Checks that monitor_cond_since works fine when disconnects happen +# with cond_change requests in flight (i.e., IDL is properly updated). +OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect], + 3, + [['["idltest", + {"op": "insert", + "table": "simple", + "row": {"i": 1, + "r": 1.0, + "b": true}}, + {"op": "insert", + "table": "simple", + "row": {"i": 2, + "r": 1.0, + "b": true}}]']], + [['condition simple []' \ + 'condition simple [["i","==",2]]' \ + 'condition simple [["i","==",1]]' \ + '+reconnect' \ + '["idltest", + {"op": "update", + "table": "simple", + "where": [["i", "==", 1]], + "row": {"r": 2.0 }}]']], + [[000: change conditions +001: empty +002: change conditions +003: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +004: change conditions +005: reconnect +006: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> +007: {"error":null,"result":[{"count":1}]} +008: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +009: done +]]) -- GitLab From 25a2af4fe952f6702b856d1381da217e3b6d2a57 Mon Sep 17 00:00:00 2001 From: Rui Cao Date: Mon, 15 Jun 2020 14:05:13 +0800 Subject: [PATCH 163/432] dpif-netlink: Fix Windows incompatibility when setting new feature 
OVS_DP_ATTR_NAME field is required when sending OVS_DP_CMD_SET to the Windows kernel driver. The function "dpif_netlink_set_features" does not set the OVS_DP_ATTR_NAME field which will cause set feature failure and ovs-vswitchd will exit. This patch fixes the issue by setting "request.name" in request. Reported-at: https://github.com/openvswitch/ovs-issues/issues/187 Submitted-at: https://github.com/openvswitch/ovs/pull/319 Signed-off-by: Rui Cao Signed-off-by: Ben Pfaff --- lib/dpif-netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 8e08b3c1c..1817e9f84 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -691,6 +691,7 @@ dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features) dpif_netlink_dp_init(&request); request.cmd = OVS_DP_CMD_SET; + request.name = dpif_->base_name; request.dp_ifindex = dpif->dp_ifindex; request.user_features = dpif->user_features | new_features; -- GitLab From 0401cf5f9e0669dc894e89105983098ef69763c3 Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: Fri, 5 Jun 2020 14:00:29 +0530 Subject: [PATCH 164/432] ovsdb idl: Try committing the pending txn in ovsdb_idl_loop_run. The function ovsdb_idl_loop_run(), after calling ovsdb_idl_run(), returns a transaction object (of type 'struct ovsdb_idl_txn'). The returned transaction object can be NULL if there is a pending transaction (loop->committing_txn) in the idl loop object. Normally the clients of idl library, first call ovsdb_idl_loop_run(), then do their own processing and create any idl transactions during this processing and then finally call ovsdb_idl_loop_commit_and_wait(). If ovsdb_idl_loop_run() returns NULL transaction object, then much of the processing done by the client gets wasted as in the case of ovn-controller. The client (in this case ovn-controller), can skip the processing and instead call ovsdb_idl_loop_commit_and_wait() if the transaction object is NULL. 
But ovn-controller uses IDL tracking and it may lose the tracked changes in that run. This patch tries to improve this scenario, by checking if the pending transaction can be committed in the ovsdb_idl_loop_run() itself and if the pending transaction is cleared (because of the response messages from ovsdb-server due to a transaction message in the previous run), ovsdb_idl_loop_run() can return a valid transaction object. CC: Han Zhou Signed-off-by: Numan Siddique Signed-off-by: Ben Pfaff --- lib/ovsdb-idl.c | 143 +++++++++++++++++++++++++++++++----------------- 1 file changed, 93 insertions(+), 50 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index c4371e988..0a18261fc 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -389,6 +389,8 @@ static void ovsdb_idl_send_cond_change(struct ovsdb_idl *idl); static void ovsdb_idl_destroy_indexes(struct ovsdb_idl_table *); static void ovsdb_idl_add_to_indexes(const struct ovsdb_idl_row *); static void ovsdb_idl_remove_from_indexes(const struct ovsdb_idl_row *); +static int ovsdb_idl_try_commit_loop_txn(struct ovsdb_idl_loop *loop, + bool *may_need_wakeup); static void ovsdb_idl_db_init(struct ovsdb_idl_db *db, const struct ovsdb_idl_class *class, @@ -5461,6 +5463,12 @@ struct ovsdb_idl_txn * ovsdb_idl_loop_run(struct ovsdb_idl_loop *loop) { ovsdb_idl_run(loop->idl); + + /* See if we can commit the loop->committing_txn. */ + if (loop->committing_txn) { + ovsdb_idl_try_commit_loop_txn(loop, NULL); + } + loop->open_txn = (loop->committing_txn || ovsdb_idl_get_seqno(loop->idl) == loop->skip_seqno ? NULL @@ -5468,6 +5476,87 @@ ovsdb_idl_loop_run(struct ovsdb_idl_loop *loop) return loop->open_txn; } +/* Attempts to commit the current transaction, if one is open. 
+ * + * If a transaction was open, in this or a previous iteration of the main loop, + * and had not before finished committing (successfully or unsuccessfully), the + * return value is one of: + * + * 1: The transaction committed successfully (or it did not change anything in + * the database). + * 0: The transaction failed. + * -1: The commit is still in progress. + * + * Thus, the return value is -1 if the transaction is in progress and otherwise + * true for success, false for failure. + * + * (In the corner case where the IDL sends a transaction to the database and + * the database commits it, and the connection between the IDL and the database + * drops before the IDL receives the message confirming the commit, this + * function can return 0 even though the transaction succeeded.) + */ +static int +ovsdb_idl_try_commit_loop_txn(struct ovsdb_idl_loop *loop, + bool *may_need_wakeup) +{ + if (!loop->committing_txn) { + /* Not a meaningful return value: no transaction was in progress. */ + return 1; + } + + int retval; + struct ovsdb_idl_txn *txn = loop->committing_txn; + + enum ovsdb_idl_txn_status status = ovsdb_idl_txn_commit(txn); + if (status != TXN_INCOMPLETE) { + switch (status) { + case TXN_TRY_AGAIN: + /* We want to re-evaluate the database when it's changed from + * the contents that it had when we started the commit. (That + * might have already happened.) */ + loop->skip_seqno = loop->precommit_seqno; + if (ovsdb_idl_get_seqno(loop->idl) != loop->skip_seqno + && may_need_wakeup) { + *may_need_wakeup = true; + } + retval = 0; + break; + + case TXN_SUCCESS: + /* Possibly some work on the database was deferred because no + * further transaction could proceed. Wake up again. 
*/ + retval = 1; + loop->cur_cfg = loop->next_cfg; + if (may_need_wakeup) { + *may_need_wakeup = true; + } + break; + + case TXN_UNCHANGED: + retval = 1; + loop->cur_cfg = loop->next_cfg; + break; + + case TXN_ABORTED: + case TXN_NOT_LOCKED: + case TXN_ERROR: + retval = 0; + break; + + case TXN_UNCOMMITTED: + case TXN_INCOMPLETE: + default: + OVS_NOT_REACHED(); + } + ovsdb_idl_txn_destroy(txn); + loop->committing_txn = NULL; + } else { + retval = -1; + } + + return retval; +} + /* Attempts to commit the current transaction, if one is open, and sets up the * poll loop to wake up when some more work might be needed. * @@ -5498,57 +5587,11 @@ ovsdb_idl_loop_commit_and_wait(struct ovsdb_idl_loop *loop) loop->precommit_seqno = ovsdb_idl_get_seqno(loop->idl); } - struct ovsdb_idl_txn *txn = loop->committing_txn; - int retval; - if (txn) { - enum ovsdb_idl_txn_status status = ovsdb_idl_txn_commit(txn); - if (status != TXN_INCOMPLETE) { - switch (status) { - case TXN_TRY_AGAIN: - /* We want to re-evaluate the database when it's changed from - * the contents that it had when we started the commit. (That - * might have already happened.) */ - loop->skip_seqno = loop->precommit_seqno; - if (ovsdb_idl_get_seqno(loop->idl) != loop->skip_seqno) { - poll_immediate_wake(); - } - retval = 0; - break; - - case TXN_SUCCESS: - /* Possibly some work on the database was deferred because no - * further transaction could proceed. Wake up again. */ - retval = 1; - loop->cur_cfg = loop->next_cfg; - poll_immediate_wake(); - break; - - case TXN_UNCHANGED: - retval = 1; - loop->cur_cfg = loop->next_cfg; - break; - - case TXN_ABORTED: - case TXN_NOT_LOCKED: - case TXN_ERROR: - retval = 0; - break; - - case TXN_UNCOMMITTED: - case TXN_INCOMPLETE: - default: - OVS_NOT_REACHED(); - } - ovsdb_idl_txn_destroy(txn); - loop->committing_txn = NULL; - } else { - retval = -1; - } - } else { - /* Not a meaningful return value: no transaction was in progress. 
*/ - retval = 1; + bool may_need_wakeup = false; + int retval = ovsdb_idl_try_commit_loop_txn(loop, &may_need_wakeup); + if (may_need_wakeup) { + poll_immediate_wake(); } - ovsdb_idl_wait(loop->idl); return retval; -- GitLab From 29b1dd934f8d0c4cf3d58abc2c10aa9d0ae68277 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Fri, 15 May 2020 00:17:47 -0700 Subject: [PATCH 165/432] odp-util.c: Fix dp_hash execution with slowpath actions. When dp_hash is executed with slowpath actions, it results in endless recirc loop in kernel datapath, and finally drops the packet, with kernel logs: openvswitch: ovs-system: deferred action limit reached, drop recirc action The root cause is that the dp_hash value calculated by slowpath is not passed to datapath when executing the recirc action, thus when the recirced packet miss upcall comes to userspace again, it generates the dp_hash and recirc action again, with same recirc_id, which in turn generates a megaflow with recirc action with the recirc_id same as the recirc_id in its match condition, which causes a loop in datapath. For example, this can be reproduced with below setup of OVN environment: LS1 LS2 | | |------R1------| VIF--LS0---R0-----| |------R3 |------R2------| Assume there is a route from the VIF to R3: R0 -> R1 -> R3, and there are two routes (ECMP) from R3 to the VIF: R3 -> R1 -> R0 R3 -> R2 -> R0 Now if we ping from the VIF to R3, the OVS flow execution on the HV of the VIF will hit the R3's datapath which has flows that respond to the ICMP packet by setting ICMP fields, which requires slowpath actions, and in later flow tables it will hit the "group" action that selects between the ECMP routes. By default OVN uses "dp_hash" method for the "group" action. For the first miss upcall packet, dp_hash value is empty, so the group action will be translated to "dp_hash" and "recirc". 
During action execution, because of the previous actions that set ICMP fields, the whole execution requires slowpath, so it tries to execute all actions in userspace in odp_execute_actions(), including dp_hash action, except the recirc action, which can only be executed in datapath. So the dp_hash value is calculated in userspace, and then the packet is injected to datapath for recirc action execution. However, the dp_hash calculated by the userspace is not passed to datapath. Because of this, the packet recirc in datapath doesn't have dp_hash value, and the miss upcall for the recirced packet hits the same flow tables and triggers same "dp_hash" and "recirc" action again, with exactly same recirc_id! This time, the new upcall doesn't require any slowpath execution, so both the dp_hash and recirc actions are executed in datapath, after creating a datapath megaflow like: recirc_id(XYZ),..., actions:hash(l4(0)),recirc(XYZ) with the match recirc_id equal to the recirc_id in the action, thus creating a loop. This patch fixes the problem by passing the calculated dp_hash value to datapath in odp_key_from_dp_packet(). Fixes: 572f732ab078 ("dpif-netdev: user space datapath recirculation") Signed-off-by: Han Zhou Signed-off-by: Ben Pfaff --- lib/odp-util.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/odp-util.c b/lib/odp-util.c index 9b31244c2..e907804aa 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -6393,6 +6393,10 @@ odp_key_from_dp_packet(struct ofpbuf *buf, const struct dp_packet *packet) nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, md->skb_priority); + if (md->dp_hash) { + nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, md->dp_hash); + } + if (flow_tnl_dst_is_set(&md->tunnel)) { tun_key_to_attr(buf, &md->tunnel, &md->tunnel, NULL, NULL); } -- GitLab From d072d2de011b5874e16a0fe81953c2448658746a Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Fri, 10 Jan 2020 10:34:43 +0100 Subject: [PATCH 166/432] ofproto-dpif-trace: Improve NAT tracing. 
When ofproto/trace detects a recirc action it resumes execution at the specified next table. However, if the ct action performs SNAT/DNAT, e.g., ct(commit,nat(src=1.1.1.1:4000),table=42), the src/dst IPs and ports in the oftrace_recirc_node->flow field are not updated. This leads to misleading outputs from ofproto/trace as real packets would actually first get NATed and might match different flows when recirculated. Assume the first IP/port from the NAT src/dst action will be used by conntrack for the translation and update the oftrace_recirc_node->flow accordingly. This is not entirely correct as conntrack might choose a different IP/port but the result is more realistic than before. This fix covers new connections. However, for reply traffic that executes actions of the form ct(nat, table=42) we still don't update the flow as we don't have any information about conntrack state when tracing. Also move the oftrace_recirc_node processing out of ofproto_trace() and to its own function, ofproto_trace_recirc_node() for better readability. Signed-off-by: Dumitru Ceara Signed-off-by: Ben Pfaff --- ofproto/ofproto-dpif-trace.c | 114 +++++++++++++++++++++++++++-------- ofproto/ofproto-dpif-trace.h | 2 + ofproto/ofproto-dpif-xlate.c | 6 +- tests/ofproto-dpif.at | 36 +++++++++++ 4 files changed, 131 insertions(+), 27 deletions(-) diff --git a/ofproto/ofproto-dpif-trace.c b/ofproto/ofproto-dpif-trace.c index 8ae8a221a..78a54c715 100644 --- a/ofproto/ofproto-dpif-trace.c +++ b/ofproto/ofproto-dpif-trace.c @@ -86,6 +86,7 @@ oftrace_node_destroy(struct oftrace_node *node) bool oftrace_add_recirc_node(struct ovs_list *recirc_queue, enum oftrace_recirc_type type, const struct flow *flow, + const struct ofpact_nat *ofn, const struct dp_packet *packet, uint32_t recirc_id, const uint16_t zone) { @@ -101,6 +102,7 @@ oftrace_add_recirc_node(struct ovs_list *recirc_queue, node->flow = *flow; node->flow.recirc_id = recirc_id; node->flow.ct_zone = zone; + node->nat_act = ofn; node->packet 
= packet ? dp_packet_clone(packet) : NULL; return true; @@ -179,6 +181,25 @@ oftrace_node_print_details(struct ds *output, } } +static void +oftrace_print_ip_flow(const struct flow *flow, int af, struct ds *output) +{ + if (af == AF_INET) { + ds_put_format(output, "nw_src="IP_FMT",tp_src=%"PRIu16"," + "nw_dst="IP_FMT",tp_dst=%"PRIu16, + IP_ARGS(flow->nw_src), ntohs(flow->tp_src), + IP_ARGS(flow->nw_dst), ntohs(flow->tp_dst)); + } else if (af == AF_INET6) { + ds_put_cstr(output, "ipv6_src="); + ipv6_format_addr_bracket(&flow->ipv6_src, output, true); + ds_put_format(output, ",tp_src=%"PRIu16, ntohs(flow->tp_src)); + ds_put_cstr(output, ",ipv6_dst="); + ipv6_format_addr_bracket(&flow->ipv6_dst, output, true); + ds_put_format(output, ",tp_dst=%"PRIu16, ntohs(flow->tp_dst)); + } + ds_put_char(output, '\n'); +} + /* Parses the 'argc' elements of 'argv', ignoring argv[0]. The following * forms are supported: * @@ -637,6 +658,73 @@ execute_actions_except_outputs(struct dpif *dpif, ofpbuf_uninit(&pruned_actions); } +static void +ofproto_trace_recirc_node(struct oftrace_recirc_node *node, + struct ovs_list *next_ct_states, + struct ds *output) +{ + ds_put_cstr(output, "\n\n"); + ds_put_char_multiple(output, '=', 79); + ds_put_format(output, "\nrecirc(%#"PRIx32")", node->recirc_id); + + if (next_ct_states && node->type == OFT_RECIRC_CONNTRACK) { + uint32_t ct_state; + if (ovs_list_is_empty(next_ct_states)) { + ct_state = CS_TRACKED | CS_NEW; + ds_put_cstr(output, " - resume conntrack with default " + "ct_state=trk|new (use --ct-next to customize)"); + } else { + ct_state = oftrace_pop_ct_state(next_ct_states); + struct ds s = DS_EMPTY_INITIALIZER; + format_flags(&s, ct_state_to_string, ct_state, '|'); + ds_put_format(output, " - resume conntrack with ct_state=%s", + ds_cstr(&s)); + ds_destroy(&s); + } + node->flow.ct_state = ct_state; + } + ds_put_char(output, '\n'); + + /* If there's any snat/dnat information assume we always translate to + * the first IP/port to make sure 
we don't match on incorrect flows later + * on. + */ + if (node->nat_act) { + const struct ofpact_nat *ofn = node->nat_act; + + ds_put_cstr(output, "Replacing src/dst IP/ports to simulate NAT:\n"); + ds_put_cstr(output, " Initial flow: "); + oftrace_print_ip_flow(&node->flow, ofn->range_af, output); + + if (ofn->flags & NX_NAT_F_SRC) { + if (ofn->range_af == AF_INET) { + node->flow.nw_src = ofn->range.addr.ipv4.min; + } else if (ofn->range_af == AF_INET6) { + node->flow.ipv6_src = ofn->range.addr.ipv6.min; + } + + if (ofn->range_af != AF_UNSPEC && ofn->range.proto.min) { + node->flow.tp_src = htons(ofn->range.proto.min); + } + } + if (ofn->flags & NX_NAT_F_DST) { + if (ofn->range_af == AF_INET) { + node->flow.nw_dst = ofn->range.addr.ipv4.min; + } else if (ofn->range_af == AF_INET6) { + node->flow.ipv6_dst = ofn->range.addr.ipv6.min; + } + + if (ofn->range_af != AF_UNSPEC && ofn->range.proto.min) { + node->flow.tp_dst = htons(ofn->range.proto.min); + } + } + ds_put_cstr(output, " Modified flow: "); + oftrace_print_ip_flow(&node->flow, ofn->range_af, output); + } + ds_put_char_multiple(output, '=', 79); + ds_put_cstr(output, "\n\n"); +} + static void ofproto_trace__(struct ofproto_dpif *ofproto, const struct flow *flow, const struct dp_packet *packet, struct ovs_list *recirc_queue, @@ -729,31 +817,7 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow, struct oftrace_recirc_node *recirc_node; LIST_FOR_EACH_POP (recirc_node, node, &recirc_queue) { - ds_put_cstr(output, "\n\n"); - ds_put_char_multiple(output, '=', 79); - ds_put_format(output, "\nrecirc(%#"PRIx32")", - recirc_node->recirc_id); - - if (next_ct_states && recirc_node->type == OFT_RECIRC_CONNTRACK) { - uint32_t ct_state; - if (ovs_list_is_empty(next_ct_states)) { - ct_state = CS_TRACKED | CS_NEW; - ds_put_cstr(output, " - resume conntrack with default " - "ct_state=trk|new (use --ct-next to customize)"); - } else { - ct_state = oftrace_pop_ct_state(next_ct_states); - struct ds s = 
DS_EMPTY_INITIALIZER; - format_flags(&s, ct_state_to_string, ct_state, '|'); - ds_put_format(output, " - resume conntrack with ct_state=%s", - ds_cstr(&s)); - ds_destroy(&s); - } - recirc_node->flow.ct_state = ct_state; - } - ds_put_char(output, '\n'); - ds_put_char_multiple(output, '=', 79); - ds_put_cstr(output, "\n\n"); - + ofproto_trace_recirc_node(recirc_node, next_ct_states, output); ofproto_trace__(ofproto, &recirc_node->flow, recirc_node->packet, &recirc_queue, ofpacts, ofpacts_len, output); oftrace_recirc_node_destroy(recirc_node); diff --git a/ofproto/ofproto-dpif-trace.h b/ofproto/ofproto-dpif-trace.h index 63dbb50ba..4b04f1756 100644 --- a/ofproto/ofproto-dpif-trace.h +++ b/ofproto/ofproto-dpif-trace.h @@ -73,6 +73,7 @@ struct oftrace_recirc_node { uint32_t recirc_id; struct flow flow; struct dp_packet *packet; + const struct ofpact_nat *nat_act; }; /* A node within a next_ct_states list. */ @@ -91,6 +92,7 @@ struct oftrace_node *oftrace_report(struct ovs_list *, enum oftrace_node_type, const char *text); bool oftrace_add_recirc_node(struct ovs_list *recirc_queue, enum oftrace_recirc_type, const struct flow *, + const struct ofpact_nat *, const struct dp_packet *, uint32_t recirc_id, const uint16_t zone); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 80fba84cb..e64c6d477 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -4999,7 +4999,8 @@ compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table, if (OVS_UNLIKELY(ctx->xin->trace) && recirc_id) { if (oftrace_add_recirc_node(ctx->xin->recirc_queue, OFT_RECIRC_CONNTRACK, &ctx->xin->flow, - ctx->xin->packet, recirc_id, zone)) { + ctx->ct_nat_action, ctx->xin->packet, + recirc_id, zone)) { xlate_report(ctx, OFT_DETAIL, "A clone of the packet is forked to " "recirculate. 
The forked pipeline will be resumed at " "table %u.", table); @@ -6205,7 +6206,6 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc, put_ct_label(&ctx->xin->flow, ctx->odp_actions, ctx->wc); put_ct_helper(ctx, ctx->odp_actions, ofc); put_ct_nat(ctx); - ctx->ct_nat_action = NULL; nl_msg_end_nested(ctx->odp_actions, ct_offset); ctx->wc->masks.ct_mark = old_ct_mark_mask; @@ -6216,6 +6216,8 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc, compose_recirculate_and_fork(ctx, ofc->recirc_table, zone); } + ctx->ct_nat_action = NULL; + /* The ct_* fields are only available in the scope of the 'recirc_table' * call chain. */ flow_clear_conntrack(&ctx->xin->flow); diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 41164d735..a03a63ac0 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -10684,6 +10684,42 @@ AT_CHECK([tail -1 stdout], [0], OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ofproto-dpif - nat - ofproto/trace]) +OVS_VSWITCHD_START + +add_of_ports br0 1 2 3 + +flow="in_port=1,udp,nw_src=1.1.1.1,nw_dst=1.1.1.2,udp_src=100,udp_dst=200" +AT_DATA([flows.txt], [dnl +table=0,priority=100,ip,nw_src=1.1.1.1,ct_state=-trk,action=ct(commit,nat(src=10.0.0.1-10.0.0.42:1000-1042),table=0) +table=0,priority=100,udp,ct_state=+trk,nw_src=10.0.0.1,nw_dst=1.1.1.2,tp_src=1000,tp_dst=200,action=ct(commit,nat(dst=20.0.0.1-20.0.0.42:2000-2042),table=0) +table=0,priority=100,udp,ct_state=+trk,nw_src=10.0.0.1,nw_dst=20.0.0.1,tp_src=1000,tp_dst=2000,action=3 +table=0,priority=90,ip,ct_state=+trk,action=2 +]) +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) +AT_CHECK([ovs-appctl ofproto/trace br0 "$flow"], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: 3 +]) + +flow="in_port=1,udp6,ipv6_src=1::1,ipv6_dst=1::2,udp_src=100,udp_dst=200" +AT_DATA([flows.txt], [dnl 
+table=0,priority=100,ip6,ipv6_src=1::1,ct_state=-trk,action=ct(commit,nat(src=[[10::1]]-[[10::42]]:1000-1042),table=0) +table=0,priority=100,udp6,ct_state=+trk,ipv6_src=10::1,ipv6_dst=1::2,tp_src=1000,tp_dst=200,action=ct(commit,nat(dst=[[20::1]]-[[20::42]]:2000-2042),table=0) +table=0,priority=100,udp6,ct_state=+trk,ipv6_src=10::1,ipv6_dst=20::1,tp_src=1000,tp_dst=2000,action=3 +table=0,priority=90,ip6,ct_state=+trk,action=2 +]) +AT_CHECK([ovs-ofctl del-flows br0]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) +AT_CHECK([ovs-appctl ofproto/trace br0 "$flow"], [0], [stdout]) +AT_CHECK([tail -1 stdout], [0], + [Datapath actions: 3 +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([ofproto - set mtu]) OVS_VSWITCHD_START -- GitLab From f0aed51ca70f2f7ff38b292b6aaf34b27ebd9575 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 9 Jun 2020 10:42:12 -0700 Subject: [PATCH 167/432] docs: Add note for AF_XDP installation Add notes about some configuration issues when enabling AF_XDP support. Signed-off-by: Yi-Hung Wei Signed-off-by: William Tu --- Documentation/intro/install/afxdp.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst index 99003e4db..3c8f78825 100644 --- a/Documentation/intro/install/afxdp.rst +++ b/Documentation/intro/install/afxdp.rst @@ -146,11 +146,20 @@ Make sure the libbpf.so is installed correctly:: ldconfig ldconfig -p | grep libbpf +.. note:: + Check /etc/ld.so.conf if libbpf is installed but can not be found by + ldconfig. + Third, ensure the standard OVS requirements are installed and bootstrap/configure the package:: ./boot.sh && ./configure --enable-afxdp +.. note:: + If you encounter "WARNING: bpf/libbpf.h: present but cannot be compiled", + check the Linux headers are in line with libbpf. For example, in Ubuntu, + check the installed linux-headers* and linux-libc-dev* dpkg. 
+ Finally, build and install OVS:: make && make install -- GitLab From fecb28051b357c636d7a5fed8ad5bb9add4f5b44 Mon Sep 17 00:00:00 2001 From: William Tu Date: Wed, 17 Jun 2020 08:38:40 -0700 Subject: [PATCH 168/432] rhel: Support RHEL 7.8 kernel module rpm build. Add support for RHEL7.8 GA release with kernel 3.10.0-1127. VMware-BZ: #2582834 Acked-by: Yi-Hung Wei Signed-off-by: William Tu --- rhel/openvswitch-kmod-fedora.spec.in | 5 +++-- rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh | 9 ++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/rhel/openvswitch-kmod-fedora.spec.in b/rhel/openvswitch-kmod-fedora.spec.in index c94f2f535..15eec6d4c 100644 --- a/rhel/openvswitch-kmod-fedora.spec.in +++ b/rhel/openvswitch-kmod-fedora.spec.in @@ -17,7 +17,8 @@ # - 3.10.0 major revision 693 (RHEL 7.4) # - 3.10.0 major revision 957 (RHEL 7.6) # - 3.10.0 major revision 1062 (RHEL 7.7) -# - 3.10.0 major revision 1101 (RHEL 7.8) +# - 3.10.0 major revision 1101 (RHEL 7.8 Beta) +# - 3.10.0 major revision 1127 (RHEL 7.8 GA) # By default, build against the current running kernel version #%define kernel 3.1.5-1.fc16.x86_64 #define kernel %{kernel_source} @@ -97,7 +98,7 @@ if grep -qs "suse" /etc/os-release; then elif [ "$mainline_major" = "3" ] && [ "$mainline_minor" = "10" ] && { [ "$major_rev" = "327" ] || [ "$major_rev" = "693" ] || \ [ "$major_rev" = "957" ] || [ "$major_rev" == "1062" ] || \ - [ "$major_rev" = "1101" ]; }; then + [ "$major_rev" = "1101" ] || [ "$major_rev" = "1127" ] ; }; then # For RHEL 7.2, 7.4, 7.6, 7.7, and 7.8 if [ -x "%{_datadir}/openvswitch/scripts/ovs-kmod-manage.sh" ]; then %{_datadir}/openvswitch/scripts/ovs-kmod-manage.sh diff --git a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh index a9b5cdd81..93d487101 100644 --- a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh +++ b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh @@ -19,7 +19,8 @@ # - 3.10.0 
major revision 693 (RHEL 7.4) # - 3.10.0 major revision 957 (RHEL 7.6) # - 3.10.0 major revision 1062 (RHEL 7.7) -# - 3.10.0 major revision 1101 (RHEL 7.8) +# - 3.10.0 major revision 1101 (RHEL 7.8 Beta) +# - 3.10.0 major revision 1127 (RHEL 7.8 GA) # - 4.4.x, x >= 73 (SLES 12 SP3) # - 4.12.x, x >= 14 (SLES 12 SP4). # It is packaged in the openvswitch kmod RPM and run in the post-install @@ -108,6 +109,12 @@ if [ "$mainline_major" = "3" ] && [ "$mainline_minor" = "10" ]; then ver_offset=4 installed_ver="$minor_rev" elif [ "$major_rev" = "1101" ]; then +# echo "rhel78" + comp_ver=10 + ver_offset=4 + installed_ver="$minor_rev" + fi + elif [ "$major_rev" = "1127" ]; then # echo "rhel78" comp_ver=10 ver_offset=4 -- GitLab From 1fe42975639854bc6cf4784b2554b438301c0b92 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 16 Jun 2020 16:03:57 +0300 Subject: [PATCH 169/432] netdev-offload-tc: Revert tunnel src/dst port masks handling The cited commit intended to add tc support for masking tunnel src/dst ips and ports. It's not possible to do tunnel ports masking with openflow rules and the default mask for tunnel ports set to 0 in tnl_wc_init(), unlike tunnel ports default mask which is full mask. So instead of never passing tunnel ports to tc, revert the changes to tunnel ports to always pass the tunnel port. In sw classification is done by the kernel, but for hw we must match the tunnel dst port. 
Fixes: 5f568d049130 ("netdev-offload-tc: Allow to match the IP and port mask of tunnel") Signed-off-by: Roi Dayan Reviewed-by: Eli Britstein Signed-off-by: Simon Horman --- NEWS | 2 -- include/openvswitch/match.h | 3 --- lib/match.c | 13 ------------- lib/netdev-offload-tc.c | 13 ++----------- lib/tc.c | 28 ++-------------------------- tests/tunnel.at | 4 ++-- 6 files changed, 6 insertions(+), 57 deletions(-) diff --git a/NEWS b/NEWS index 88b273a0a..22cacda20 100644 --- a/NEWS +++ b/NEWS @@ -19,8 +19,6 @@ Post-v2.13.0 - Tunnels: TC Flower offload * Tunnel Local endpoint address masked match are supported. * Tunnel Romte endpoint address masked match are supported. - * Tunnel Local endpoint ports masked match are supported. - * Tunnel Romte endpoint ports masked match are supported. v2.13.0 - 14 Feb 2020 diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h index 9e480318e..2e8812048 100644 --- a/include/openvswitch/match.h +++ b/include/openvswitch/match.h @@ -105,9 +105,6 @@ void match_set_tun_flags(struct match *match, uint16_t flags); void match_set_tun_flags_masked(struct match *match, uint16_t flags, uint16_t mask); void match_set_tun_tp_dst(struct match *match, ovs_be16 tp_dst); void match_set_tun_tp_dst_masked(struct match *match, ovs_be16 port, ovs_be16 mask); -void match_set_tun_tp_src(struct match *match, ovs_be16 tp_src); -void match_set_tun_tp_src_masked(struct match *match, - ovs_be16 port, ovs_be16 mask); void match_set_tun_gbp_id_masked(struct match *match, ovs_be16 gbp_id, ovs_be16 mask); void match_set_tun_gbp_id(struct match *match, ovs_be16 gbp_id); void match_set_tun_gbp_flags_masked(struct match *match, uint8_t flags, uint8_t mask); diff --git a/lib/match.c b/lib/match.c index a77554851..ba716579d 100644 --- a/lib/match.c +++ b/lib/match.c @@ -293,19 +293,6 @@ match_set_tun_tp_dst_masked(struct match *match, ovs_be16 port, ovs_be16 mask) match->flow.tunnel.tp_dst = port & mask; } -void -match_set_tun_tp_src(struct match 
*match, ovs_be16 tp_src) -{ - match_set_tun_tp_src_masked(match, tp_src, OVS_BE16_MAX); -} - -void -match_set_tun_tp_src_masked(struct match *match, ovs_be16 port, ovs_be16 mask) -{ - match->wc.masks.tunnel.tp_src = mask; - match->flow.tunnel.tp_src = port & mask; -} - void match_set_tun_gbp_id_masked(struct match *match, ovs_be16 gbp_id, ovs_be16 mask) { diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index aa6d22e74..258d31f54 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -712,15 +712,8 @@ parse_tc_flower_to_match(struct tc_flower *flower, match_set_tun_ttl_masked(match, flower->key.tunnel.ttl, flower->mask.tunnel.ttl); } - if (flower->mask.tunnel.tp_dst) { - match_set_tun_tp_dst_masked(match, - flower->key.tunnel.tp_dst, - flower->mask.tunnel.tp_dst); - } - if (flower->mask.tunnel.tp_src) { - match_set_tun_tp_src_masked(match, - flower->key.tunnel.tp_src, - flower->mask.tunnel.tp_src); + if (flower->key.tunnel.tp_dst) { + match_set_tun_tp_dst(match, flower->key.tunnel.tp_dst); } if (flower->key.tunnel.metadata.present.len) { flower_tun_opt_to_match(match, flower); @@ -1470,8 +1463,6 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, flower.mask.tunnel.ipv6.ipv6_dst = tnl_mask->ipv6_dst; flower.mask.tunnel.tos = tnl_mask->ip_tos; flower.mask.tunnel.ttl = tnl_mask->ip_ttl; - flower.mask.tunnel.tp_src = tnl_mask->tp_src; - flower.mask.tunnel.tp_dst = tnl_mask->tp_dst; flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY) ? 
tnl_mask->tun_id : 0; flower_match_to_tun_opt(&flower, tnl, tnl_mask); flower.tunnel = true; diff --git a/lib/tc.c b/lib/tc.c index 29b4328d8..c2ab77553 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -395,12 +395,6 @@ static const struct nl_policy tca_flower_policy[] = { .optional = true, }, [TCA_FLOWER_KEY_ENC_UDP_DST_PORT] = { .type = NL_A_U16, .optional = true, }, - [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT] = { .type = NL_A_U16, - .optional = true, }, - [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK] = { .type = NL_A_U16, - .optional = true, }, - [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK] = { .type = NL_A_U16, - .optional = true, }, [TCA_FLOWER_KEY_FLAGS] = { .type = NL_A_BE32, .optional = true, }, [TCA_FLOWER_KEY_FLAGS_MASK] = { .type = NL_A_BE32, .optional = true, }, [TCA_FLOWER_KEY_IP_TTL] = { .type = NL_A_U8, @@ -746,15 +740,7 @@ nl_parse_flower_tunnel(struct nlattr **attrs, struct tc_flower *flower) flower->key.tunnel.ipv6.ipv6_dst = nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_DST]); } - if (attrs[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]) { - flower->mask.tunnel.tp_src = - nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]); - flower->key.tunnel.tp_src = - nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT]); - } - if (attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]) { - flower->mask.tunnel.tp_dst = - nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]); + if (attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]) { flower->key.tunnel.tp_dst = nl_attr_get_be16(attrs[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]); } @@ -2713,10 +2699,7 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower) struct in6_addr *ipv6_dst_mask = &flower->mask.tunnel.ipv6.ipv6_dst; struct in6_addr *ipv6_src = &flower->key.tunnel.ipv6.ipv6_src; struct in6_addr *ipv6_dst = &flower->key.tunnel.ipv6.ipv6_dst; - ovs_be16 tp_dst_mask = flower->mask.tunnel.tp_dst; - ovs_be16 tp_src_mask = flower->mask.tunnel.tp_src; ovs_be16 tp_dst = flower->key.tunnel.tp_dst; - ovs_be16 tp_src = 
flower->key.tunnel.tp_src; ovs_be32 id = be64_to_be32(flower->key.tunnel.id); uint8_t tos = flower->key.tunnel.tos; uint8_t ttl = flower->key.tunnel.ttl; @@ -2748,16 +2731,9 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower) nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TTL, ttl); nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TTL_MASK, ttl_mask); } - if (tp_dst_mask) { - nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK, - tp_dst_mask); + if (tp_dst) { nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_DST_PORT, tp_dst); } - if (tp_src_mask) { - nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK, - tp_src_mask); - nl_msg_put_be16(request, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT, tp_src); - } if (id_mask) { nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_KEY_ID, id); } diff --git a/tests/tunnel.at b/tests/tunnel.at index a74a67aa8..e08fd1e04 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -123,10 +123,10 @@ AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl p2 2/2: (dummy) ]) -AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(dst=1.1.1.1,src=3.3.3.200/255.255.255.0,tp_dst=123,tp_src=1/0xf,ttl=64),recirc_id(0),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"]) +AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(dst=1.1.1.1,src=3.3.3.200/255.255.255.0,tp_dst=123,tp_src=1,ttl=64),recirc_id(0),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"]) AT_CHECK([ovs-appctl dpctl/dump-flows | tail -1], [0], [dnl -tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1/0xf,tp_dst=123),recirc_id(0),in_port(1),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 +tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2 ]) OVS_VSWITCHD_STOP -- GitLab From 9df65060cf4c27553ee5e29f74ef6807dd5af992 Mon Sep 17 00:00:00 2001 From: Vishal Deep Ajmera Date: Fri, 22 May 2020 10:50:05 +0200 Subject: [PATCH 170/432] userspace: Avoid dp_hash recirculation 
for balance-tcp bond mode. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: In OVS, flows with output over a bond interface of type “balance-tcp” get translated by the ofproto layer into "HASH" and "RECIRC" datapath actions. After recirculation, the packet is forwarded to the bond member port based on 8 bits of the datapath hash value computed through dp_hash. This causes performance degradation in the following ways: 1. The recirculation of the packet implies another lookup of the packet’s flow key in the exact match cache (EMC) and potentially Megaflow classifier (DPCLS). This is the biggest cost factor. 2. The recirculated packets have a new “RSS” hash and compete with the original packets for the scarce number of EMC slots. This implies more EMC misses and potentially EMC thrashing causing costly DPCLS lookups. 3. The 256 extra megaflow entries per bond for dp_hash bond selection put additional load on the revalidation threads. Owing to this performance degradation, deployments stick to “balance-slb” bond mode even though it does not do active-active load balancing for VXLAN- and GRE-tunnelled traffic because all tunnel packets have the same source MAC address. Proposed optimization: This proposal introduces a new load-balancing output action instead of recirculation. Maintain one table per-bond (could just be an array of uint16's) and program it the same way internal flows are created today for each possible hash value (256 entries) from the ofproto layer. Use this table to load-balance flows as part of output action processing. Currently xlate_normal() -> output_normal() -> bond_update_post_recirc_rules() -> bond_may_recirc() and compose_output_action__() generate 'dp_hash(hash_l4(0))' and 'recirc()' actions. In this case the RecircID identifies the bond. 
For the recirculated packets the ofproto layer installs megaflow entries that match on RecircID and masked dp_hash and send them to the corresponding output port. Instead, we will now generate the action 'lb_output()'. This combines hash computation (only if needed, else re-use RSS hash) and inline load-balancing over the bond. This action is used *only* for balance-tcp bonds in the userspace datapath (the OVS kernel datapath remains unchanged). Example: Current scheme: With 8 UDP flows (with random UDP src port): flow-dump from pmd on cpu core: 2 recirc_id(0),in_port(7),<...> actions:hash(hash_l4(0)),recirc(0x1) recirc_id(0x1),dp_hash(0xf8e02b7e/0xff),<...> actions:2 recirc_id(0x1),dp_hash(0xb236c260/0xff),<...> actions:1 recirc_id(0x1),dp_hash(0x7d89eb18/0xff),<...> actions:1 recirc_id(0x1),dp_hash(0xa78d75df/0xff),<...> actions:2 recirc_id(0x1),dp_hash(0xb58d846f/0xff),<...> actions:2 recirc_id(0x1),dp_hash(0x24534406/0xff),<...> actions:1 recirc_id(0x1),dp_hash(0x3cf32550/0xff),<...> actions:1 New scheme: We can do with a single flow entry (for any number of new flows): in_port(7),<...> actions:lb_output(1) A new CLI has been added to dump the datapath bond cache as given below. 
# ovs-appctl dpif-netdev/bond-show [dp] Bond cache: bond-id 1 : bucket 0 - slave 2 bucket 1 - slave 1 bucket 2 - slave 2 bucket 3 - slave 1 Co-authored-by: Manohar Krishnappa Chidambaraswamy Signed-off-by: Manohar Krishnappa Chidambaraswamy Signed-off-by: Vishal Deep Ajmera Tested-by: Matteo Croce Tested-by: Adrian Moreno Acked-by: Eelco Chaudron Signed-off-by: Ilya Maximets --- NEWS | 3 + .../linux/compat/include/linux/openvswitch.h | 1 + lib/dpif-netdev.c | 425 ++++++++++++++++-- lib/dpif-netlink.c | 3 + lib/dpif-provider.h | 12 + lib/dpif.c | 39 ++ lib/dpif.h | 12 + lib/odp-execute.c | 2 + lib/odp-util.c | 14 + ofproto/bond.c | 111 ++++- ofproto/bond.h | 5 + ofproto/ofproto-dpif-ipfix.c | 1 + ofproto/ofproto-dpif-sflow.c | 3 +- ofproto/ofproto-dpif-xlate.c | 15 +- ofproto/ofproto-dpif.c | 30 ++ ofproto/ofproto-dpif.h | 10 +- tests/lacp.at | 9 + tests/odp.at | 1 + vswitchd/bridge.c | 5 + vswitchd/vswitch.xml | 22 + 20 files changed, 657 insertions(+), 66 deletions(-) diff --git a/NEWS b/NEWS index 22cacda20..309c00f6b 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,9 @@ Post-v2.13.0 by enabling interrupt mode. - Userspace datapath: * Add support for conntrack zone-based timeout policy. + * New configuration knob 'other_config:lb-output-action' for bond ports + that enables new datapath action 'lb_output' to avoid recirculation + in balance-tcp mode. Disabled by default. - Tunnels: TC Flower offload * Tunnel Local endpoint address masked match are supported. * Tunnel Romte endpoint address masked match are supported. diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index f7c3b2e99..cc41bbea4 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -1023,6 +1023,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/ OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. */ OVS_ACTION_ATTR_DROP, /* u32 xlate_error. 
*/ + OVS_ACTION_ATTR_LB_OUTPUT, /* u32 bond-id. */ #endif __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 51c888501..1086efd47 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -111,6 +111,7 @@ COVERAGE_DEFINE(datapath_drop_tunnel_push_error); COVERAGE_DEFINE(datapath_drop_tunnel_pop_error); COVERAGE_DEFINE(datapath_drop_recirc_error); COVERAGE_DEFINE(datapath_drop_invalid_port); +COVERAGE_DEFINE(datapath_drop_invalid_bond); COVERAGE_DEFINE(datapath_drop_invalid_tnl_port); COVERAGE_DEFINE(datapath_drop_rx_invalid_packet); @@ -310,6 +311,7 @@ struct pmd_auto_lb { * * dp_netdev_mutex (global) * port_mutex + * bond_mutex * non_pmd_mutex */ struct dp_netdev { @@ -377,6 +379,10 @@ struct dp_netdev { struct conntrack *conntrack; struct pmd_auto_lb pmd_alb; + + /* Bonds. */ + struct ovs_mutex bond_mutex; /* Protects updates of 'tx_bonds'. */ + struct cmap tx_bonds; /* Contains 'struct tx_bond'. */ }; static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id) @@ -608,6 +614,20 @@ struct tx_port { struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST]; }; +/* Contained by struct tx_bond 'slave_buckets'. */ +struct slave_entry { + odp_port_t slave_id; + atomic_ullong n_packets; + atomic_ullong n_bytes; +}; + +/* Contained by struct dp_netdev_pmd_thread's 'tx_bonds'. */ +struct tx_bond { + struct cmap_node node; + uint32_t bond_id; + struct slave_entry slave_buckets[BOND_BUCKETS]; +}; + /* A set of properties for the current processing loop that is not directly * associated with the pmd thread itself, but with the packets being * processed or the short-term system configuration (for example, time). @@ -740,6 +760,11 @@ struct dp_netdev_pmd_thread { * read by the pmd thread. */ struct hmap tx_ports OVS_GUARDED; + struct ovs_mutex bond_mutex; /* Protects updates of 'tx_bonds'. */ + /* Map of 'tx_bond's used for transmission. 
Written by the main thread + * and read by the pmd thread. */ + struct cmap tx_bonds; + /* These are thread-local copies of 'tx_ports'. One contains only tunnel * ports (that support push_tunnel/pop_tunnel), the other contains ports * with at least one txq (that support send). A port can be in both. @@ -831,6 +856,12 @@ static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd, static int dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd, bool force); +static void dp_netdev_add_bond_tx_to_pmd(struct dp_netdev_pmd_thread *pmd, + struct tx_bond *bond, bool update) + OVS_EXCLUDED(pmd->bond_mutex); +static void dp_netdev_del_bond_tx_from_pmd(struct dp_netdev_pmd_thread *pmd, + uint32_t bond_id) + OVS_EXCLUDED(pmd->bond_mutex); static void reconfigure_datapath(struct dp_netdev *dp) OVS_REQUIRES(dp->port_mutex); @@ -1397,6 +1428,49 @@ pmd_perf_show_cmd(struct unixctl_conn *conn, int argc, par.command_type = PMD_INFO_PERF_SHOW; dpif_netdev_pmd_info(conn, argc, argv, &par); } + +static void +dpif_netdev_bond_show(struct unixctl_conn *conn, int argc, + const char *argv[], void *aux OVS_UNUSED) +{ + struct ds reply = DS_EMPTY_INITIALIZER; + struct dp_netdev *dp = NULL; + + ovs_mutex_lock(&dp_netdev_mutex); + if (argc == 2) { + dp = shash_find_data(&dp_netdevs, argv[1]); + } else if (shash_count(&dp_netdevs) == 1) { + /* There's only one datapath. 
*/ + dp = shash_first(&dp_netdevs)->data; + } + if (!dp) { + ovs_mutex_unlock(&dp_netdev_mutex); + unixctl_command_reply_error(conn, + "please specify an existing datapath"); + return; + } + + if (cmap_count(&dp->tx_bonds) > 0) { + struct tx_bond *dp_bond_entry; + uint32_t slave_id; + + ds_put_cstr(&reply, "Bonds:\n"); + CMAP_FOR_EACH (dp_bond_entry, node, &dp->tx_bonds) { + ds_put_format(&reply, " bond-id %"PRIu32":\n", + dp_bond_entry->bond_id); + for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) { + slave_id = + odp_to_u32(dp_bond_entry->slave_buckets[bucket].slave_id); + ds_put_format(&reply, " bucket %d - slave %"PRIu32"\n", + bucket, slave_id); + } + } + } + ovs_mutex_unlock(&dp_netdev_mutex); + unixctl_command_reply(conn, ds_cstr(&reply)); + ds_destroy(&reply); +} + static int dpif_netdev_init(void) @@ -1428,6 +1502,9 @@ dpif_netdev_init(void) "[-us usec] [-q qlen]", 0, 10, pmd_perf_log_set_cmd, NULL); + unixctl_command_register("dpif-netdev/bond-show", "[dp]", + 0, 1, dpif_netdev_bond_show, + NULL); return 0; } @@ -1552,6 +1629,9 @@ create_dp_netdev(const char *name, const struct dpif_class *class, ovs_mutex_init_recursive(&dp->port_mutex); hmap_init(&dp->ports); dp->port_seq = seq_create(); + ovs_mutex_init(&dp->bond_mutex); + cmap_init(&dp->tx_bonds); + fat_rwlock_init(&dp->upcall_rwlock); dp->reconfigure_seq = seq_create(); @@ -1658,6 +1738,12 @@ dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id) } } +static uint32_t +hash_bond_id(uint32_t bond_id) +{ + return hash_int(bond_id, 0); +} + /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp' * through the 'dp_netdevs' shash while freeing 'dp'. 
*/ static void @@ -1665,6 +1751,7 @@ dp_netdev_free(struct dp_netdev *dp) OVS_REQUIRES(dp_netdev_mutex) { struct dp_netdev_port *port, *next; + struct tx_bond *bond; shash_find_and_delete(&dp_netdevs, dp->name); @@ -1674,6 +1761,13 @@ dp_netdev_free(struct dp_netdev *dp) } ovs_mutex_unlock(&dp->port_mutex); + ovs_mutex_lock(&dp->bond_mutex); + CMAP_FOR_EACH (bond, node, &dp->tx_bonds) { + cmap_remove(&dp->tx_bonds, &bond->node, hash_bond_id(bond->bond_id)); + ovsrcu_postpone(free, bond); + } + ovs_mutex_unlock(&dp->bond_mutex); + dp_netdev_destroy_all_pmds(dp, true); cmap_destroy(&dp->poll_threads); @@ -1692,6 +1786,9 @@ dp_netdev_free(struct dp_netdev *dp) hmap_destroy(&dp->ports); ovs_mutex_destroy(&dp->port_mutex); + cmap_destroy(&dp->tx_bonds); + ovs_mutex_destroy(&dp->bond_mutex); + /* Upcalls must be disabled at this point */ dp_netdev_destroy_upcall_lock(dp); @@ -4423,6 +4520,20 @@ tx_port_lookup(const struct hmap *hmap, odp_port_t port_no) return NULL; } +static struct tx_bond * +tx_bond_lookup(const struct cmap *tx_bonds, uint32_t bond_id) +{ + uint32_t hash = hash_bond_id(bond_id); + struct tx_bond *tx; + + CMAP_FOR_EACH_WITH_HASH (tx, node, hash, tx_bonds) { + if (tx->bond_id == bond_id) { + return tx; + } + } + return NULL; +} + static int port_reconfigure(struct dp_netdev_port *port) { @@ -5070,14 +5181,22 @@ reconfigure_datapath(struct dp_netdev *dp) } } - /* Add every port to the tx cache of every pmd thread, if it's not - * there already and if this pmd has at least one rxq to poll. */ + /* Add every port and bond to the tx port and bond caches of + * every pmd thread, if it's not there already and if this pmd + * has at least one rxq to poll. 
+ */ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { ovs_mutex_lock(&pmd->port_mutex); if (hmap_count(&pmd->poll_list) || pmd->core_id == NON_PMD_CORE_ID) { + struct tx_bond *bond; + HMAP_FOR_EACH (port, node, &dp->ports) { dp_netdev_add_port_tx_to_pmd(pmd, port); } + + CMAP_FOR_EACH (bond, node, &dp->tx_bonds) { + dp_netdev_add_bond_tx_to_pmd(pmd, bond, false); + } } ovs_mutex_unlock(&pmd->port_mutex); } @@ -6125,6 +6244,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, atomic_init(&pmd->reload, false); ovs_mutex_init(&pmd->flow_mutex); ovs_mutex_init(&pmd->port_mutex); + ovs_mutex_init(&pmd->bond_mutex); cmap_init(&pmd->flow_table); cmap_init(&pmd->classifiers); pmd->ctx.last_rxq = NULL; @@ -6135,6 +6255,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, hmap_init(&pmd->tx_ports); hmap_init(&pmd->tnl_port_cache); hmap_init(&pmd->send_port_cache); + cmap_init(&pmd->tx_bonds); /* init the 'flow_cache' since there is no * actual thread created for NON_PMD_CORE_ID. 
*/ if (core_id == NON_PMD_CORE_ID) { @@ -6155,6 +6276,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) hmap_destroy(&pmd->send_port_cache); hmap_destroy(&pmd->tnl_port_cache); hmap_destroy(&pmd->tx_ports); + cmap_destroy(&pmd->tx_bonds); hmap_destroy(&pmd->poll_list); /* All flows (including their dpcls_rules) have been deleted already */ CMAP_FOR_EACH (cls, node, &pmd->classifiers) { @@ -6166,6 +6288,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd) ovs_mutex_destroy(&pmd->flow_mutex); seq_destroy(pmd->reload_seq); ovs_mutex_destroy(&pmd->port_mutex); + ovs_mutex_destroy(&pmd->bond_mutex); free(pmd); } @@ -6235,6 +6358,7 @@ dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd) { struct rxq_poll *poll; struct tx_port *port; + struct tx_bond *tx; ovs_mutex_lock(&pmd->port_mutex); HMAP_FOR_EACH_POP (poll, node, &pmd->poll_list) { @@ -6244,6 +6368,13 @@ dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd) free(port); } ovs_mutex_unlock(&pmd->port_mutex); + + ovs_mutex_lock(&pmd->bond_mutex); + CMAP_FOR_EACH (tx, node, &pmd->tx_bonds) { + cmap_remove(&pmd->tx_bonds, &tx->node, hash_bond_id(tx->bond_id)); + ovsrcu_postpone(free, tx); + } + ovs_mutex_unlock(&pmd->bond_mutex); } /* Adds rx queue to poll_list of PMD thread, if it's not there already. */ @@ -6319,6 +6450,62 @@ dp_netdev_del_port_tx_from_pmd(struct dp_netdev_pmd_thread *pmd, free(tx); pmd->need_reload = true; } + +/* Add bond to the tx bond cmap of 'pmd'. */ +static void +dp_netdev_add_bond_tx_to_pmd(struct dp_netdev_pmd_thread *pmd, + struct tx_bond *bond, bool update) + OVS_EXCLUDED(pmd->bond_mutex) +{ + struct tx_bond *tx; + + ovs_mutex_lock(&pmd->bond_mutex); + tx = tx_bond_lookup(&pmd->tx_bonds, bond->bond_id); + + if (tx && !update) { + /* It's not an update and the entry already exists. Do nothing. */ + goto unlock; + } + + if (tx) { + struct tx_bond *new_tx = xmemdup(bond, sizeof *bond); + + /* Copy the stats for each bucket. 
*/ + for (int i = 0; i < BOND_BUCKETS; i++) { + uint64_t n_packets, n_bytes; + + atomic_read_relaxed(&tx->slave_buckets[i].n_packets, &n_packets); + atomic_read_relaxed(&tx->slave_buckets[i].n_bytes, &n_bytes); + atomic_init(&new_tx->slave_buckets[i].n_packets, n_packets); + atomic_init(&new_tx->slave_buckets[i].n_bytes, n_bytes); + } + cmap_replace(&pmd->tx_bonds, &tx->node, &new_tx->node, + hash_bond_id(bond->bond_id)); + ovsrcu_postpone(free, tx); + } else { + tx = xmemdup(bond, sizeof *bond); + cmap_insert(&pmd->tx_bonds, &tx->node, hash_bond_id(bond->bond_id)); + } +unlock: + ovs_mutex_unlock(&pmd->bond_mutex); +} + +/* Delete bond from the tx bond cmap of 'pmd'. */ +static void +dp_netdev_del_bond_tx_from_pmd(struct dp_netdev_pmd_thread *pmd, + uint32_t bond_id) + OVS_EXCLUDED(pmd->bond_mutex) +{ + struct tx_bond *tx; + + ovs_mutex_lock(&pmd->bond_mutex); + tx = tx_bond_lookup(&pmd->tx_bonds, bond_id); + if (tx) { + cmap_remove(&pmd->tx_bonds, &tx->node, hash_bond_id(tx->bond_id)); + ovsrcu_postpone(free, tx); + } + ovs_mutex_unlock(&pmd->bond_mutex); +} static char * dpif_netdev_get_datapath_version(void) @@ -7144,6 +7331,96 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd, } } +static bool +dp_execute_output_action(struct dp_netdev_pmd_thread *pmd, + struct dp_packet_batch *packets_, + bool should_steal, odp_port_t port_no) +{ + struct tx_port *p = pmd_send_port_cache_lookup(pmd, port_no); + struct dp_packet_batch out; + + if (!OVS_LIKELY(p)) { + COVERAGE_ADD(datapath_drop_invalid_port, + dp_packet_batch_size(packets_)); + dp_packet_delete_batch(packets_, should_steal); + return false; + } + if (!should_steal) { + dp_packet_batch_clone(&out, packets_); + dp_packet_batch_reset_cutlen(packets_); + packets_ = &out; + } + dp_packet_batch_apply_cutlen(packets_); +#ifdef DPDK_NETDEV + if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts) + && packets_->packets[0]->source + != p->output_pkts.packets[0]->source)) { + /* XXX: netdev-dpdk assumes 
that all packets in a single + * output batch has the same source. Flush here to + * avoid memory access issues. */ + dp_netdev_pmd_flush_output_on_port(pmd, p); + } +#endif + if (dp_packet_batch_size(&p->output_pkts) + + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) { + /* Flush here to avoid overflow. */ + dp_netdev_pmd_flush_output_on_port(pmd, p); + } + if (dp_packet_batch_is_empty(&p->output_pkts)) { + pmd->n_output_batches++; + } + + struct dp_packet *packet; + DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) { + p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] = + pmd->ctx.last_rxq; + dp_packet_batch_add(&p->output_pkts, packet); + } + return true; +} + +static void +dp_execute_lb_output_action(struct dp_netdev_pmd_thread *pmd, + struct dp_packet_batch *packets_, + bool should_steal, uint32_t bond) +{ + struct tx_bond *p_bond = tx_bond_lookup(&pmd->tx_bonds, bond); + struct dp_packet_batch out; + struct dp_packet *packet; + + if (!p_bond) { + COVERAGE_ADD(datapath_drop_invalid_bond, + dp_packet_batch_size(packets_)); + dp_packet_delete_batch(packets_, should_steal); + return; + } + if (!should_steal) { + dp_packet_batch_clone(&out, packets_); + dp_packet_batch_reset_cutlen(packets_); + packets_ = &out; + } + dp_packet_batch_apply_cutlen(packets_); + + DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) { + /* + * Lookup the bond-hash table using hash to get the slave. + */ + uint32_t hash = dp_packet_get_rss_hash(packet); + struct slave_entry *s_entry = &p_bond->slave_buckets[hash & BOND_MASK]; + odp_port_t bond_member = s_entry->slave_id; + uint32_t size = dp_packet_size(packet); + struct dp_packet_batch output_pkt; + + dp_packet_batch_init_packet(&output_pkt, packet); + if (OVS_LIKELY(dp_execute_output_action(pmd, &output_pkt, true, + bond_member))) { + /* Update slave stats. 
*/ + non_atomic_ullong_add(&s_entry->n_packets, 1); + non_atomic_ullong_add(&s_entry->n_bytes, size); + } + } +} + static void dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, const struct nlattr *a, bool should_steal) @@ -7159,49 +7436,14 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_, switch ((enum ovs_action_attr)type) { case OVS_ACTION_ATTR_OUTPUT: - p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a)); - if (OVS_LIKELY(p)) { - struct dp_packet *packet; - struct dp_packet_batch out; - - if (!should_steal) { - dp_packet_batch_clone(&out, packets_); - dp_packet_batch_reset_cutlen(packets_); - packets_ = &out; - } - dp_packet_batch_apply_cutlen(packets_); - -#ifdef DPDK_NETDEV - if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts) - && packets_->packets[0]->source - != p->output_pkts.packets[0]->source)) { - /* XXX: netdev-dpdk assumes that all packets in a single - * output batch has the same source. Flush here to - * avoid memory access issues. */ - dp_netdev_pmd_flush_output_on_port(pmd, p); - } -#endif - if (dp_packet_batch_size(&p->output_pkts) - + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) { - /* Flush here to avoid overflow. 
*/ - dp_netdev_pmd_flush_output_on_port(pmd, p); - } - - if (dp_packet_batch_is_empty(&p->output_pkts)) { - pmd->n_output_batches++; - } + dp_execute_output_action(pmd, packets_, should_steal, + nl_attr_get_odp_port(a)); + return; - DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) { - p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] = - pmd->ctx.last_rxq; - dp_packet_batch_add(&p->output_pkts, packet); - } - return; - } else { - COVERAGE_ADD(datapath_drop_invalid_port, - dp_packet_batch_size(packets_)); - } - break; + case OVS_ACTION_ATTR_LB_OUTPUT: + dp_execute_lb_output_action(pmd, packets_, should_steal, + nl_attr_get_u32(a)); + return; case OVS_ACTION_ATTR_TUNNEL_PUSH: if (should_steal) { @@ -7813,6 +8055,98 @@ dpif_netdev_ipf_dump_done(struct dpif *dpif OVS_UNUSED, void *ipf_dump_ctx) } +static int +dpif_netdev_bond_add(struct dpif *dpif, uint32_t bond_id, + odp_port_t *slave_map) +{ + struct tx_bond *new_tx = xzalloc(sizeof *new_tx); + struct dp_netdev *dp = get_dp_netdev(dpif); + struct dp_netdev_pmd_thread *pmd; + + /* Prepare new bond mapping. */ + new_tx->bond_id = bond_id; + for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) { + new_tx->slave_buckets[bucket].slave_id = slave_map[bucket]; + } + + ovs_mutex_lock(&dp->bond_mutex); + /* Check if bond already existed. */ + struct tx_bond *old_tx = tx_bond_lookup(&dp->tx_bonds, bond_id); + if (old_tx) { + cmap_replace(&dp->tx_bonds, &old_tx->node, &new_tx->node, + hash_bond_id(bond_id)); + ovsrcu_postpone(free, old_tx); + } else { + cmap_insert(&dp->tx_bonds, &new_tx->node, hash_bond_id(bond_id)); + } + ovs_mutex_unlock(&dp->bond_mutex); + + /* Update all PMDs with new bond mapping. 
*/ + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { + dp_netdev_add_bond_tx_to_pmd(pmd, new_tx, true); + } + return 0; +} + +static int +dpif_netdev_bond_del(struct dpif *dpif, uint32_t bond_id) +{ + struct dp_netdev *dp = get_dp_netdev(dpif); + struct dp_netdev_pmd_thread *pmd; + struct tx_bond *tx; + + ovs_mutex_lock(&dp->bond_mutex); + /* Check if bond existed. */ + tx = tx_bond_lookup(&dp->tx_bonds, bond_id); + if (tx) { + cmap_remove(&dp->tx_bonds, &tx->node, hash_bond_id(bond_id)); + ovsrcu_postpone(free, tx); + } else { + /* Bond is not present. */ + ovs_mutex_unlock(&dp->bond_mutex); + return ENOENT; + } + ovs_mutex_unlock(&dp->bond_mutex); + + /* Remove the bond map in all pmds. */ + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { + dp_netdev_del_bond_tx_from_pmd(pmd, bond_id); + } + return 0; +} + +static int +dpif_netdev_bond_stats_get(struct dpif *dpif, uint32_t bond_id, + uint64_t *n_bytes) +{ + struct dp_netdev *dp = get_dp_netdev(dpif); + struct dp_netdev_pmd_thread *pmd; + + if (!tx_bond_lookup(&dp->tx_bonds, bond_id)) { + return ENOENT; + } + + /* Search the bond in all PMDs. */ + CMAP_FOR_EACH (pmd, node, &dp->poll_threads) { + struct tx_bond *pmd_bond_entry + = tx_bond_lookup(&pmd->tx_bonds, bond_id); + + if (!pmd_bond_entry) { + continue; + } + + /* Read bond stats. 
*/ + for (int i = 0; i < BOND_BUCKETS; i++) { + uint64_t pmd_n_bytes; + + atomic_read_relaxed(&pmd_bond_entry->slave_buckets[i].n_bytes, + &pmd_n_bytes); + n_bytes[i] += pmd_n_bytes; + } + } + return 0; +} + const struct dpif_class dpif_netdev_class = { "netdev", true, /* cleanup_required */ @@ -7886,6 +8220,9 @@ const struct dpif_class dpif_netdev_class = { dpif_netdev_meter_set, dpif_netdev_meter_get, dpif_netdev_meter_del, + dpif_netdev_bond_add, + dpif_netdev_bond_del, + dpif_netdev_bond_stats_get, }; static void diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 1817e9f84..18322e879 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -4051,6 +4051,9 @@ const struct dpif_class dpif_netlink_class = { dpif_netlink_meter_set, dpif_netlink_meter_get, dpif_netlink_meter_del, + NULL, /* bond_add */ + NULL, /* bond_del */ + NULL, /* bond_stats_get */ }; static int diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index b77317bca..0e024c1c9 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -616,6 +616,18 @@ struct dpif_class { * zero. */ int (*meter_del)(struct dpif *, ofproto_meter_id meter_id, struct ofputil_meter_stats *, uint16_t n_bands); + + /* Adds a bond with 'bond_id' and the slave-map to 'dpif'. */ + int (*bond_add)(struct dpif *dpif, uint32_t bond_id, + odp_port_t *slave_map); + + /* Removes bond identified by 'bond_id' from 'dpif'. */ + int (*bond_del)(struct dpif *dpif, uint32_t bond_id); + + /* Reads bond stats from 'dpif'. 'n_bytes' should be an array with size + * sufficient to store BOND_BUCKETS number of elements. 
*/ + int (*bond_stats_get)(struct dpif *dpif, uint32_t bond_id, + uint64_t *n_bytes); }; extern const struct dpif_class dpif_netlink_class; diff --git a/lib/dpif.c b/lib/dpif.c index 9d9c716c1..c529a93f1 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1170,6 +1170,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_, case OVS_ACTION_ATTR_CT: case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_LB_OUTPUT: case OVS_ACTION_ATTR_TUNNEL_PUSH: case OVS_ACTION_ATTR_TUNNEL_POP: case OVS_ACTION_ATTR_USERSPACE: @@ -1220,6 +1221,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_, struct dp_packet *clone = NULL; uint32_t cutlen = dp_packet_get_cutlen(packet); if (cutlen && (type == OVS_ACTION_ATTR_OUTPUT + || type == OVS_ACTION_ATTR_LB_OUTPUT || type == OVS_ACTION_ATTR_TUNNEL_PUSH || type == OVS_ACTION_ATTR_TUNNEL_POP || type == OVS_ACTION_ATTR_USERSPACE)) { @@ -1879,6 +1881,16 @@ dpif_supports_explicit_drop_action(const struct dpif *dpif) return dpif_is_netdev(dpif); } +bool +dpif_supports_lb_output_action(const struct dpif *dpif) +{ + /* + * Balance-tcp optimization is currently supported in netdev + * datapath only. + */ + return dpif_is_netdev(dpif); +} + /* Meters */ void dpif_meter_get_features(const struct dpif *dpif, @@ -1976,3 +1988,30 @@ dpif_meter_del(struct dpif *dpif, ofproto_meter_id meter_id, } return error; } + +int +dpif_bond_add(struct dpif *dpif, uint32_t bond_id, odp_port_t *slave_map) +{ + return dpif->dpif_class->bond_del + ? dpif->dpif_class->bond_add(dpif, bond_id, slave_map) + : EOPNOTSUPP; +} + +int +dpif_bond_del(struct dpif *dpif, uint32_t bond_id) +{ + return dpif->dpif_class->bond_del + ? dpif->dpif_class->bond_del(dpif, bond_id) + : EOPNOTSUPP; +} + +int +dpif_bond_stats_get(struct dpif *dpif, uint32_t bond_id, + uint64_t *n_bytes) +{ + memset(n_bytes, 0, BOND_BUCKETS * sizeof *n_bytes); + + return dpif->dpif_class->bond_stats_get + ? 
dpif->dpif_class->bond_stats_get(dpif, bond_id, n_bytes) + : EOPNOTSUPP; +} diff --git a/lib/dpif.h b/lib/dpif.h index 4df8f7c8b..2d52f0186 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -891,6 +891,18 @@ int dpif_meter_get(const struct dpif *, ofproto_meter_id meter_id, struct ofputil_meter_stats *, uint16_t n_bands); int dpif_meter_del(struct dpif *, ofproto_meter_id meter_id, struct ofputil_meter_stats *, uint16_t n_bands); + +/* Bonding. */ + +/* Bit-mask for hashing a flow down to a bucket. */ +#define BOND_MASK 0xff +#define BOND_BUCKETS (BOND_MASK + 1) + +int dpif_bond_add(struct dpif *, uint32_t bond_id, odp_port_t *slave_map); +int dpif_bond_del(struct dpif *, uint32_t bond_id); +int dpif_bond_stats_get(struct dpif *, uint32_t bond_id, uint64_t *n_bytes); +bool dpif_supports_lb_output_action(const struct dpif *); + /* Miscellaneous. */ diff --git a/lib/odp-execute.c b/lib/odp-execute.c index 42d3335f0..6018e378a 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -793,6 +793,7 @@ requires_datapath_assistance(const struct nlattr *a) switch (type) { /* These only make sense in the context of a datapath. 
*/ case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_LB_OUTPUT: case OVS_ACTION_ATTR_TUNNEL_PUSH: case OVS_ACTION_ATTR_TUNNEL_POP: case OVS_ACTION_ATTR_USERSPACE: @@ -1068,6 +1069,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal, return; } case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_LB_OUTPUT: case OVS_ACTION_ATTR_TUNNEL_PUSH: case OVS_ACTION_ATTR_TUNNEL_POP: case OVS_ACTION_ATTR_USERSPACE: diff --git a/lib/odp-util.c b/lib/odp-util.c index e907804aa..011db9ebb 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -119,6 +119,7 @@ odp_action_len(uint16_t type) switch ((enum ovs_action_attr) type) { case OVS_ACTION_ATTR_OUTPUT: return sizeof(uint32_t); + case OVS_ACTION_ATTR_LB_OUTPUT: return sizeof(uint32_t); case OVS_ACTION_ATTR_TRUNC: return sizeof(struct ovs_action_trunc); case OVS_ACTION_ATTR_TUNNEL_PUSH: return ATTR_LEN_VARIABLE; case OVS_ACTION_ATTR_TUNNEL_POP: return sizeof(uint32_t); @@ -1132,6 +1133,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a, case OVS_ACTION_ATTR_OUTPUT: odp_portno_name_format(portno_names, nl_attr_get_odp_port(a), ds); break; + case OVS_ACTION_ATTR_LB_OUTPUT: + ds_put_format(ds, "lb_output(%"PRIu32")", nl_attr_get_u32(a)); + break; case OVS_ACTION_ATTR_TRUNC: { const struct ovs_action_trunc *trunc = nl_attr_get_unspec(a, sizeof *trunc); @@ -2305,6 +2309,16 @@ parse_odp_action__(struct parse_odp_context *context, const char *s, } } + { + uint32_t bond_id; + int n; + + if (ovs_scan(s, "lb_output(%"PRIu32")%n", &bond_id, &n)) { + nl_msg_put_u32(actions, OVS_ACTION_ATTR_LB_OUTPUT, bond_id); + return n; + } + } + { uint32_t max_len; int n; diff --git a/ofproto/bond.c b/ofproto/bond.c index 405202fb6..9947e7531 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -54,10 +54,6 @@ static struct ovs_rwlock rwlock = OVS_RWLOCK_INITIALIZER; static struct hmap all_bonds__ = HMAP_INITIALIZER(&all_bonds__); static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__; -/* Bit-mask for 
hashing a flow down to a bucket. */ -#define BOND_MASK 0xff -#define BOND_BUCKETS (BOND_MASK + 1) - /* Priority for internal rules created to handle recirculation */ #define RECIRC_RULE_PRIORITY 20 @@ -126,6 +122,8 @@ struct bond { enum lacp_status lacp_status; /* Status of LACP negotiations. */ bool bond_revalidate; /* True if flows need revalidation. */ uint32_t basis; /* Basis for flow hash function. */ + bool use_lb_output_action; /* Use lb_output action to avoid recirculation. + Applicable only for Balance TCP mode. */ /* SLB specific bonding info. */ struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */ @@ -185,8 +183,10 @@ static struct bond_slave *choose_output_slave(const struct bond *, struct flow_wildcards *, uint16_t vlan) OVS_REQ_RDLOCK(rwlock); -static void update_recirc_rules__(struct bond *bond); +static void update_recirc_rules__(struct bond *); static bool bond_is_falling_back_to_ab(const struct bond *); +static void bond_add_lb_output_buckets(const struct bond *); +static void bond_del_lb_output_buckets(const struct bond *); /* Attempts to parse 's' as the name of a bond balancing mode. If successful, * stores the mode in '*balance' and returns true. Otherwise returns false @@ -282,6 +282,10 @@ bond_unref(struct bond *bond) /* Free bond resources. Remove existing post recirc rules. */ if (bond->recirc_id) { + if (bond_use_lb_output_action(bond)) { + /* Delete bond buckets from datapath if installed. 
*/ + bond_del_lb_output_buckets(bond); + } recirc_free_id(bond->recirc_id); bond->recirc_id = 0; } @@ -336,27 +340,35 @@ update_recirc_rules__(struct bond *bond) struct ofpbuf ofpacts; int i; - ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); - HMAP_FOR_EACH(pr_op, hmap_node, &bond->pr_rule_ops) { pr_op->op = DEL; } if (bond->hash && bond->recirc_id) { - for (i = 0; i < BOND_BUCKETS; i++) { - struct bond_slave *slave = bond->hash[i].slave; + if (bond_use_lb_output_action(bond)) { + bond_add_lb_output_buckets(bond); + /* No need to install post recirculation rules as we are using + * lb_output action with bond buckets. + */ + return; + } else { + for (i = 0; i < BOND_BUCKETS; i++) { + struct bond_slave *slave = bond->hash[i].slave; - if (slave) { - match_init_catchall(&match); - match_set_recirc_id(&match, bond->recirc_id); - match_set_dp_hash_masked(&match, i, BOND_MASK); + if (slave) { + match_init_catchall(&match); + match_set_recirc_id(&match, bond->recirc_id); + match_set_dp_hash_masked(&match, i, BOND_MASK); - add_pr_rule(bond, &match, slave->ofp_port, - &bond->hash[i].pr_rule); + add_pr_rule(bond, &match, slave->ofp_port, + &bond->hash[i].pr_rule); + } } } } + ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub); + HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) { int error; switch (pr_op->op) { @@ -464,9 +476,23 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s) bond->recirc_id = recirc_alloc_id(bond->ofproto); } } else if (bond->recirc_id) { + if (bond_use_lb_output_action(bond)) { + /* Delete bond buckets from datapath if installed. 
*/ + bond_del_lb_output_buckets(bond); + } recirc_free_id(bond->recirc_id); bond->recirc_id = 0; } + if (bond->use_lb_output_action != s->use_lb_output_action) { + if (s->use_lb_output_action && + !ovs_lb_output_action_supported(bond->ofproto)) { + VLOG_WARN("%s: Datapath does not support 'lb_output' action, " + "disabled.", bond->name); + } else { + bond->use_lb_output_action = s->use_lb_output_action; + revalidate = true; + } + } if (bond->balance == BM_AB || !bond->hash || revalidate) { bond_entry_reset(bond); @@ -944,19 +970,31 @@ bond_recirculation_account(struct bond *bond) OVS_REQ_WRLOCK(rwlock) { int i; + uint64_t n_bytes[BOND_BUCKETS]; + bool use_lb_output_action = bond_use_lb_output_action(bond); + + if (use_lb_output_action) { + /* Retrieve bond stats from datapath. */ + dpif_bond_stats_get(bond->ofproto->backer->dpif, + bond->recirc_id, n_bytes); + } for (i=0; i<=BOND_MASK; i++) { struct bond_entry *entry = &bond->hash[i]; struct rule *rule = entry->pr_rule; + struct pkt_stats stats; - if (rule) { - struct pkt_stats stats; + if (use_lb_output_action) { + stats.n_bytes = n_bytes[i]; + } else if (rule) { long long int used OVS_UNUSED; rule->ofproto->ofproto_class->rule_get_stats( rule, &stats, &used); - bond_entry_account(entry, stats.n_bytes); + } else { + continue; } + bond_entry_account(entry, stats.n_bytes); } } @@ -1351,6 +1389,7 @@ bond_print_details(struct ds *ds, const struct bond *bond) struct shash slave_shash = SHASH_INITIALIZER(&slave_shash); const struct shash_node **sorted_slaves = NULL; const struct bond_slave *slave; + bool use_lb_output_action; bool may_recirc; uint32_t recirc_id; int i; @@ -1366,6 +1405,11 @@ bond_print_details(struct ds *ds, const struct bond *bond) ds_put_format(ds, "bond-hash-basis: %"PRIu32"\n", bond->basis); + use_lb_output_action = bond_use_lb_output_action(bond); + ds_put_format(ds, "lb_output action: %s, bond-id: %d\n", + use_lb_output_action ? "enabled" : "disabled", + use_lb_output_action ? 
recirc_id : -1); + ds_put_format(ds, "updelay: %d ms\n", bond->updelay); ds_put_format(ds, "downdelay: %d ms\n", bond->downdelay); @@ -1942,3 +1986,34 @@ bond_get_changed_active_slave(const char *name, struct eth_addr *mac, return false; } + +bool +bond_use_lb_output_action(const struct bond *bond) +{ + return bond_may_recirc(bond) && bond->use_lb_output_action; +} + +static void +bond_add_lb_output_buckets(const struct bond *bond) +{ + ofp_port_t slave_map[BOND_BUCKETS]; + + for (int i = 0; i < BOND_BUCKETS; i++) { + struct bond_slave *slave = bond->hash[i].slave; + + if (slave) { + slave_map[i] = slave->ofp_port; + } else { + slave_map[i] = OFPP_NONE; + } + } + ofproto_dpif_add_lb_output_buckets(bond->ofproto, bond->recirc_id, + slave_map); +} + +static void +bond_del_lb_output_buckets(const struct bond *bond) +{ + ofproto_dpif_delete_lb_output_buckets(bond->ofproto, + bond->recirc_id); +} diff --git a/ofproto/bond.h b/ofproto/bond.h index e7c3d9bc3..40c3258dc 100644 --- a/ofproto/bond.h +++ b/ofproto/bond.h @@ -58,6 +58,8 @@ struct bond_settings { /* The MAC address of the interface that was active during the last ovs run. */ + bool use_lb_output_action; /* Use lb_output action. Only applicable for + bond mode BALANCE TCP. */ }; /* Program startup. 
*/ @@ -122,4 +124,7 @@ void bond_rebalance(struct bond *); */ void bond_update_post_recirc_rules(struct bond *, uint32_t *recirc_id, uint32_t *hash_basis); + +bool bond_use_lb_output_action(const struct bond *bond); + #endif /* bond.h */ diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c index b413768ef..796eb6f88 100644 --- a/ofproto/ofproto-dpif-ipfix.c +++ b/ofproto/ofproto-dpif-ipfix.c @@ -2979,6 +2979,7 @@ dpif_ipfix_read_actions(const struct flow *flow, enum ovs_action_attr type = nl_attr_type(a); switch (type) { case OVS_ACTION_ATTR_OUTPUT: + case OVS_ACTION_ATTR_LB_OUTPUT: ipfix_actions->output_action = true; break; case OVS_ACTION_ATTR_SAMPLE: diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index f9ea47a2f..f616fb2bb 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -1175,8 +1175,9 @@ dpif_sflow_read_actions(const struct flow *flow, case OVS_ACTION_ATTR_RECIRC: case OVS_ACTION_ATTR_HASH: case OVS_ACTION_ATTR_CT: - case OVS_ACTION_ATTR_CT_CLEAR: + case OVS_ACTION_ATTR_CT_CLEAR: case OVS_ACTION_ATTR_METER: + case OVS_ACTION_ATTR_LB_OUTPUT: break; case OVS_ACTION_ATTR_SET_MASKED: diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index e64c6d477..e0ede2cab 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -4207,7 +4207,17 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* Commit accumulated flow updates before output. */ xlate_commit_actions(ctx); - if (xr) { + if (xr && bond_use_lb_output_action(xport->xbundle->bond)) { + /* + * If bond mode is balance-tcp and optimize balance tcp is enabled + * then use the hash directly for slave selection and avoid + * recirculation. + * + * Currently support for netdev datapath only. + */ + nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_LB_OUTPUT, + xr->recirc_id); + } else if (xr) { /* Recirculate the packet. 
*/ struct ovs_action_hash *act_hash; @@ -7310,7 +7320,8 @@ count_output_actions(const struct ofpbuf *odp_actions) int n = 0; NL_ATTR_FOR_EACH_UNSAFE (a, left, odp_actions->data, odp_actions->size) { - if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) { + if ((a->nla_type == OVS_ACTION_ATTR_OUTPUT) || + (a->nla_type == OVS_ACTION_ATTR_LB_OUTPUT)) { n++; } } diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 7e10375f2..4f0638f23 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -868,6 +868,12 @@ ovs_explicit_drop_action_supported(struct ofproto_dpif *ofproto) return ofproto->backer->rt_support.explicit_drop_action; } +bool +ovs_lb_output_action_supported(struct ofproto_dpif *ofproto) +{ + return ofproto->backer->rt_support.lb_output_action; +} + /* Tests whether 'backer''s datapath supports recirculation. Only newer * datapaths support OVS_KEY_ATTR_RECIRC_ID in keys. We need to disable some * features on older datapaths that don't support this feature. @@ -1582,6 +1588,8 @@ check_support(struct dpif_backer *backer) backer->rt_support.ct_timeout = check_ct_timeout_policy(backer); backer->rt_support.explicit_drop_action = dpif_supports_explicit_drop_action(backer->dpif); + backer->rt_support.lb_output_action= + dpif_supports_lb_output_action(backer->dpif); /* Flow fields. */ backer->rt_support.odp.ct_state = check_ct_state(backer); @@ -3441,6 +3449,27 @@ bundle_remove(struct ofport *port_) } } +int +ofproto_dpif_add_lb_output_buckets(struct ofproto_dpif *ofproto, + uint32_t bond_id, + const ofp_port_t *slave_map) +{ + odp_port_t odp_map[BOND_BUCKETS]; + + for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) { + /* Convert ofp_port to odp_port. 
*/ + odp_map[bucket] = ofp_port_to_odp_port(ofproto, slave_map[bucket]); + } + return dpif_bond_add(ofproto->backer->dpif, bond_id, odp_map); +} + +int +ofproto_dpif_delete_lb_output_buckets(struct ofproto_dpif *ofproto, + uint32_t bond_id) +{ + return dpif_bond_del(ofproto->backer->dpif, bond_id); +} + static void send_pdu_cb(void *port_, const void *pdu, size_t pdu_size) { @@ -5572,6 +5601,7 @@ get_datapath_cap(const char *datapath_type, struct smap *cap) smap_add(cap, "ct_timeout", s.ct_timeout ? "true" : "false"); smap_add(cap, "explicit_drop_action", s.explicit_drop_action ? "true" :"false"); + smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false"); } /* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index aee61d61d..4e5ae0c9e 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -202,7 +202,10 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *, DPIF_SUPPORT_FIELD(bool, ct_timeout, "Conntrack timeout policy") \ \ /* True if the datapath supports explicit drop action. */ \ - DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action") + DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action") \ + \ + /* True if the datapath supports balance_tcp optimization */ \ + DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode") /* Stores the various features which the corresponding backer supports. 
*/ @@ -382,6 +385,11 @@ int ofproto_dpif_add_internal_flow(struct ofproto_dpif *, struct rule **rulep); int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *, int priority); +int ofproto_dpif_add_lb_output_buckets(struct ofproto_dpif *, uint32_t bond_id, + const ofp_port_t *slave_map); +int ofproto_dpif_delete_lb_output_buckets(struct ofproto_dpif *, + uint32_t bond_id); +bool ovs_lb_output_action_supported(struct ofproto_dpif *); bool ovs_native_tunneling_is_on(struct ofproto_dpif *); diff --git a/tests/lacp.at b/tests/lacp.at index 7b460d7be..df1691731 100644 --- a/tests/lacp.at +++ b/tests/lacp.at @@ -121,6 +121,7 @@ AT_CHECK([ovs-appctl bond/show], [0], [dnl bond_mode: active-backup bond may use recirculation: no, Recirc-ID : -1 bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated @@ -286,6 +287,7 @@ slave: p3: current attached bond_mode: balance-tcp bond may use recirculation: yes, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated @@ -301,6 +303,7 @@ slave p1: enabled bond_mode: balance-tcp bond may use recirculation: yes, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated @@ -423,6 +426,7 @@ slave: p3: current attached bond_mode: balance-tcp bond may use recirculation: yes, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated @@ -440,6 +444,7 @@ slave p1: enabled bond_mode: balance-tcp bond may use recirculation: yes, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated @@ -555,6 +560,7 @@ slave: p3: current attached bond_mode: balance-tcp bond may use recirculation: yes, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated @@ -572,6 +578,7 @@ slave p1: enabled bond_mode: 
balance-tcp bond may use recirculation: yes, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated @@ -692,6 +699,7 @@ slave: p3: current attached bond_mode: balance-tcp bond may use recirculation: yes, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated @@ -709,6 +717,7 @@ slave p1: enabled bond_mode: balance-tcp bond may use recirculation: yes, bond-hash-basis: 0 +lb_output action: disabled, bond-id: -1 updelay: 0 ms downdelay: 0 ms lacp_status: negotiated diff --git a/tests/odp.at b/tests/odp.at index 3ab9ad62d..1ebdf0515 100644 --- a/tests/odp.at +++ b/tests/odp.at @@ -383,6 +383,7 @@ check_pkt_len(size=200,gt(4),le(5)) check_pkt_len(size=200,gt(drop),le(5)) check_pkt_len(size=200,gt(ct(nat)),le(drop)) check_pkt_len(size=200,gt(set(eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15))),le(set(eth(src=00:01:02:03:04:06,dst=10:11:12:13:14:16)))) +lb_output(1) ]) AT_CHECK_UNQUOTED([ovstest test-odp parse-actions < actions.txt], [0], [`cat actions.txt` diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index fe73c38d4..f312efd8e 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -4600,6 +4600,11 @@ port_configure_bond(struct port *port, struct bond_settings *s) /* OVSDB did not store the last active interface */ s->active_slave_mac = eth_addr_zero; } + + /* lb_output action is disabled by default. */ + s->use_lb_output_action = (s->balance == BM_TCP) + && smap_get_bool(&port->cfg->other_config, + "lb-output-action", false); } /* Returns true if 'port' is synthetic, that is, if we constructed it locally diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 6d334370d..b6acb34ca 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1994,6 +1994,15 @@ active-backup. + + Enable/disable usage of optimized lb_output action for + balancing flows among output slaves in load balanced bonds in + balance-tcp. 
When enabled, it uses optimized path for + balance-tcp mode by using rss hash and avoids recirculation. + This knob does not affect other balancing modes. + +

    An important part of link bonding is detecting that links are down so @@ -5812,6 +5821,19 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ higher performance for MPLS and active-active load balancing bonding modes. + + If this is true, then the datapath supports optimized balance-tcp + bond mode. This capability replaces existing hash and + recirc actions with new action lb_output + and avoids recirculation of packet in datapath. It is supported + only for balance-tcp bond mode in netdev datapath. The new action + gives higer performance by using bond buckets instead of post + recirculation flows for selection of slave port from bond. By default + this new action is disabled, however it can be enabled by setting + in + table. +

    These capabilities are granular because Open vSwitch and its -- GitLab From 3950e350d240232b5ad8b7b5a701549ad0e84378 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 25 May 2020 20:05:15 +0200 Subject: [PATCH 171/432] ofproto-dpif.at: Add unit test for lb_output action. Extend the balance-tcp one so it tests lb-output action too. The test checks that that the option is shown in bond/show, and that the lb_output action is programmed in the datapath. Signed-off-by: Matteo Croce Signed-off-by: Ilya Maximets --- tests/ofproto-dpif.at | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index a03a63ac0..e3402e7b8 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -152,6 +152,8 @@ ovs-appctl time/stop ovs-appctl time/warp 100 ovs-appctl lacp/show > lacp.txt ovs-appctl bond/show > bond.txt +# Check that lb_output is not enabled by default. +AT_CHECK([grep -q '^lb_output action: disabled' bond.txt]) ( for i in `seq 0 255` ; do @@ -164,9 +166,32 @@ AT_CHECK([ovs-appctl dpif/dump-flows br0 |grep tcp > br0_flows.txt]) AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp > br1_flows.txt]) # Make sure there is resonable distribution to all three ports. # We don't want to make this check precise, in case hash function changes. -AT_CHECK([test `grep in_port.4 br1_flows.txt |wc -l` -gt 24]) -AT_CHECK([test `grep in_port.5 br1_flows.txt |wc -l` -gt 24]) -AT_CHECK([test `grep in_port.6 br1_flows.txt |wc -l` -gt 24]) +AT_CHECK([test $(grep -c in_port.4 br1_flows.txt) -gt 24]) +AT_CHECK([test $(grep -c in_port.5 br1_flows.txt) -gt 24]) +AT_CHECK([test $(grep -c in_port.6 br1_flows.txt) -gt 24]) +# Check that bonding is doing dp_hash. +AT_CHECK([grep -q dp_hash br0_flows.txt]) +# Enabling lb_output. 
+AT_CHECK([ovs-vsctl set Port bond0 other_config:lb-output-action=true]) +OVS_WAIT_UNTIL([ovs-appctl bond/show | grep -q '^lb_output action: enabled']) +ovs-appctl time/warp 10000 500 +ovs-appctl revalidator/wait +OVS_WAIT_WHILE([ovs-appctl dpif/dump-flows br1 | grep -q tcp]) +( +for i in $(seq 256) ; + do + pkt="in_port(7),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(ack)" + AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt]) + done +) +ovs-appctl time/warp 300 100 +AT_CHECK([ovs-appctl dpif/dump-flows br0 | grep tcp > br0_flows.txt]) +AT_CHECK([ovs-appctl dpif/dump-flows br1 | grep tcp > br1_flows.txt]) +# Make sure there is resonable distribution to all three ports, again. +AT_CHECK([test $(grep -c in_port.4 br1_flows.txt) -gt 24]) +AT_CHECK([test $(grep -c in_port.5 br1_flows.txt) -gt 24]) +AT_CHECK([test $(grep -c in_port.6 br1_flows.txt) -gt 24]) +AT_CHECK([grep -q lb_output br0_flows.txt]) OVS_VSWITCHD_STOP() AT_CLEANUP -- GitLab From 029273855939cce35cba7c2ab1831bd92b0502cb Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Fri, 29 May 2020 02:33:05 -0400 Subject: [PATCH 172/432] netdev-offload-dpdk: Support offload of VLAN PUSH/POP actions. Parse VLAN PUSH/POP OVS datapath actions and add respective RTE actions. Signed-off-by: Sriharsha Basavapatna Acked-by: Eli Britstein Signed-off-by: Ilya Maximets --- Documentation/howto/dpdk.rst | 1 + NEWS | 1 + lib/netdev-offload-dpdk.c | 67 ++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst index be950d7ce..c40fcafcb 100644 --- a/Documentation/howto/dpdk.rst +++ b/Documentation/howto/dpdk.rst @@ -395,6 +395,7 @@ Supported actions for hardware offload are: - Modification of Ethernet (mod_dl_src/mod_dl_dst). - Modification of IPv4 (mod_nw_src/mod_nw_dst/mod_nw_ttl). 
- Modification of TCP/UDP (mod_tp_src/mod_tp_dst). +- VLAN Push/Pop (push_vlan/pop_vlan). Further Reading --------------- diff --git a/NEWS b/NEWS index 309c00f6b..0116b3ea0 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,7 @@ Post-v2.13.0 - DPDK: * Deprecated DPDK pdump packet capture support removed. * Deprecated DPDK ring ports (dpdkr) are no longer supported. + * Add hardware offload support for VLAN Push/Pop actions (experimental). - Linux datapath: * Support for kernel versions up to 5.5.x. - AF_XDP: diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index f8c46bbaa..26a75f0f2 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -420,6 +420,41 @@ dump_flow_action(struct ds *s, const struct rte_flow_action *actions) } else { ds_put_format(s, " Set-%s-tcp/udp-port = null\n", dirstr); } + } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN) { + const struct rte_flow_action_of_push_vlan *rte_push_vlan; + + rte_push_vlan = actions->conf; + ds_put_cstr(s, "rte flow push-vlan action:\n"); + if (rte_push_vlan) { + ds_put_format(s, " Push-vlan: 0x%"PRIx16"\n", + ntohs(rte_push_vlan->ethertype)); + } else { + ds_put_format(s, " Push-vlan = null\n"); + } + } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) { + const struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp; + + rte_vlan_pcp = actions->conf; + ds_put_cstr(s, "rte flow set-vlan-pcp action:\n"); + if (rte_vlan_pcp) { + ds_put_format(s, " Set-vlan-pcp: %"PRIu8"\n", + rte_vlan_pcp->vlan_pcp); + } else { + ds_put_format(s, " Set-vlan-pcp = null\n"); + } + } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) { + const struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid; + + rte_vlan_vid = actions->conf; + ds_put_cstr(s, "rte flow set-vlan-vid action:\n"); + if (rte_vlan_vid) { + ds_put_format(s, " Set-vlan-vid: %"PRIu16"\n", + ntohs(rte_vlan_vid->vlan_vid)); + } else { + ds_put_format(s, " Set-vlan-vid = null\n"); + } + } else if (actions->type == 
RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) { + ds_put_cstr(s, "rte flow pop-vlan action\n"); } else { ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); } @@ -970,6 +1005,30 @@ parse_set_actions(struct flow_actions *actions, return 0; } +static int +parse_vlan_push_action(struct flow_actions *actions, + const struct ovs_action_push_vlan *vlan_push) +{ + struct rte_flow_action_of_push_vlan *rte_push_vlan; + struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp; + struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid; + + rte_push_vlan = xzalloc(sizeof *rte_push_vlan); + rte_push_vlan->ethertype = vlan_push->vlan_tpid; + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN, rte_push_vlan); + + rte_vlan_pcp = xzalloc(sizeof *rte_vlan_pcp); + rte_vlan_pcp->vlan_pcp = vlan_tci_to_pcp(vlan_push->vlan_tci); + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP, + rte_vlan_pcp); + + rte_vlan_vid = xzalloc(sizeof *rte_vlan_vid); + rte_vlan_vid->vlan_vid = htons(vlan_tci_to_vid(vlan_push->vlan_tci)); + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID, + rte_vlan_vid); + return 0; +} + static int parse_flow_actions(struct netdev *netdev, struct flow_actions *actions, @@ -998,6 +1057,14 @@ parse_flow_actions(struct netdev *netdev, masked)) { return -1; } + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_PUSH_VLAN) { + const struct ovs_action_push_vlan *vlan = nl_attr_get(nla); + + if (parse_vlan_push_action(actions, vlan)) { + return -1; + } + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) { + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, NULL); } else { VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla)); return -1; -- GitLab From dda80837a852f27f54ffd1ab4de3cf2d002631e5 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 22 Jun 2020 14:07:33 +0200 Subject: [PATCH 173/432] AUTHORS: Add Sriharsha Basavapatna. 
Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 7a3b12610..fbba823fd 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -357,6 +357,7 @@ Shu Shen shu.shen@radisys.com Simon Horman horms@verge.net.au Simon Horman simon.horman@netronome.com Sorin Vinturis svinturis@cloudbasesolutions.com +Sriharsha Basavapatna sriharsha.basavapatna@broadcom.com Steffen Gebert steffen.gebert@informatik.uni-wuerzburg.de Sten Spans sten@blinkenlights.nl Stephane A. Sezer sas@cd80.net -- GitLab From 51119374ce921fca9cfc62a05cca614f72dc4608 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 22 Jun 2020 08:54:13 -0700 Subject: [PATCH 174/432] rhel: Fix syntax error when matching version. Remove the extra 'fi' in the script. VMware-BZ: #2582834 Fixed: fecb28051b35 ("rhel: Support RHEL 7.8 kernel module rpm build.") Reported-by: Abhijeet Malawade Acked-by: Greg Rose Signed-off-by: William Tu --- rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh index 93d487101..c70e135cd 100644 --- a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh +++ b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh @@ -113,7 +113,6 @@ if [ "$mainline_major" = "3" ] && [ "$mainline_minor" = "10" ]; then comp_ver=10 ver_offset=4 installed_ver="$minor_rev" - fi elif [ "$major_rev" = "1127" ]; then # echo "rhel78" comp_ver=10 -- GitLab From 98670b77ffe8b36c9d5089022ee36680aeecc542 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Fri, 26 Jun 2020 11:21:06 -0700 Subject: [PATCH 175/432] bridge: Fix null dereference on ct_timeout_policy record According to vswitch.ovsschema, each CT_Zone record may have zero or one associated CT_Timeout_policy. Thus, this patch checks if ovsrec_ct_timeout_policy exists before accessing the record.
VMWare-BZ: 2585825 Fixes: 45339539f69d ("ovs-vsctl: Add conntrack zone commands.") Fixes: 993cae678bca ("ofproto-dpif: Consume CT_Zone, and CT_Timeout_Policy tables") Reported-by: Yang Song Signed-off-by: Yi-Hung Wei Signed-off-by: William Tu --- tests/ovs-vsctl.at | 8 ++++++++ utilities/ovs-vsctl.c | 10 +++++++--- vswitchd/bridge.c | 6 ++++-- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at index 55c7a6e17..c8babe361 100644 --- a/tests/ovs-vsctl.at +++ b/tests/ovs-vsctl.at @@ -966,6 +966,14 @@ AT_CHECK([RUN_OVS_VSCTL([--if-exists del-zone-tp netdev zone=1])]) AT_CHECK([RUN_OVS_VSCTL([list-zone-tp netdev])], [0], [Zone:2, Timeout Policies: icmp_first=2 icmp_reply=3 ]) +AT_CHECK( + [RUN_OVS_VSCTL_TOGETHER([--id=@n create CT_Zone external_ids:"test"="123"], + [--id=@m create Datapath datapath_version=0 ct_zones:"10"=@n], + [set Open_vSwitch . datapaths:"netdev"=@m])], + [0], [stdout]) +AT_CHECK([RUN_OVS_VSCTL([list-zone-tp netdev])], [0], [Zone:10, Timeout Policies: system default +]) + AT_CHECK([RUN_OVS_VSCTL([-- --id=@m create Datapath datapath_version=0 'capabilities={recirc=true}' -- set Open_vSwitch . 
datapaths:"system"=@m])], [0], [stdout]) AT_CHECK([RUN_OVS_VSCTL([list-dp-cap system])], [0], [recirc=true ]) diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c index bd3972636..37cc72d40 100644 --- a/utilities/ovs-vsctl.c +++ b/utilities/ovs-vsctl.c @@ -1344,9 +1344,13 @@ cmd_list_zone_tp(struct ctl_context *ctx) struct ovsrec_ct_timeout_policy *tp = zone->timeout_policy; - for (int j = 0; j < tp->n_timeouts; j++) { - ds_put_format(&ctx->output, "%s=%"PRIu64" ", - tp->key_timeouts[j], tp->value_timeouts[j]); + if (tp) { + for (int j = 0; j < tp->n_timeouts; j++) { + ds_put_format(&ctx->output, "%s=%"PRIu64" ", + tp->key_timeouts[j], tp->value_timeouts[j]); + } + } else { + ds_put_cstr(&ctx->output, "system default"); } ds_chomp(&ctx->output, ' '); ds_put_char(&ctx->output, '\n'); diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index f312efd8e..0bb4fa652 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -635,8 +635,10 @@ static void get_timeout_policy_from_ovsrec(struct simap *tp, const struct ovsrec_ct_timeout_policy *tp_cfg) { - for (size_t i = 0; i < tp_cfg->n_timeouts; i++) { - simap_put(tp, tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]); + if (tp_cfg) { + for (size_t i = 0; i < tp_cfg->n_timeouts; i++) { + simap_put(tp, tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]); + } } } -- GitLab From 7d68168468b1f862953d38010cbf3d1079eaf621 Mon Sep 17 00:00:00 2001 From: Rui Cao Date: Tue, 23 Jun 2020 06:46:22 +0000 Subject: [PATCH 176/432] datapath-windows, conntrack: Fix conntrack new state On windows, if we send a connection setup packet in one direction twice, it will make the connection to be in established state. The same issue happened in Linux userspace conntrack module and has been fixed. 
This patch port the following previous fixes to windows datapath to fix the issue: - a867c010ee9183885ee9d3eb76a0005c075c4d2e - ac23d20fc90da3b1c9b2117d1e22102e99fba006 Acked-by: Yi-Hung Wei Signed-off-by: Rui Cao Signed-off-by: William Tu --- datapath-windows/ovsext/Conntrack-other.c | 4 +++- datapath-windows/ovsext/Conntrack-tcp.c | 14 ++++++++++---- datapath-windows/ovsext/Conntrack.c | 3 +++ datapath-windows/ovsext/Conntrack.h | 1 + 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-other.c b/datapath-windows/ovsext/Conntrack-other.c index 962cc8ac6..8580415a6 100644 --- a/datapath-windows/ovsext/Conntrack-other.c +++ b/datapath-windows/ovsext/Conntrack-other.c @@ -49,17 +49,19 @@ OvsConntrackUpdateOtherEntry(OVS_CT_ENTRY *conn_, { ASSERT(conn_); struct conn_other *conn = OvsCastConntrackEntryToOtherEntry(conn_); + enum CT_UPDATE_RES ret = CT_UPDATE_VALID; if (reply && conn->state != OTHERS_BIDIR) { conn->state = OTHERS_BIDIR; } else if (conn->state == OTHERS_FIRST) { conn->state = OTHERS_MULTIPLE; + ret = CT_UPDATE_VALID_NEW; } OvsConntrackUpdateExpiration(&conn->up, now, other_timeouts[conn->state]); - return CT_UPDATE_VALID; + return ret; } OVS_CT_ENTRY * diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c index eda42ac82..a468c3e6b 100644 --- a/datapath-windows/ovsext/Conntrack-tcp.c +++ b/datapath-windows/ovsext/Conntrack-tcp.c @@ -213,11 +213,17 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_, return CT_UPDATE_INVALID; } - if (((tcp_flags & (TCP_SYN|TCP_ACK)) == TCP_SYN) - && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 + if ((tcp_flags & (TCP_SYN|TCP_ACK)) == TCP_SYN) { + if (dst->state >= CT_DPIF_TCPS_FIN_WAIT_2 && src->state >= CT_DPIF_TCPS_FIN_WAIT_2) { - src->state = dst->state = CT_DPIF_TCPS_CLOSED; - return CT_UPDATE_NEW; + src->state = dst->state = CT_DPIF_TCPS_CLOSED; + return CT_UPDATE_NEW; + } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) { + src->state = 
CT_DPIF_TCPS_SYN_SENT; + OvsConntrackUpdateExpiration(&conn->up, now, + 30 * CT_INTERVAL_SEC); + return CT_UPDATE_VALID_NEW; + } } if (src->wscale & CT_WSCALE_FLAG diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index ba5611697..55917c43f 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -753,6 +753,9 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx, return NULL; } break; + case CT_UPDATE_VALID_NEW: + state |= OVS_CS_F_NEW; + break; } } if (entry) { diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h index bc6580d70..b0932186a 100644 --- a/datapath-windows/ovsext/Conntrack.h +++ b/datapath-windows/ovsext/Conntrack.h @@ -56,6 +56,7 @@ typedef enum CT_UPDATE_RES { CT_UPDATE_INVALID, CT_UPDATE_VALID, CT_UPDATE_NEW, + CT_UPDATE_VALID_NEW, } CT_UPDATE_RES; /* Metadata mark for masked write to conntrack mark */ -- GitLab From f740828d82234ccd026fcbc9a49d230d72667330 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 26 Jun 2020 12:46:10 -0700 Subject: [PATCH 177/432] jsonrpc: Don't assert for 0 remotes in jsonrpc_session_open_multiple(). It's pretty easy to get 0 remotes here from ovn-northd if you specify --ovnnb-db='' or --ovnnb-db=' ' on the command line. The internals of jsonrpc_session aren't equipped to cope with that, so just add a dummy remote instead. Acked-by: Numan Siddique Signed-off-by: Ben Pfaff --- lib/jsonrpc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c index ed748dbde..ecbc939fe 100644 --- a/lib/jsonrpc.c +++ b/lib/jsonrpc.c @@ -825,8 +825,10 @@ jsonrpc_session_open_multiple(const struct svec *remotes, bool retry) s = xmalloc(sizeof *s); /* Set 'n' remotes from 'names'. 
*/ - ovs_assert(remotes->n > 0); svec_clone(&s->remotes, remotes); + if (!s->remotes.n) { + svec_add(&s->remotes, "invalid:"); + } s->next_remote = 0; s->reconnect = reconnect_create(time_msec()); -- GitLab From 28b3e3ba0db5f10f875679d2a1535e13e4994e0e Mon Sep 17 00:00:00 2001 From: Jinjun Gao Date: Tue, 30 Jun 2020 19:47:57 +0800 Subject: [PATCH 178/432] datapath-windows: Add CTA_HELP and CTA_TUPLE_MASTER Add helper and master if existing to a conntrack entry: 1, For CTA_HELP, only support FTP/TFTP; 2, For CTA_TUPLE_MASTER, only support FTP. Signed-off-by: Jinjun Gao Signed-off-by: Alin Gabriel Serdean --- datapath-windows/ovsext/Conntrack-related.c | 5 ++- datapath-windows/ovsext/Conntrack.c | 50 ++++++++++++++++++--- datapath-windows/ovsext/Conntrack.h | 1 + 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c index 950be98e9..a5bba5cf8 100644 --- a/datapath-windows/ovsext/Conntrack-related.c +++ b/datapath-windows/ovsext/Conntrack-related.c @@ -47,8 +47,11 @@ OvsCtRelatedKeyAreSame(OVS_CT_KEY incomingKey, OVS_CT_KEY entryKey) } /* FTP ACTIVE - Server initiates the connection */ + /* Some ftp server, such as pyftpdlib, may use random (>1024) data port + * except 20. In this case, the incomingKey's src port is different with + * entryKey's src port. 
+ */ if ((incomingKey.src.addr.ipv4 == entryKey.src.addr.ipv4) && - (incomingKey.src.port == entryKey.src.port) && (incomingKey.dst.addr.ipv4 == entryKey.dst.addr.ipv4) && (incomingKey.dst.port == entryKey.dst.port) && (incomingKey.dl_type == entryKey.dl_type) && diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index 55917c43f..d0655911b 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -246,7 +246,6 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type) { OVS_CT_EVENT_ENTRY ctEventEntry = {0}; NdisMoveMemory(&ctEventEntry.entry, entry, sizeof(OVS_CT_ENTRY)); - ctEventEntry.entry.parent = NULL; ctEventEntry.type = type; OvsPostCtEvent(&ctEventEntry); } @@ -480,6 +479,9 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete) RemoveEntryList(&entry->link); OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql); NdisFreeSpinLock(&(entry->lock)); + if (entry->helper_name) { + OvsFreeMemoryWithTag(entry->helper_name, OVS_CT_POOL_TAG); + } OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG); NdisInterlockedDecrement((PLONG)&ctTotalEntries); return; @@ -883,6 +885,7 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, BOOLEAN triggerUpdateEvent = FALSE; BOOLEAN entryCreated = FALSE; POVS_CT_ENTRY entry = NULL; + POVS_CT_ENTRY parent = NULL; PNET_BUFFER_LIST curNbl = fwdCtx->curNbl; OvsConntrackKeyLookupCtx ctx = { 0 }; LOCK_STATE_EX lockStateTable; @@ -959,8 +962,6 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, if (OvsDetectFtpPacket(key)) { /* FTP parser will always be loaded */ - UNREFERENCED_PARAMETER(helper); - status = OvsCtHandleFtp(curNbl, key, layers, currentTime, entry, (ntohs(key->ipKey.l4.tpDst) == IPPORT_FTP)); if (status != NDIS_STATUS_SUCCESS) { @@ -968,10 +969,25 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx, } } + parent = entry->parent; + /* The entry should have the same helper name with parent's */ + if (!entry->helper_name && + (helper || (parent && parent->helper_name))) { + + helper = 
helper ? helper : parent->helper_name; + entry->helper_name = OvsAllocateMemoryWithTag(strlen(helper) + 1, + OVS_CT_POOL_TAG); + if (!entry->helper_name) { + OVS_LOG_ERROR("Error while allocating memory"); + OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql); + return NDIS_STATUS_RESOURCES; + } + memcpy(entry->helper_name, helper, strlen(helper) + 1); + } + /* Add original tuple information to flow Key */ if (entry->key.dl_type == ntohs(ETH_TYPE_IPV4)) { - if (entry->parent != NULL) { - POVS_CT_ENTRY parent = entry->parent; + if (parent != NULL) { OVS_ACQUIRE_SPIN_LOCK(&(parent->lock), irql); OvsCtUpdateTuple(key, &parent->key); OVS_RELEASE_SPIN_LOCK(&(parent->lock), irql); @@ -1042,8 +1058,8 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx, if (helper == NULL) { return NDIS_STATUS_INVALID_PARAMETER; } - if (strcmp("ftp", helper) != 0) { - /* Only support FTP */ + if (strcmp("ftp", helper) != 0 && strcmp("tftp", helper) != 0) { + /* Only support FTP/TFTP */ return NDIS_STATUS_NOT_SUPPORTED; } break; @@ -1683,6 +1699,26 @@ OvsCreateNlMsgFromCtEntry(POVS_CT_ENTRY entry, } } + if (entry->helper_name) { + UINT32 offset; + offset = NlMsgStartNested(&nlBuf, CTA_HELP); + if (!offset) { + return NDIS_STATUS_FAILURE; + } + if (!NlMsgPutTailString(&nlBuf, CTA_HELP_NAME, entry->helper_name)) { + return STATUS_INVALID_BUFFER_SIZE; + } + NlMsgEndNested(&nlBuf, offset); + } + + if (entry->parent) { + status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_MASTER, + &((POVS_CT_ENTRY)entry->parent)->key); + if (status != NDIS_STATUS_SUCCESS) { + return STATUS_UNSUCCESSFUL; + } + } + /* CTA_STATUS is required but not implemented. 
Default to 0 */ if (!NlMsgPutTailU32(&nlBuf, CTA_STATUS, 0)) { return STATUS_INVALID_BUFFER_SIZE; diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h index b0932186a..bbbf49c11 100644 --- a/datapath-windows/ovsext/Conntrack.h +++ b/datapath-windows/ovsext/Conntrack.h @@ -109,6 +109,7 @@ typedef struct OVS_CT_ENTRY { struct ovs_key_ct_labels labels; NAT_ACTION_INFO natInfo; PVOID parent; /* Points to main connection */ + PCHAR helper_name; } OVS_CT_ENTRY, *POVS_CT_ENTRY; typedef struct OVS_CT_REL_ENTRY { -- GitLab From 0fc38297ba3830631e4a6ad3f9fbbe364e8a3b6b Mon Sep 17 00:00:00 2001 From: wenxu Date: Mon, 29 Jun 2020 17:31:18 +0800 Subject: [PATCH 179/432] lib/tc: only update the stats for non-empty counter A packet with first frag and execute act_ct action. The packet will stole by defrag. So the stats counter for "gact action goto chain" will always 0. The openvswitch update each action in order. So the flower stats finally alway be zero. The rule will be delete adter max-idle time even there are packet executing the action. 
ovs-appctl dpctl/dump-flows recirc_id(0),in_port(1),eth_type(0x0800),ipv4(dst=11.0.0.7,frag=first), packets:0, bytes:0, used:5.390s, actions:ct(zone=1,nat),recirc(0x4) filter protocol ip pref 2 flower chain 0 handle 0x2 eth_type ipv4 dst_ip 1.1.1.1 ip_flags frag/firstfrag skip_hw not_in_hw action order 1: ct zone 1 nat pipe index 2 ref 1 bind 1 installed 11 sec used 1 sec Action statistics: Sent 15000 bytes 11 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 cookie e04106c2ac41769b278edaa9b5309960 action order 2: gact action goto chain 1 random type none pass val 0 index 2 ref 1 bind 1 installed 11 sec used 11 sec Action statistics: Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) backlog 0b 0p requeues 0 cookie e04106c2ac41769b278edaa9b5309960 Signed-off-by: wenxu Signed-off-by: Simon Horman --- lib/tc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/tc.c b/lib/tc.c index c2ab77553..c96d09538 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -1726,8 +1726,10 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower, } bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs); - put_32aligned_u64(&stats->n_packets, bs->packets); - put_32aligned_u64(&stats->n_bytes, bs->bytes); + if (bs->packets) { + put_32aligned_u64(&stats->n_packets, bs->packets); + put_32aligned_u64(&stats->n_bytes, bs->bytes); + } return 0; } -- GitLab From 9ed9df77a3097146addd9cc2f53bffebd71c1343 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Wed, 10 Jun 2020 16:49:45 -0300 Subject: [PATCH 180/432] ctags: Include new annotations to ctags ignore list. The annotation OVS_NO_THREAD_SAFETY_ANALYSIS and OVS_LOCKABLE are not part of the list, so ctags can't find functions using them. The annotation list comes from a regex and to include more items make the regex more difficult to read and maintain. Convert to a static list because it isn't supposed to change much and there is no standard names. 
Also add a comment to remind to keep the list up-to-date. Signed-off-by: Flavio Leitner Signed-off-by: William Tu --- Makefile.am | 2 +- acinclude.m4 | 6 +++--- include/openvswitch/compiler.h | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile.am b/Makefile.am index b279303d1..27ef9e4b4 100644 --- a/Makefile.am +++ b/Makefile.am @@ -46,7 +46,7 @@ AM_CPPFLAGS += -DNDEBUG AM_CFLAGS += -fomit-frame-pointer endif -AM_CTAGSFLAGS = $(OVS_CTAGS_IDENTIFIERS_LIST) +AM_CTAGSFLAGS = -I "$(OVS_CTAGS_IDENTIFIERS_LIST)" if WIN32 psep=";" diff --git a/acinclude.m4 b/acinclude.m4 index 8847b8145..054ec2e3c 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -1332,11 +1332,11 @@ AC_DEFUN([OVS_ENABLE_SPARSE], dnl OVS_CTAGS_IDENTIFIERS dnl -dnl ctags ignores symbols with extras identifiers. This builds a list of -dnl specially handled identifiers to be ignored. +dnl ctags ignores symbols with extras identifiers. This is a list of +dnl specially handled identifiers to be ignored. [ctags(1) -I ]. AC_DEFUN([OVS_CTAGS_IDENTIFIERS], AC_SUBST([OVS_CTAGS_IDENTIFIERS_LIST], - [`printf %s '-I "'; sed -n 's/^#define \(OVS_[A-Z_]\+\)(\.\.\.)$/\1+/p' ${srcdir}/include/openvswitch/compiler.h | tr \\\n ' ' ; printf '"'`] )) + ["OVS_LOCKABLE OVS_NO_THREAD_SAFETY_ANALYSIS OVS_REQ_RDLOCK+ OVS_ACQ_RDLOCK+ OVS_REQ_WRLOCK+ OVS_ACQ_WRLOCK+ OVS_REQUIRES+ OVS_ACQUIRES+ OVS_TRY_WRLOCK+ OVS_TRY_RDLOCK+ OVS_TRY_LOCK+ OVS_GUARDED_BY+ OVS_EXCLUDED+ OVS_RELEASES+ OVS_ACQ_BEFORE+ OVS_ACQ_AFTER+"])) dnl OVS_PTHREAD_SET_NAME dnl diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h index 5289a70f6..cf009f826 100644 --- a/include/openvswitch/compiler.h +++ b/include/openvswitch/compiler.h @@ -113,6 +113,8 @@ * OVS_REQUIRES OVS_REQ_RDLOCK OVS_REQ_WRLOCK * OVS_EXCLUDED OVS_EXCLUDED OVS_EXCLUDED */ + +/* Please keep OVS_CTAGS_IDENTIFIERS up-to-date in acinclude.m4. */ #define OVS_LOCKABLE __attribute__((lockable)) #define OVS_REQ_RDLOCK(...) 
__attribute__((shared_locks_required(__VA_ARGS__))) #define OVS_ACQ_RDLOCK(...) __attribute__((shared_lock_function(__VA_ARGS__))) -- GitLab From e7fac43bd1b4627792c5b2a344b95af4625c5288 Mon Sep 17 00:00:00 2001 From: Peng He Date: Sun, 5 Jul 2020 21:07:23 +0800 Subject: [PATCH 181/432] conntrack-tp: fix lock order in conn_update_expiration *conn_update_expiration* violates the lock order of conn->lock and ct->lock. In the comments of conntrack, the conn->lock should be held after ct->lock when ct->lock needs to be taken. Fixes: 2078901a4c142 ("userspace: Add conntrack timeout policy support.") Signed-off-by: Peng He Signed-off-by: William Tu --- lib/conntrack-tp.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c index 3a7604c0d..a586d3a8d 100644 --- a/lib/conntrack-tp.c +++ b/lib/conntrack-tp.c @@ -260,15 +260,20 @@ conn_update_expiration(struct conntrack *ct, struct conn *conn, struct timeout_policy *tp; uint32_t val; + ovs_mutex_unlock(&conn->lock); + ovs_mutex_lock(&ct->ct_lock); + ovs_mutex_lock(&conn->lock); tp = timeout_policy_lookup(ct, conn->tp_id); if (tp) { val = tp->policy.attrs[tm_to_ct_dpif_tp(tm)]; } else { val = ct_dpif_netdev_tp_def[tm_to_ct_dpif_tp(tm)]; } + ovs_mutex_unlock(&conn->lock); ovs_mutex_unlock(&ct->ct_lock); + ovs_mutex_lock(&conn->lock); VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d " "val=%u sec.", ct_timeout_str[tm], conn->key.zone, conn->tp_id, val); -- GitLab From 6adc879b6369fd009e2bdc1db6f5d0aea6c50f89 Mon Sep 17 00:00:00 2001 From: William Tu Date: Sun, 5 Jul 2020 06:44:25 -0700 Subject: [PATCH 182/432] AUTHORS: Add Peng He. 
Signed-off-by: William Tu --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index fbba823fd..26a37aac2 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -303,6 +303,7 @@ Paul Fazzone pfazzone@vmware.com Paul Ingram Paul-Emmanuel Raoul skyper@skyplabs.net Pavithra Ramesh paramesh@vmware.com +Peng He hepeng.0320@bytedance.com Peter Downs padowns@gmail.com Philippe Jung phil.jung@free.fr Pim van den Berg pim@nethuis.nl -- GitLab From edd04838c61c61f0156400c36bcdf06c746481b0 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 27 May 2020 12:24:31 -0700 Subject: [PATCH 183/432] meta-flow: Document that constituents of conjunctive flows may overlap. Suggested-by: Antonin Bas Acked-by: William Tu Signed-off-by: Ben Pfaff --- lib/meta-flow.xml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml index d4495552b..154675874 100644 --- a/lib/meta-flow.xml +++ b/lib/meta-flow.xml @@ -1240,6 +1240,8 @@ tcp,tp_src=0x07c0/0xfff0 priority, that is, any given packet must be able to match at most one conjunctive flow at a given priority. Overlapping conjunctive flows yield unpredictable results. + (The flows that constitute a conjunctive flow may overlap with those + that constitute the same or another conjunctive flow.)

  • Following a conjunctive flow match, the search for the flow with -- GitLab From 32697d4f6497c283a02e65e9c4515fc3183a55f6 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 11 Jun 2020 11:25:52 +0200 Subject: [PATCH 184/432] odp-execute: Fix length checking while executing check_pkt_len action. If dp-packet contains l2 padding or cutlen was applied to it, size will be larger than the actual size of a payload and action will work incorrectly. Ex. Padding could be added during miniflow_extract() if detected. Fixes: 5b34f8fc3b38 ("Add a new OVS action check_pkt_larger") Reported-by: Miroslav Kubiczek Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2020-May/050157.html Acked-by: Dumitru Ceara Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- lib/odp-execute.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/odp-execute.c b/lib/odp-execute.c index 6018e378a..6eeda2a61 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -761,10 +761,11 @@ odp_execute_check_pkt_len(void *dp, struct dp_packet *packet, bool steal, const struct nlattr *a; struct dp_packet_batch pb; + uint32_t size = dp_packet_get_send_len(packet) + - dp_packet_l2_pad_size(packet); a = attrs[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]; - bool is_greater = dp_packet_size(packet) > nl_attr_get_u16(a); - if (is_greater) { + if (size > nl_attr_get_u16(a)) { a = attrs[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER]; } else { a = attrs[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL]; -- GitLab From a64ee5493f0910012d201ea200d75365788acc3a Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 15 May 2020 19:21:00 +0800 Subject: [PATCH 185/432] ovsdb: Remove duplicated include. 
Signed-off-by: Yunjian Wang Signed-off-by: Ilya Maximets --- ovsdb/monitor.c | 1 - ovsdb/rbac.c | 1 - 2 files changed, 2 deletions(-) diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c index 1c66b428e..532dedcb6 100644 --- a/ovsdb/monitor.c +++ b/ovsdb/monitor.c @@ -31,7 +31,6 @@ #include "simap.h" #include "hash.h" #include "table.h" -#include "hash.h" #include "timeval.h" #include "transaction.h" #include "jsonrpc-server.h" diff --git a/ovsdb/rbac.c b/ovsdb/rbac.c index b85ca9a93..2986027c9 100644 --- a/ovsdb/rbac.c +++ b/ovsdb/rbac.c @@ -21,7 +21,6 @@ #include "column.h" #include "condition.h" -#include "condition.h" #include "file.h" #include "mutation.h" #include "openvswitch/vlog.h" -- GitLab From df5c293642cc07013e796e588eb7aead917e20a1 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 16 Mar 2020 05:56:03 +0800 Subject: [PATCH 186/432] dpif-netdev: Delete the artificial flow limit. The MAX_FLOWS constant was there from the introduction of dpif-netdev, however, later new flow-limit mechanism was implemented that controls number of datapath flows in a dynamic way on ofproto level. So, we can just remove the limit and fully rely on ofproto to decide what flow limit we need. There are no limitations for flow table size in dpif-netdev beside the artificial one. 'other_config:flow-limit' seems suitable to control this. Suggested-by: Ilya Maximets Signed-off-by: Tonghao Zhang Signed-off-by: Ilya Maximets --- NEWS | 3 +++ lib/dpif-netdev.c | 10 ++-------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/NEWS b/NEWS index 0116b3ea0..ac992d17f 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,9 @@ Post-v2.13.0 * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles by enabling interrupt mode. - Userspace datapath: + * Removed artificial datapath flow limit that was 65536. + Now number of datapath flows is fully controlled by revalidators and the + 'other_config:flow-limit' knob. * Add support for conntrack zone-based timeout policy. 
* New configuration knob 'other_config:lb-output-action' for bond ports that enables new datapath action 'lb_output' to avoid recirculation diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 1086efd47..cd349c4a4 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -98,7 +98,6 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) #define DEFAULT_TX_FLUSH_INTERVAL 0 /* Configuration parameters. */ -enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ enum { MAX_METERS = 65536 }; /* Maximum number of meters. */ enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */ enum { N_METER_LOCKS = 64 }; /* Maximum number of meters. */ @@ -3527,13 +3526,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd, netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL); if (!netdev_flow) { if (put->flags & DPIF_FP_CREATE) { - if (cmap_count(&pmd->flow_table) < MAX_FLOWS) { - dp_netdev_flow_add(pmd, match, ufid, put->actions, - put->actions_len); - error = 0; - } else { - error = EFBIG; - } + dp_netdev_flow_add(pmd, match, ufid, put->actions, + put->actions_len); } else { error = ENOENT; } -- GitLab From 82a106ebfbf90b4b4d09782caf74d22f51af8d12 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Fri, 26 Jun 2020 13:51:16 +0200 Subject: [PATCH 187/432] ofproto: Delete buckets when lb_output is false. When lb-output-action is toggled back to "false" buckets are not being deleted. Delete them as they will no longer be used. Add unit test to verify buckets are correctly deleted. 
Cc: Vishal Deep Ajmera Signed-off-by: Adrian Moreno Signed-off-by: Ilya Maximets --- ofproto/bond.c | 3 +++ tests/ofproto-dpif.at | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/ofproto/bond.c b/ofproto/bond.c index 9947e7531..2466c4d02 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -490,6 +490,9 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s) "disabled.", bond->name); } else { bond->use_lb_output_action = s->use_lb_output_action; + if (!bond->use_lb_output_action) { + bond_del_lb_output_buckets(bond); + } revalidate = true; } } diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index e3402e7b8..c1455d8aa 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -193,6 +193,10 @@ AT_CHECK([test $(grep -c in_port.5 br1_flows.txt) -gt 24]) AT_CHECK([test $(grep -c in_port.6 br1_flows.txt) -gt 24]) AT_CHECK([grep -q lb_output br0_flows.txt]) +AT_CHECK([test $(ovs-appctl dpif-netdev/bond-show | grep -c bucket) -eq 256]) +AT_CHECK([ovs-vsctl set Port bond0 other_config:lb-output-action=false]) +OVS_WAIT_UNTIL([test -z "$(ovs-appctl dpif-netdev/bond-show)"]) + OVS_VSWITCHD_STOP() AT_CLEANUP -- GitLab From d4a12d87a7a3a8d32a284c5e5b0121529188e580 Mon Sep 17 00:00:00 2001 From: Adrian Moreno Date: Mon, 6 Jul 2020 11:26:55 +0200 Subject: [PATCH 188/432] dpif-netdev-unixctl.man: Document bond-show command. Document recently added ovs-appctl command. Fixes: 9df65060cf4c ("userspace: Avoid dp_hash recirculation for balance-tcp bond mode.") Acked-by: Flavio Leitner Signed-off-by: Adrian Moreno Signed-off-by: Ilya Maximets --- lib/dpif-netdev-unixctl.man | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man index 6c54f6f9c..858d491df 100644 --- a/lib/dpif-netdev-unixctl.man +++ b/lib/dpif-netdev-unixctl.man @@ -217,3 +217,12 @@ with port names, which this thread polls. . 
.IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]" Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage. +. +.IP "\fBdpif-netdev/bond-show\fR [\fIdp\fR]" +When "other_config:lb-output-action" is set to "true", the userspace datapath +handles the load balancing of bonds directly instead of depending on flow +recirculation (only in balance-tcp mode). + +When this is the case, the above command prints the load-balancing information +of the bonds configured in datapath \fIdp\fR showing the interface associated +with each bucket (hash). -- GitLab From 319fca52412b8514286a5ad012b16515a245d4e9 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 7 Jul 2020 01:19:26 +0200 Subject: [PATCH 189/432] AUTHORS: Add Adrian Moreno. Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 26a37aac2..8e6a0769f 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -33,6 +33,7 @@ Name Email ================================== =============================================== Aaron Conole aconole@redhat.com Aaron Rosen arosen@clemson.edu +Adrian Moreno amorenoz@redhat.com Alan Pevec alan.pevec@redhat.com Alessandro Pilotti apilotti@cloudbasesolutions.com Alexander Duyck alexander.h.duyck@redhat.com -- GitLab From 7ed49f8be175852bca0433aaaa9f1449507d8a4b Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Sun, 21 Jun 2020 22:55:12 -0700 Subject: [PATCH 190/432] ovsdb/TODO.rst: Remove completed items. - snapshot unit test has been added for "change-election-timer" related patches. - 100% CPU problem was addressed by: 2cd62f75c1 ("ovsdb raft: Precheck prereq before proposing commit.") Signed-off-by: Han Zhou Signed-off-by: Ilya Maximets --- ovsdb/TODO.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ovsdb/TODO.rst b/ovsdb/TODO.rst index fb4a50fa6..0771c25c8 100644 --- a/ovsdb/TODO.rst +++ b/ovsdb/TODO.rst @@ -27,12 +27,8 @@ OVSDB Clustering To-do List * Ephemeral columns. -* Unit test snapshotting. 
- * Locks. -* Investigate 100% CPU for long-running triggers - * Tons of unit tests. * Increase exponential backoff cap. Introduce randomization. -- GitLab From c659b64f3880916c8e12b3db57c6e97808d04ba2 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Sun, 21 Jun 2020 22:55:13 -0700 Subject: [PATCH 191/432] ovsdb/TODO.rst: Remove OVN specific items. These should belong to OVN project, if still not done yet. Signed-off-by: Han Zhou Signed-off-by: Ilya Maximets --- ovsdb/TODO.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ovsdb/TODO.rst b/ovsdb/TODO.rst index 0771c25c8..fd0163f22 100644 --- a/ovsdb/TODO.rst +++ b/ovsdb/TODO.rst @@ -43,10 +43,6 @@ OVSDB Clustering To-do List * ACID (and CAP?) explanation. - * Upgrading OVN to a clustered database - - * Installing OVN with a clustered database - * Overall diagram explaining the cluster and ovsdb protocol pieces * Future work: -- GitLab From 9ed69557e5fae4f33f34a985c380e7e040b09548 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Thu, 2 Jul 2020 16:20:57 +0200 Subject: [PATCH 192/432] ovsdb-idl: Force IDL retry when missing updates encountered. Adds a generic recovery mechanism which triggers an IDL retry with fast resync disabled in case the IDL has detected that it ended up in an inconsistent state due to other bugs in the ovsdb-server/ovsdb-idl implementation. Additionally, this commit also: - bumps IDL semantic error logs to level ERR to make them more visible. - triggers an IDL retry in cases when the IDL client used to try to recover (i.e., trying to add an existing row, trying to remove a non existent row). 
Fixes: db2b5757328c ("lib: add monitor2 support in ovsdb-idl.") Acked-by: Han Zhou Signed-off-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 169 +++++++++++++++++++++++++++++++----------------- 1 file changed, 109 insertions(+), 60 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 0a18261fc..ef3b97b23 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -321,14 +321,19 @@ static bool ovsdb_idl_handle_monitor_canceled(struct ovsdb_idl *, static void ovsdb_idl_db_parse_update(struct ovsdb_idl_db *, const struct json *table_updates, enum ovsdb_idl_monitor_method method); -static bool ovsdb_idl_process_update(struct ovsdb_idl_table *, - const struct uuid *, - const struct json *old, - const struct json *new); -static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *, - const struct uuid *, - const char *operation, - const struct json *row); +enum update_result { + OVSDB_IDL_UPDATE_DB_CHANGED, + OVSDB_IDL_UPDATE_NO_CHANGES, + OVSDB_IDL_UPDATE_INCONSISTENT, +}; +static enum update_result ovsdb_idl_process_update(struct ovsdb_idl_table *, + const struct uuid *, + const struct json *old, + const struct json *new); +static enum update_result ovsdb_idl_process_update2(struct ovsdb_idl_table *, + const struct uuid *, + const char *operation, + const struct json *row); static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct json *); static void ovsdb_idl_delete_row(struct ovsdb_idl_row *); static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct json *); @@ -2418,6 +2423,7 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db, version_suffix, table->class_->name); } SHASH_FOR_EACH (table_node, json_object(table_update)) { + enum update_result result = OVSDB_IDL_UPDATE_NO_CHANGES; const struct json *row_update = table_node->data; struct uuid uuid; @@ -2450,13 +2456,13 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db, operation = ops[i]; row = shash_find_data(json_object(row_update), operation); - if 
(row) { - if (ovsdb_idl_process_update2(table, &uuid, operation, - row)) { - db->change_seqno++; - } - break; + if (!row) { + continue; } + + result = ovsdb_idl_process_update2(table, &uuid, + operation, row); + break; } /* row_update2 should contain one of the objects */ @@ -2487,10 +2493,24 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db, "and \"new\" members"); } - if (ovsdb_idl_process_update(table, &uuid, old_json, - new_json)) { - db->change_seqno++; - } + result = ovsdb_idl_process_update(table, &uuid, old_json, + new_json); + } + + switch (result) { + case OVSDB_IDL_UPDATE_DB_CHANGED: + db->change_seqno++; + break; + case OVSDB_IDL_UPDATE_NO_CHANGES: + break; + case OVSDB_IDL_UPDATE_INCONSISTENT: + memset(&db->last_id, 0, sizeof db->last_id); + ovsdb_idl_retry(db->idl); + return ovsdb_error(NULL, + "<row_update%s> received for inconsistent " + "IDL: reconnecting IDL and resync all " + "data", + version_suffix); } } } @@ -2523,9 +2543,22 @@ ovsdb_idl_get_row(struct ovsdb_idl_table *table, const struct uuid *uuid) return NULL; } -/* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false - * otherwise. */ -static bool +/* Returns OVSDB_IDL_UPDATE_DB_CHANGED if a column with mode + * OVSDB_IDL_MODE_RW changed. + * + * Some IDL inconsistencies can be detected when processing updates: + * - trying to insert an already existing row + * - trying to update a missing row + * - trying to delete a non existent row + * + * In such cases OVSDB_IDL_UPDATE_INCONSISTENT is returned. + * Even though the IDL client could recover, it's best to report the + * inconsistent state because the state the server is in is unknown so the + * safest thing to do is to retry (potentially connecting to a new server). + * + * Returns OVSDB_IDL_UPDATE_NO_CHANGES otherwise.
+ */ +static enum update_result ovsdb_idl_process_update(struct ovsdb_idl_table *table, const struct uuid *uuid, const struct json *old, const struct json *new) @@ -2539,10 +2572,10 @@ ovsdb_idl_process_update(struct ovsdb_idl_table *table, /* XXX perhaps we should check the 'old' values? */ ovsdb_idl_delete_row(row); } else { - VLOG_WARN_RL(&semantic_rl, "cannot delete missing row "UUID_FMT" " - "from table %s", - UUID_ARGS(uuid), table->class_->name); - return false; + VLOG_ERR_RL(&semantic_rl, "cannot delete missing row "UUID_FMT" " + "from table %s", + UUID_ARGS(uuid), table->class_->name); + return OVSDB_IDL_UPDATE_INCONSISTENT; } } else if (!old) { /* Insert row. */ @@ -2551,35 +2584,50 @@ ovsdb_idl_process_update(struct ovsdb_idl_table *table, } else if (ovsdb_idl_row_is_orphan(row)) { ovsdb_idl_insert_row(row, new); } else { - VLOG_WARN_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to " - "table %s", UUID_ARGS(uuid), table->class_->name); - return ovsdb_idl_modify_row(row, new); + VLOG_ERR_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to " + "table %s", UUID_ARGS(uuid), table->class_->name); + return OVSDB_IDL_UPDATE_INCONSISTENT; } } else { /* Modify row. */ if (row) { /* XXX perhaps we should check the 'old' values? */ if (!ovsdb_idl_row_is_orphan(row)) { - return ovsdb_idl_modify_row(row, new); + return ovsdb_idl_modify_row(row, new) + ? 
OVSDB_IDL_UPDATE_DB_CHANGED + : OVSDB_IDL_UPDATE_NO_CHANGES; } else { - VLOG_WARN_RL(&semantic_rl, "cannot modify missing but " - "referenced row "UUID_FMT" in table %s", - UUID_ARGS(uuid), table->class_->name); - ovsdb_idl_insert_row(row, new); + VLOG_ERR_RL(&semantic_rl, "cannot modify missing but " + "referenced row "UUID_FMT" in table %s", + UUID_ARGS(uuid), table->class_->name); + return OVSDB_IDL_UPDATE_INCONSISTENT; } } else { - VLOG_WARN_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" " - "in table %s", UUID_ARGS(uuid), table->class_->name); - ovsdb_idl_insert_row(ovsdb_idl_row_create(table, uuid), new); + VLOG_ERR_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" " + "in table %s", UUID_ARGS(uuid), table->class_->name); + return OVSDB_IDL_UPDATE_INCONSISTENT; } } - return true; + return OVSDB_IDL_UPDATE_DB_CHANGED; } -/* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false - * otherwise. */ -static bool +/* Returns OVSDB_IDL_UPDATE_DB_CHANGED if a column with mode + * OVSDB_IDL_MODE_RW changed. + * + * Some IDL inconsistencies can be detected when processing updates: + * - trying to insert an already existing row + * - trying to update a missing row + * - trying to delete a non existent row + * + * In such cases OVSDB_IDL_UPDATE_INCONSISTENT is returned. + * Even though the IDL client could recover, it's best to report the + * inconsistent state because the state the server is in is unknown so the + * safest thing to do is to retry (potentially connecting to a new server). + * + * Otherwise OVSDB_IDL_UPDATE_NO_CHANGES is returned. 
+ */ +static enum update_result ovsdb_idl_process_update2(struct ovsdb_idl_table *table, const struct uuid *uuid, const char *operation, @@ -2593,10 +2641,10 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table, if (row && !ovsdb_idl_row_is_orphan(row)) { ovsdb_idl_delete_row(row); } else { - VLOG_WARN_RL(&semantic_rl, "cannot delete missing row "UUID_FMT" " - "from table %s", - UUID_ARGS(uuid), table->class_->name); - return false; + VLOG_ERR_RL(&semantic_rl, "cannot delete missing row "UUID_FMT" " + "from table %s", + UUID_ARGS(uuid), table->class_->name); + return OVSDB_IDL_UPDATE_INCONSISTENT; } } else if (!strcmp(operation, "insert") || !strcmp(operation, "initial")) { /* Insert row. */ @@ -2605,34 +2653,35 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table, } else if (ovsdb_idl_row_is_orphan(row)) { ovsdb_idl_insert_row(row, json_row); } else { - VLOG_WARN_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to " - "table %s", UUID_ARGS(uuid), table->class_->name); - ovsdb_idl_delete_row(row); - ovsdb_idl_insert_row(row, json_row); + VLOG_ERR_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to " + "table %s", UUID_ARGS(uuid), table->class_->name); + return OVSDB_IDL_UPDATE_INCONSISTENT; } } else if (!strcmp(operation, "modify")) { /* Modify row. */ if (row) { if (!ovsdb_idl_row_is_orphan(row)) { - return ovsdb_idl_modify_row_by_diff(row, json_row); + return ovsdb_idl_modify_row_by_diff(row, json_row) + ? 
OVSDB_IDL_UPDATE_DB_CHANGED + : OVSDB_IDL_UPDATE_NO_CHANGES; } else { - VLOG_WARN_RL(&semantic_rl, "cannot modify missing but " - "referenced row "UUID_FMT" in table %s", - UUID_ARGS(uuid), table->class_->name); - return false; + VLOG_ERR_RL(&semantic_rl, "cannot modify missing but " + "referenced row "UUID_FMT" in table %s", + UUID_ARGS(uuid), table->class_->name); + return OVSDB_IDL_UPDATE_INCONSISTENT; } } else { - VLOG_WARN_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" " - "in table %s", UUID_ARGS(uuid), table->class_->name); - return false; + VLOG_ERR_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" " + "in table %s", UUID_ARGS(uuid), table->class_->name); + return OVSDB_IDL_UPDATE_INCONSISTENT; } } else { - VLOG_WARN_RL(&semantic_rl, "unknown operation %s to " - "table %s", operation, table->class_->name); - return false; + VLOG_ERR_RL(&semantic_rl, "unknown operation %s to " + "table %s", operation, table->class_->name); + return OVSDB_IDL_UPDATE_NO_CHANGES; } - return true; + return OVSDB_IDL_UPDATE_DB_CHANGED; } /* Recursively add rows to tracked change lists for current row -- GitLab From 81ac8b3b194c602e7c9b4095f5bc1e214ae72a32 Mon Sep 17 00:00:00 2001 From: Nitin Katiyar Date: Thu, 22 Aug 2019 22:23:30 +0530 Subject: [PATCH 193/432] dpif-netdev: Do RCU synchronization at fixed interval in PMD main loop. Each PMD updates the global sequence number for RCU synchronization purpose with other OVS threads. This is done at every 1025th iteration in PMD main loop. If the PMD thread is responsible for polling large number of queues that are carrying traffic, it spends a lot of time processing packets and this results in significant delay in performing the housekeeping activities. If the OVS main thread is waiting to synchronize with the PMD threads and if those threads delay performing housekeeping activities for more than 3 sec then LACP processing will be impacted and it will lead to LACP flaps. 
Similarly, other control protocols run by the OVS main thread are impacted. For example, a PMD thread polling 200 ports/queues with an average of 1600 processing cycles per packet with a batch size of 32 may take 10240000 (200 * 1600 * 32) cycles per iteration. In a system with a 2.0 GHz CPU it means more than 5 ms per iteration. So, for 1024 iterations to complete it would be more than 5 seconds. This gets worse when there are PMD threads which are less loaded. It reduces the possibility of a heavily loaded PMD getting the mutex lock in ovsrcu_try_quiesce(), and the next attempt to quiesce would be after 1024 iterations. With this patch, PMD RCU synchronization will be performed at a fixed interval instead of after a fixed number of iterations. This will ensure that even if the packet processing load is high the RCU synchronization will not be delayed long. Co-authored-by: Anju Thomas Signed-off-by: Anju Thomas Signed-off-by: Nitin Katiyar Signed-off-by: Ilya Maximets --- lib/dpif-netdev.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index cd349c4a4..e037eab2a 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -240,6 +240,9 @@ struct dfc_cache { * and used during rxq to pmd assignment. */ #define PMD_RXQ_INTERVAL_MAX 6 +/* Time in microseconds to try RCU quiescing. */ +#define PMD_RCU_QUIESCE_INTERVAL 10000LL + struct dpcls { struct cmap_node node; /* Within dp_netdev_pmd_thread.classifiers */ odp_port_t in_port; @@ -787,6 +790,9 @@ struct dp_netdev_pmd_thread { /* Set to true if the pmd thread needs to be reloaded. */ bool need_reload; + + /* Next time when PMD should try RCU quiescing. */ + long long next_rcu_quiesce; }; /* Interface to netdev-based datapath. */ @@ -5730,6 +5736,9 @@ reload: pmd->intrvl_tsc_prev = 0; atomic_store_relaxed(&pmd->intrvl_cycles, 0); cycles_counter_update(s); + + pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL; + /* Protect pmd stats from external clearing while polling. 
*/ ovs_mutex_lock(&pmd->perf_stats.stats_mutex); for (;;) { @@ -5764,6 +5773,16 @@ reload: tx_packets = dp_netdev_pmd_flush_output_packets(pmd, false); } + /* Do RCU synchronization at fixed interval. This ensures that + * synchronization would not be delayed long even at high load of + * packet processing. */ + if (pmd->ctx.now > pmd->next_rcu_quiesce) { + if (!ovsrcu_try_quiesce()) { + pmd->next_rcu_quiesce = + pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL; + } + } + if (lc++ > 1024) { lc = 0; @@ -5771,6 +5790,8 @@ reload: dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt); if (!ovsrcu_try_quiesce()) { emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache)); + pmd->next_rcu_quiesce = + pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL; } for (i = 0; i < poll_cnt; i++) { @@ -6244,6 +6265,7 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp, pmd->ctx.last_rxq = NULL; pmd_thread_ctx_time_update(pmd); pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL; + pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL; pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN; hmap_init(&pmd->poll_list); hmap_init(&pmd->tx_ports); -- GitLab From 714a104995af2b93eb456b0017ed8431f6769b16 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sat, 21 Mar 2020 15:17:27 -0700 Subject: [PATCH 194/432] python: Fix plural forms of OVSDB types. Fixes two problems. First, the plural of chassis is also chassis. Second, for linguistic analysis we need to consider plain words, not words that have (e.g.) \fB and \fR pasted into them for nroff output. This makes the OVN manpage for ovn-sb(5) talk about "set of Chassis" not "set of Chassiss". 
Acked-by: Numan Siddique Signed-off-by: Ben Pfaff --- python/ovs/db/types.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/ovs/db/types.py b/python/ovs/db/types.py index 3b47b9b30..626ae8fc4 100644 --- a/python/ovs/db/types.py +++ b/python/ovs/db/types.py @@ -591,7 +591,16 @@ class Type(object): if self.value: return "map of %s%s-%s pairs" % (quantity, keyName, valueName) else: - if keyName.endswith('s'): + # Extract the last word from 'keyName' so we can make it + # plural. For linguistic analysis, turn it into English + # without formatting so that we don't consider any prefix or + # suffix added by escapeLiteral. + plainKeyName = (self.key.toEnglish(returnUnchanged) + .rpartition(' ')[2].lower()) + + if plainKeyName == 'chassis': + plural = keyName + elif plainKeyName.endswith('s'): plural = keyName + "es" else: plural = keyName + "s" -- GitLab From 002682727e6e275b05f6cd2f32170b18564ef258 Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 6 Jul 2020 20:11:05 -0700 Subject: [PATCH 195/432] netdev-offload-tc: Add drop action support. Currently drop action is not offloaded when using userspace datapath with tc offload. The patch programs tc gact (generic action) chain ID 0 to drop the packet by setting it to TC_ACT_SHOT. Example: $ ovs-appctl dpctl/add-flow netdev@ovs-netdev \ 'recirc_id(0),in_port(2),eth(),eth_type(0x0806),\ arp(op=2,tha=00:50:56:e1:4b:ab,tip=10.255.1.116)' drop Or no action also infers drop $ ovs-appctl dpctl/add-flow netdev@ovs-netdev \ 'recirc_id(0),in_port(2),eth(),eth_type(0x0806),\ arp(op=2,tha=00:50:56:e1:4b:ab,tip=10.255.1.116)' '' $ tc filter show dev ovs-p0 ingress filter protocol arp pref 2 flower chain 0 filter protocol arp pref 2 flower chain 0 handle 0x1 eth_type arp arp_tip 10.255.1.116 arp_op reply arp_tha 00:50:56:e1:4b:ab skip_hw not_in_hw action order 1: gact action drop ... 
Signed-off-by: William Tu Acked-by: Tonghao Zhang Signed-off-by: Simon Horman --- lib/netdev-offload-tc.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 258d31f54..e50e00f23 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1788,6 +1788,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, action->chain = nl_attr_get_u32(nla); flower.action_count++; recirc_act = true; + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) { + action->type = TC_ACT_GOTO; + action->chain = 0; /* 0 is reserved and not used by recirc. */ + flower.action_count++; } else { VLOG_DBG_RL(&rl, "unsupported put action type: %d", nl_attr_type(nla)); -- GitLab From d8ad173fb9c1a49119f52285d53f481994b76f49 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:20 +0000 Subject: [PATCH 196/432] netdev-offload-dpdk: Log testpmd format for flow create/destroy. To enhance debuggability with DPDK, format the logs as testpmd-format commands. 
Signed-off-by: Eli Britstein Reviewed-by: Roni Bar Yanai Signed-off-by: Ilya Maximets --- lib/netdev-offload-dpdk.c | 380 +++++++++++++++++--------------------- 1 file changed, 167 insertions(+), 213 deletions(-) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 26a75f0f2..16bde7c87 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -118,7 +118,7 @@ ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) } } - VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", + VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow", UUID_ARGS((struct uuid *) ufid)); } @@ -142,13 +142,22 @@ struct flow_actions { static void dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr) { - ds_put_format(s, - " Attributes: " - "ingress=%d, egress=%d, prio=%d, group=%d, transfer=%d\n", - attr->ingress, attr->egress, attr->priority, attr->group, - attr->transfer); + ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s", + attr->ingress ? "ingress " : "", + attr->egress ? "egress " : "", attr->priority, attr->group, + attr->transfer ? "transfer " : ""); } +/* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using + * 'testpmd command'-like format. 
*/ +#define DUMP_PATTERN_ITEM(mask, field, fmt, spec_pri, mask_pri) \ + if (is_all_ones(&mask, sizeof mask)) { \ + ds_put_format(s, field " is " fmt " ", spec_pri); \ + } else if (!is_all_zeros(&mask, sizeof mask)) { \ + ds_put_format(s, field " spec " fmt " " field " mask " fmt " ", \ + spec_pri, mask_pri); \ + } + static void dump_flow_pattern(struct ds *s, const struct rte_flow_item *item) { @@ -156,171 +165,135 @@ dump_flow_pattern(struct ds *s, const struct rte_flow_item *item) const struct rte_flow_item_eth *eth_spec = item->spec; const struct rte_flow_item_eth *eth_mask = item->mask; - ds_put_cstr(s, "rte flow eth pattern:\n"); + ds_put_cstr(s, "eth "); if (eth_spec) { - ds_put_format(s, - " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " - "type=0x%04" PRIx16"\n", - ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), - ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), - ntohs(eth_spec->type)); - } else { - ds_put_cstr(s, " Spec = null\n"); - } - if (eth_mask) { - ds_put_format(s, - " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " - "type=0x%04"PRIx16"\n", - ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), - ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), - ntohs(eth_mask->type)); - } else { - ds_put_cstr(s, " Mask = null\n"); + if (!eth_mask) { + eth_mask = &rte_flow_item_eth_mask; + } + DUMP_PATTERN_ITEM(eth_mask->src, "src", ETH_ADDR_FMT, + ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), + ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes)); + DUMP_PATTERN_ITEM(eth_mask->dst, "dst", ETH_ADDR_FMT, + ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), + ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes)); + DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16, + ntohs(eth_spec->type), + ntohs(eth_mask->type)); } + ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { const struct rte_flow_item_vlan *vlan_spec = item->spec; const struct rte_flow_item_vlan *vlan_mask = item->mask; - ds_put_cstr(s, "rte flow vlan pattern:\n"); + ds_put_cstr(s, "vlan "); if 
(vlan_spec) { - ds_put_format(s, - " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", - ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); - } else { - ds_put_cstr(s, " Spec = null\n"); - } - - if (vlan_mask) { - ds_put_format(s, - " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", - ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); - } else { - ds_put_cstr(s, " Mask = null\n"); + if (!vlan_mask) { + vlan_mask = &rte_flow_item_vlan_mask; + } + DUMP_PATTERN_ITEM(vlan_mask->inner_type, "inner_type", "0x%"PRIx16, + ntohs(vlan_spec->inner_type), + ntohs(vlan_mask->inner_type)); + DUMP_PATTERN_ITEM(vlan_mask->tci, "tci", "0x%"PRIx16, + ntohs(vlan_spec->tci), ntohs(vlan_mask->tci)); } + ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; - ds_put_cstr(s, "rte flow ipv4 pattern:\n"); + ds_put_cstr(s, "ipv4 "); if (ipv4_spec) { - ds_put_format(s, - " Spec: tos=0x%"PRIx8", ttl=%"PRIx8 - ", proto=0x%"PRIx8 - ", src="IP_FMT", dst="IP_FMT"\n", - ipv4_spec->hdr.type_of_service, - ipv4_spec->hdr.time_to_live, - ipv4_spec->hdr.next_proto_id, - IP_ARGS(ipv4_spec->hdr.src_addr), - IP_ARGS(ipv4_spec->hdr.dst_addr)); - } else { - ds_put_cstr(s, " Spec = null\n"); - } - if (ipv4_mask) { - ds_put_format(s, - " Mask: tos=0x%"PRIx8", ttl=%"PRIx8 - ", proto=0x%"PRIx8 - ", src="IP_FMT", dst="IP_FMT"\n", - ipv4_mask->hdr.type_of_service, - ipv4_mask->hdr.time_to_live, - ipv4_mask->hdr.next_proto_id, - IP_ARGS(ipv4_mask->hdr.src_addr), - IP_ARGS(ipv4_mask->hdr.dst_addr)); - } else { - ds_put_cstr(s, " Mask = null\n"); + if (!ipv4_mask) { + ipv4_mask = &rte_flow_item_ipv4_mask; + } + DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, "src", IP_FMT, + IP_ARGS(ipv4_spec->hdr.src_addr), + IP_ARGS(ipv4_mask->hdr.src_addr)); + DUMP_PATTERN_ITEM(ipv4_mask->hdr.dst_addr, "dst", IP_FMT, + IP_ARGS(ipv4_spec->hdr.dst_addr), + IP_ARGS(ipv4_mask->hdr.dst_addr)); + 
DUMP_PATTERN_ITEM(ipv4_mask->hdr.next_proto_id, "proto", + "0x%"PRIx8, ipv4_spec->hdr.next_proto_id, + ipv4_mask->hdr.next_proto_id); + DUMP_PATTERN_ITEM(ipv4_mask->hdr.type_of_service, "tos", + "0x%"PRIx8, ipv4_spec->hdr.type_of_service, + ipv4_mask->hdr.type_of_service); + DUMP_PATTERN_ITEM(ipv4_mask->hdr.time_to_live, "ttl", + "0x%"PRIx8, ipv4_spec->hdr.time_to_live, + ipv4_mask->hdr.time_to_live); } + ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { const struct rte_flow_item_udp *udp_spec = item->spec; const struct rte_flow_item_udp *udp_mask = item->mask; - ds_put_cstr(s, "rte flow udp pattern:\n"); + ds_put_cstr(s, "udp "); if (udp_spec) { - ds_put_format(s, - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", - ntohs(udp_spec->hdr.src_port), - ntohs(udp_spec->hdr.dst_port)); - } else { - ds_put_cstr(s, " Spec = null\n"); - } - if (udp_mask) { - ds_put_format(s, - " Mask: src_port=0x%"PRIx16 - ", dst_port=0x%"PRIx16"\n", - ntohs(udp_mask->hdr.src_port), - ntohs(udp_mask->hdr.dst_port)); - } else { - ds_put_cstr(s, " Mask = null\n"); + if (!udp_mask) { + udp_mask = &rte_flow_item_udp_mask; + } + DUMP_PATTERN_ITEM(udp_mask->hdr.src_port, "src", "%"PRIu16, + ntohs(udp_spec->hdr.src_port), + ntohs(udp_mask->hdr.src_port)); + DUMP_PATTERN_ITEM(udp_mask->hdr.dst_port, "dst", "%"PRIu16, + ntohs(udp_spec->hdr.dst_port), + ntohs(udp_mask->hdr.dst_port)); } + ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { const struct rte_flow_item_sctp *sctp_spec = item->spec; const struct rte_flow_item_sctp *sctp_mask = item->mask; - ds_put_cstr(s, "rte flow sctp pattern:\n"); + ds_put_cstr(s, "sctp "); if (sctp_spec) { - ds_put_format(s, - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", - ntohs(sctp_spec->hdr.src_port), - ntohs(sctp_spec->hdr.dst_port)); - } else { - ds_put_cstr(s, " Spec = null\n"); - } - if (sctp_mask) { - ds_put_format(s, - " Mask: src_port=0x%"PRIx16 - ", dst_port=0x%"PRIx16"\n", - 
ntohs(sctp_mask->hdr.src_port), - ntohs(sctp_mask->hdr.dst_port)); - } else { - ds_put_cstr(s, " Mask = null\n"); + if (!sctp_mask) { + sctp_mask = &rte_flow_item_sctp_mask; + } + DUMP_PATTERN_ITEM(sctp_mask->hdr.src_port, "src", "%"PRIu16, + ntohs(sctp_spec->hdr.src_port), + ntohs(sctp_mask->hdr.src_port)); + DUMP_PATTERN_ITEM(sctp_mask->hdr.dst_port, "dst", "%"PRIu16, + ntohs(sctp_spec->hdr.dst_port), + ntohs(sctp_mask->hdr.dst_port)); } + ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { const struct rte_flow_item_icmp *icmp_spec = item->spec; const struct rte_flow_item_icmp *icmp_mask = item->mask; - ds_put_cstr(s, "rte flow icmp pattern:\n"); + ds_put_cstr(s, "icmp "); if (icmp_spec) { - ds_put_format(s, - " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", - icmp_spec->hdr.icmp_type, - icmp_spec->hdr.icmp_code); - } else { - ds_put_cstr(s, " Spec = null\n"); - } - if (icmp_mask) { - ds_put_format(s, - " Mask: icmp_type=0x%"PRIx8 - ", icmp_code=0x%"PRIx8"\n", - icmp_spec->hdr.icmp_type, - icmp_spec->hdr.icmp_code); - } else { - ds_put_cstr(s, " Mask = null\n"); + if (!icmp_mask) { + icmp_mask = &rte_flow_item_icmp_mask; + } + DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_type, "icmp_type", "%"PRIu8, + icmp_spec->hdr.icmp_type, + icmp_mask->hdr.icmp_type); + DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_code, "icmp_code", "%"PRIu8, + icmp_spec->hdr.icmp_code, + icmp_mask->hdr.icmp_code); } + ds_put_cstr(s, "/ "); } else if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { const struct rte_flow_item_tcp *tcp_spec = item->spec; const struct rte_flow_item_tcp *tcp_mask = item->mask; - ds_put_cstr(s, "rte flow tcp pattern:\n"); + ds_put_cstr(s, "tcp "); if (tcp_spec) { - ds_put_format(s, - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", - ntohs(tcp_spec->hdr.src_port), - ntohs(tcp_spec->hdr.dst_port), - tcp_spec->hdr.data_off, - tcp_spec->hdr.tcp_flags); - } else { - ds_put_cstr(s, " Spec = null\n"); - } - if 
(tcp_mask) { - ds_put_format(s, - " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", - ntohs(tcp_mask->hdr.src_port), - ntohs(tcp_mask->hdr.dst_port), - tcp_mask->hdr.data_off, - tcp_mask->hdr.tcp_flags); - } else { - ds_put_cstr(s, " Mask = null\n"); + if (!tcp_mask) { + tcp_mask = &rte_flow_item_tcp_mask; + } + DUMP_PATTERN_ITEM(tcp_mask->hdr.src_port, "src", "%"PRIu16, + ntohs(tcp_spec->hdr.src_port), + ntohs(tcp_mask->hdr.src_port)); + DUMP_PATTERN_ITEM(tcp_mask->hdr.dst_port, "dst", "%"PRIu16, + ntohs(tcp_spec->hdr.dst_port), + ntohs(tcp_mask->hdr.dst_port)); + DUMP_PATTERN_ITEM(tcp_mask->hdr.tcp_flags, "flags", "0x%"PRIx8, + tcp_spec->hdr.tcp_flags, + tcp_mask->hdr.tcp_flags); } + ds_put_cstr(s, "/ "); } else { ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type); } @@ -332,43 +305,26 @@ dump_flow_action(struct ds *s, const struct rte_flow_action *actions) if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) { const struct rte_flow_action_mark *mark = actions->conf; - ds_put_cstr(s, "rte flow mark action:\n"); + ds_put_cstr(s, "mark "); if (mark) { - ds_put_format(s, " Mark: id=%d\n", mark->id); - } else { - ds_put_cstr(s, " Mark = null\n"); + ds_put_format(s, "id %d ", mark->id); } + ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) { - const struct rte_flow_action_rss *rss = actions->conf; - - ds_put_cstr(s, "rte flow RSS action:\n"); - if (rss) { - ds_put_format(s, " RSS: queue_num=%d\n", rss->queue_num); - } else { - ds_put_cstr(s, " RSS = null\n"); - } + ds_put_cstr(s, "rss / "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) { - const struct rte_flow_action_count *count = actions->conf; - - ds_put_cstr(s, "rte flow count action:\n"); - if (count) { - ds_put_format(s, " Count: shared=%d, id=%d\n", count->shared, - count->id); - } else { - ds_put_cstr(s, " Count = null\n"); - } + ds_put_cstr(s, "count / "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) { 
const struct rte_flow_action_port_id *port_id = actions->conf; - ds_put_cstr(s, "rte flow port-id action:\n"); + ds_put_cstr(s, "port_id "); if (port_id) { - ds_put_format(s, " Port-id: original=%d, id=%d\n", + ds_put_format(s, "original %d id %d ", port_id->original, port_id->id); - } else { - ds_put_cstr(s, " Port-id = null\n"); } + ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { - ds_put_cstr(s, "rte flow drop action\n"); + ds_put_cstr(s, "drop / "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST) { const struct rte_flow_action_set_mac *set_mac = actions->conf; @@ -376,85 +332,74 @@ dump_flow_action(struct ds *s, const struct rte_flow_action *actions) char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST ? "dst" : "src"; - ds_put_format(s, "rte flow set-mac-%s action:\n", dirstr); + ds_put_format(s, "set_mac_%s ", dirstr); if (set_mac) { - ds_put_format(s, - " Set-mac-%s: "ETH_ADDR_FMT"\n", dirstr, + ds_put_format(s, "mac_addr "ETH_ADDR_FMT" ", ETH_ADDR_BYTES_ARGS(set_mac->mac_addr)); - } else { - ds_put_format(s, " Set-mac-%s = null\n", dirstr); } + ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) { const struct rte_flow_action_set_ipv4 *set_ipv4 = actions->conf; char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST ? 
"dst" : "src"; - ds_put_format(s, "rte flow set-ipv4-%s action:\n", dirstr); + ds_put_format(s, "set_ipv4_%s ", dirstr); if (set_ipv4) { - ds_put_format(s, - " Set-ipv4-%s: "IP_FMT"\n", dirstr, + ds_put_format(s, "ipv4_addr "IP_FMT" ", IP_ARGS(set_ipv4->ipv4_addr)); - } else { - ds_put_format(s, " Set-ipv4-%s = null\n", dirstr); } + ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) { const struct rte_flow_action_set_ttl *set_ttl = actions->conf; - ds_put_cstr(s, "rte flow set-ttl action:\n"); + ds_put_cstr(s, "set_ttl "); if (set_ttl) { - ds_put_format(s, " Set-ttl: %d\n", set_ttl->ttl_value); - } else { - ds_put_cstr(s, " Set-ttl = null\n"); + ds_put_format(s, "ttl_value %d ", set_ttl->ttl_value); } + ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC || actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST) { const struct rte_flow_action_set_tp *set_tp = actions->conf; char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST ? 
"dst" : "src"; - ds_put_format(s, "rte flow set-tcp/udp-port-%s action:\n", dirstr); + ds_put_format(s, "set_tp_%s ", dirstr); if (set_tp) { - ds_put_format(s, " Set-%s-tcp/udp-port: %"PRIu16"\n", dirstr, - ntohs(set_tp->port)); - } else { - ds_put_format(s, " Set-%s-tcp/udp-port = null\n", dirstr); + ds_put_format(s, "port %"PRIu16" ", ntohs(set_tp->port)); } + ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN) { - const struct rte_flow_action_of_push_vlan *rte_push_vlan; + const struct rte_flow_action_of_push_vlan *of_push_vlan = + actions->conf; - rte_push_vlan = actions->conf; - ds_put_cstr(s, "rte flow push-vlan action:\n"); - if (rte_push_vlan) { - ds_put_format(s, " Push-vlan: 0x%"PRIx16"\n", - ntohs(rte_push_vlan->ethertype)); - } else { - ds_put_format(s, " Push-vlan = null\n"); + ds_put_cstr(s, "of_push_vlan "); + if (of_push_vlan) { + ds_put_format(s, "ethertype 0x%"PRIx16" ", + ntohs(of_push_vlan->ethertype)); } + ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) { - const struct rte_flow_action_of_set_vlan_pcp *rte_vlan_pcp; + const struct rte_flow_action_of_set_vlan_pcp *of_set_vlan_pcp = + actions->conf; - rte_vlan_pcp = actions->conf; - ds_put_cstr(s, "rte flow set-vlan-pcp action:\n"); - if (rte_vlan_pcp) { - ds_put_format(s, " Set-vlan-pcp: %"PRIu8"\n", - rte_vlan_pcp->vlan_pcp); - } else { - ds_put_format(s, " Set-vlan-pcp = null\n"); + ds_put_cstr(s, "of_set_vlan_pcp "); + if (of_set_vlan_pcp) { + ds_put_format(s, "vlan_pcp %"PRIu8" ", of_set_vlan_pcp->vlan_pcp); } + ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) { - const struct rte_flow_action_of_set_vlan_vid *rte_vlan_vid; + const struct rte_flow_action_of_set_vlan_vid *of_set_vlan_vid = + actions->conf; - rte_vlan_vid = actions->conf; - ds_put_cstr(s, "rte flow set-vlan-vid action:\n"); - if (rte_vlan_vid) { - ds_put_format(s, " Set-vlan-vid: %"PRIu16"\n", - 
ntohs(rte_vlan_vid->vlan_vid)); - } else { - ds_put_format(s, " Set-vlan-vid = null\n"); + ds_put_cstr(s, "of_set_vlan_vid "); + if (of_set_vlan_vid) { + ds_put_format(s, "vlan_vid %"PRIu16" ", + ntohs(of_set_vlan_vid->vlan_vid)); } + ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) { - ds_put_cstr(s, "rte flow pop-vlan action\n"); + ds_put_cstr(s, "of_pop_vlan / "); } else { ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); } @@ -469,12 +414,15 @@ dump_flow(struct ds *s, if (attr) { dump_flow_attr(s, attr); } + ds_put_cstr(s, "pattern "); while (items && items->type != RTE_FLOW_ITEM_TYPE_END) { dump_flow_pattern(s, items++); } + ds_put_cstr(s, "end actions "); while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) { dump_flow_action(s, actions++); } + ds_put_cstr(s, "end"); return s; } @@ -493,8 +441,9 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev, if (!VLOG_DROP_DBG(&rl)) { ds_init(&s); dump_flow(&s, attr, items, actions); - VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" created:\n%s", - netdev_get_name(netdev), (intptr_t) flow, ds_cstr(&s)); + VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" flow create %d %s", + netdev_get_name(netdev), (intptr_t) flow, + netdev_dpdk_get_port_id(netdev), ds_cstr(&s)); ds_destroy(&s); } } else { @@ -508,7 +457,9 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev, if (!vlog_should_drop(&this_module, level, &rl)) { ds_init(&s); dump_flow(&s, attr, items, actions); - VLOG_RL(&rl, level, "Failed flow:\n%s", ds_cstr(&s)); + VLOG_RL(&rl, level, "Failed flow: %s: flow create %d %s", + netdev_get_name(netdev), + netdev_dpdk_get_port_id(netdev), ds_cstr(&s)); ds_destroy(&s); } } @@ -1138,7 +1089,7 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev, goto out; } ufid_to_rte_flow_associate(ufid, flow, actions_offloaded); - VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", + VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT, netdev_get_name(netdev), flow, 
UUID_ARGS((struct uuid *)ufid)); out: @@ -1228,12 +1179,15 @@ netdev_offload_dpdk_destroy_flow(struct netdev *netdev, if (ret == 0) { ufid_to_rte_flow_disassociate(ufid); - VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n", - netdev_get_name(netdev), rte_flow, - UUID_ARGS((struct uuid *)ufid)); + VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR + " flow destroy %d ufid " UUID_FMT, + netdev_get_name(netdev), (intptr_t) rte_flow, + netdev_dpdk_get_port_id(netdev), + UUID_ARGS((struct uuid *) ufid)); } else { - VLOG_ERR("%s: Failed to destroy flow: %s (%u)\n", - netdev_get_name(netdev), error.message, error.type); + VLOG_ERR("Failed flow: %s: flow destroy %d ufid " UUID_FMT, + netdev_get_name(netdev), netdev_dpdk_get_port_id(netdev), + UUID_ARGS((struct uuid *) ufid)); } return ret; @@ -1327,7 +1281,7 @@ netdev_offload_dpdk_flow_get(struct netdev *netdev, ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow, &query, &error); if (ret) { - VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p\n", + VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p", netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid), rte_flow_data->rte_flow); goto out; -- GitLab From 9ac365a8edd02934fed259108ec31c114d75f9f1 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:21 +0000 Subject: [PATCH 197/432] dpif-netdev: Add mega ufid in flow add/del log. As offload is done using the mega ufid of a flow, for better debugability, add it in the log message. 
Signed-off-by: Eli Britstein Reviewed-by: Roni Bar Yanai Signed-off-by: Ilya Maximets --- lib/dpif-netdev.c | 13 +++++++++---- tests/dpif-netdev.at | 2 ++ tests/ofproto-macros.at | 3 ++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index e037eab2a..46bcdd897 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2356,7 +2356,8 @@ mark_to_flow_associate(const uint32_t mark, struct dp_netdev_flow *flow) hash_int(mark, 0)); flow->mark = mark; - VLOG_DBG("Associated dp_netdev flow %p with mark %u\n", flow, mark); + VLOG_DBG("Associated dp_netdev flow %p with mark %u mega_ufid "UUID_FMT, + flow, mark, UUID_ARGS((struct uuid *) &flow->mega_ufid)); } static bool @@ -2405,7 +2406,8 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd, } flow_mark_free(mark); - VLOG_DBG("Freed flow mark %u\n", mark); + VLOG_DBG("Freed flow mark %u mega_ufid "UUID_FMT, mark, + UUID_ARGS((struct uuid *) &flow->mega_ufid)); megaflow_to_mark_disassociate(&flow->mega_ufid); } @@ -2612,8 +2614,9 @@ dp_netdev_flow_offload_main(void *data OVS_UNUSED) OVS_NOT_REACHED(); } - VLOG_DBG("%s to %s netdev flow\n", - ret == 0 ? "succeed" : "failed", op); + VLOG_DBG("%s to %s netdev flow "UUID_FMT, + ret == 0 ? 
"succeed" : "failed", op, + UUID_ARGS((struct uuid *) &offload->flow->mega_ufid)); dp_netdev_free_flow_offload(offload); ovsrcu_quiesce(); } @@ -3484,6 +3487,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, ds_put_cstr(&ds, "flow_add: "); odp_format_ufid(ufid, &ds); + ds_put_cstr(&ds, " mega_"); + odp_format_ufid(&flow->mega_ufid, &ds); ds_put_cstr(&ds, " "); odp_flow_format(key_buf.data, key_buf.size, mask_buf.data, mask_buf.size, diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index 9c0a42d00..ff173677a 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -13,6 +13,7 @@ strip_timers () { strip_xout () { sed ' + s/mega_ufid:[-0-9a-f]* // s/ufid:[-0-9a-f]* // s/used:[0-9]*\.[0-9]*/used:0.0/ s/actions:.*/actions: / @@ -23,6 +24,7 @@ strip_xout () { strip_xout_keep_actions () { sed ' + s/mega_ufid:[-0-9a-f]* // s/ufid:[-0-9a-f]* // s/used:[0-9]*\.[0-9]*/used:0.0/ s/packets:[0-9]*/packets:0/ diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at index b2b17eed3..87f9ae280 100644 --- a/tests/ofproto-macros.at +++ b/tests/ofproto-macros.at @@ -131,7 +131,8 @@ strip_duration () { # Strips 'ufid:...' from output, to make it easier to compare. # (ufids are random.) strip_ufid () { - sed 's/ufid:[[-0-9a-f]]* //' + sed 's/mega_ufid:[[-0-9a-f]]* // + s/ufid:[[-0-9a-f]]* //' } m4_divert_pop([PREPARE_TESTS]) -- GitLab From 77057965cb36dffd38829efccd2ff2eac8d444d2 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:22 +0000 Subject: [PATCH 198/432] dpif-netdev: Don't use zero flow mark. Zero flow mark is used to indicate the HW to remove the mark. A packet marked with zero mark is received in SW without a mark at all, so it cannot be used as a valid mark. Change the pool range to fix it. 
Fixes: 241bad15d99a ("dpif-netdev: associate flow with a mark id") Signed-off-by: Eli Britstein Reviewed-by: Roni Bar Yanai Acked-by: Sriharsha Basavapatna Signed-off-by: Ilya Maximets --- lib/dpif-netdev.c | 8 ++++++-- tests/dpif-netdev.at | 18 +++++++++--------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 46bcdd897..0fa5b6fff 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2252,7 +2252,11 @@ dp_netdev_pmd_find_dpcls(struct dp_netdev_pmd_thread *pmd, } #define MAX_FLOW_MARK (UINT32_MAX - 1) -#define INVALID_FLOW_MARK (UINT32_MAX) +#define INVALID_FLOW_MARK 0 +/* Zero flow mark is used to indicate the HW to remove the mark. A packet + * marked with zero mark is received in SW without a mark at all, so it + * cannot be used as a valid mark. + */ struct megaflow_to_mark_data { const struct cmap_node node; @@ -2278,7 +2282,7 @@ flow_mark_alloc(void) if (!flow_mark.pool) { /* Haven't initiated yet, do it here */ - flow_mark.pool = id_pool_create(0, MAX_FLOW_MARK); + flow_mark.pool = id_pool_create(1, MAX_FLOW_MARK); } if (id_pool_alloc_id(flow_mark.pool, &mark)) { diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index ff173677a..ec5ffc290 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -395,7 +395,7 @@ skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc # Check that flow successfully offloaded. OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log]) AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: flow put[[create]]: flow match: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, mark: 0 +p1: flow put[[create]]: flow match: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, mark: 1 ]) # Check that datapath flow installed successfully. 
AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl @@ -406,7 +406,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), a # Check for succesfull packet matching with installed offloaded flow. AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: packet: ip,vlan_tci=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=64 matches with flow: recirc_id=0,eth,ip,vlan_tci=0x0000,nw_frag=no with mark: 0 +p1: packet: ip,vlan_tci=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=64 matches with flow: recirc_id=0,eth,ip,vlan_tci=0x0000,nw_frag=no with mark: 1 ]) ovs-appctl revalidator/wait @@ -423,7 +423,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), p # Check that flow successfully deleted from HW. OVS_WAIT_UNTIL([grep "succeed to delete netdev flow" ovs-vswitchd.log]) AT_CHECK([filter_hw_flow_del < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: flow del: mark: 0 +p1: flow del: mark: 1 ]) OVS_VSWITCHD_STOP AT_CLEANUP]) @@ -462,7 +462,7 @@ packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type # Check that flow successfully offloaded. OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log]) AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: flow put[[create]]: flow match: recirc_id=0,eth,udp,in_port=1,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82, mark: 0 +p1: flow put[[create]]: flow match: recirc_id=0,eth,udp,in_port=1,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82, mark: 1 ]) # Check that datapath flow installed successfully. 
AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl @@ -474,7 +474,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp= # Check for succesfull packet matching with installed offloaded flow. AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl p1: packet: udp,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=81,tp_dst=82 dnl -matches with flow: recirc_id=0,eth,udp,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82 with mark: 0 +matches with flow: recirc_id=0,eth,udp,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82 with mark: 1 ]) ovs-appctl revalidator/wait @@ -492,7 +492,7 @@ packets:1, bytes:64, used:0.0s, actions:set(ipv4(src=192.168.0.7)),set(udp(dst=3 # Check that flow successfully deleted from HW. OVS_WAIT_UNTIL([grep "succeed to delete netdev flow" ovs-vswitchd.log]) AT_CHECK([filter_hw_flow_del < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: flow del: mark: 0 +p1: flow del: mark: 1 ]) # Check that ip address and udp port were correctly modified in output packets. @@ -539,7 +539,7 @@ packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type # Check that flow successfully offloaded. OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log]) AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: flow put[[create]]: flow match: recirc_id=0,eth,arp,in_port=1,dl_vlan=99,dl_vlan_pcp=7, mark: 0 +p1: flow put[[create]]: flow match: recirc_id=0,eth,arp,in_port=1,dl_vlan=99,dl_vlan_pcp=7, mark: 1 ]) # Check that datapath flow installed successfully. AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl @@ -551,7 +551,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp= # Check for succesfull packet matching with installed offloaded flow. 
AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl p1: packet: arp,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00 dnl -matches with flow: recirc_id=0,eth,arp,dl_vlan=99,dl_vlan_pcp=7 with mark: 0 +matches with flow: recirc_id=0,eth,arp,dl_vlan=99,dl_vlan_pcp=7 with mark: 1 ]) ovs-appctl revalidator/wait @@ -569,7 +569,7 @@ packets:1, bytes:64, used:0.0s, actions:pop_vlan,push_vlan(vid=11,pcp=7),1 # Check that flow successfully deleted from HW. OVS_WAIT_UNTIL([grep "succeed to delete netdev flow" ovs-vswitchd.log]) AT_CHECK([filter_hw_flow_del < ovs-vswitchd.log | strip_xout], [0], [dnl -p1: flow del: mark: 0 +p1: flow del: mark: 1 ]) # Check that VLAN ID was correctly modified in output packets. -- GitLab From 864852a0624a25953ab015cc82b784595b03f33f Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:23 +0000 Subject: [PATCH 199/432] netdev-offload-dpdk: Fix Ethernet matching for type only. For OVS rule of the form "eth type is 0x1234 / end", rule is offloaded in the form of "eth / end", which is incorrect. Fix it. 
Fixes: e8a2b5bf92bb ("netdev-dpdk: implement flow offload with rte flow") Signed-off-by: Eli Britstein Reviewed-by: Roni Bar Yanai Acked-by: Sriharsha Basavapatna Signed-off-by: Ilya Maximets --- lib/netdev-offload-dpdk.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 16bde7c87..63a63209f 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -551,7 +551,8 @@ parse_flow_match(struct flow_patterns *patterns, uint8_t proto = 0; /* Eth */ - if (!eth_addr_is_zero(match->wc.masks.dl_src) || + if (match->wc.masks.dl_type || + !eth_addr_is_zero(match->wc.masks.dl_src) || !eth_addr_is_zero(match->wc.masks.dl_dst)) { struct rte_flow_item_eth *spec, *mask; @@ -567,15 +568,6 @@ parse_flow_match(struct flow_patterns *patterns, mask->type = match->wc.masks.dl_type; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask); - } else { - /* - * If user specifies a flow (like UDP flow) without L2 patterns, - * OVS will at least set the dl_type. Normally, it's enough to - * create an eth pattern just with it. Unluckily, some Intel's - * NIC (such as XL710) doesn't support that. Below is a workaround, - * which simply matches any L2 pkts. - */ - add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); } /* VLAN */ -- GitLab From 07ac8f6a5a44948786b2e6630663b05ccb4a3d11 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:24 +0000 Subject: [PATCH 200/432] netdev-offload-dpdk: Support partial TCP/UDP port matching. The cited commit rejected partial (masked) matching of TCP/UDP ports, preventing such offloads on hardware that supports them. Remove this restriction.
Fixes: e8a2b5bf92bb ("netdev-dpdk: implement flow offload with rte flow") Signed-off-by: Eli Britstein Reviewed-by: Roni Bar Yanai Acked-by: Sriharsha Basavapatna Signed-off-by: Ilya Maximets --- lib/netdev-offload-dpdk.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 63a63209f..2d20e068f 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -622,11 +622,6 @@ parse_flow_match(struct flow_patterns *patterns, return -1; } - if ((match->wc.masks.tp_src && match->wc.masks.tp_src != OVS_BE16_MAX) || - (match->wc.masks.tp_dst && match->wc.masks.tp_dst != OVS_BE16_MAX)) { - return -1; - } - if (proto == IPPROTO_TCP) { struct rte_flow_item_tcp *spec, *mask; -- GitLab From a79eae87abe4031353c10f775e1948b4a2c820c3 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:25 +0000 Subject: [PATCH 201/432] netdev-offload-dpdk: Remove pre-validate of patterns function. The function of adding patterns by requested matches checks that it consumed all the required matches, and errs if not. For functional purposes there is no need for pre-validation. For performance such validation may decrease the time spent for failing flows, but at the expense of increasing the time spent for the good flows, and code complexity. Remove the pre-validation function.
Signed-off-by: Eli Britstein Reviewed-by: Roni Bar Yanai Acked-by: Sriharsha Basavapatna Signed-off-by: Ilya Maximets --- lib/netdev-offload-dpdk.c | 130 +++++++++++++++----------------------- 1 file changed, 51 insertions(+), 79 deletions(-) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 2d20e068f..426b8414f 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -545,11 +545,22 @@ free_flow_actions(struct flow_actions *actions) static int parse_flow_match(struct flow_patterns *patterns, - const struct match *match) + struct match *match) { uint8_t *next_proto_mask = NULL; + struct flow *consumed_masks; uint8_t proto = 0; + consumed_masks = &match->wc.masks; + + memset(&consumed_masks->in_port, 0, sizeof consumed_masks->in_port); + /* recirc id must be zero. */ + if (match->wc.masks.recirc_id & match->flow.recirc_id) { + return -1; + } + consumed_masks->recirc_id = 0; + consumed_masks->packet_type = 0; + /* Eth */ if (match->wc.masks.dl_type || !eth_addr_is_zero(match->wc.masks.dl_src) || @@ -567,6 +578,10 @@ parse_flow_match(struct flow_patterns *patterns, memcpy(&mask->src, &match->wc.masks.dl_src, sizeof mask->src); mask->type = match->wc.masks.dl_type; + memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst); + memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src); + consumed_masks->dl_type = 0; + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask); } @@ -585,6 +600,11 @@ parse_flow_match(struct flow_patterns *patterns, add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask); } + /* For untagged matching match->wc.masks.vlans[0].tci is 0xFFFF and + * match->flow.vlans[0].tci is 0. Consuming is needed outside of the if + * scope to handle that. 
+ */ + memset(&consumed_masks->vlans[0], 0, sizeof consumed_masks->vlans[0]); /* IP v4 */ if (match->flow.dl_type == htons(ETH_TYPE_IP)) { @@ -605,6 +625,12 @@ parse_flow_match(struct flow_patterns *patterns, mask->hdr.src_addr = match->wc.masks.nw_src; mask->hdr.dst_addr = match->wc.masks.nw_dst; + consumed_masks->nw_tos = 0; + consumed_masks->nw_ttl = 0; + consumed_masks->nw_proto = 0; + consumed_masks->nw_src = 0; + consumed_masks->nw_dst = 0; + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask); /* Save proto for L4 protocol setup. */ @@ -612,6 +638,11 @@ parse_flow_match(struct flow_patterns *patterns, mask->hdr.next_proto_id; next_proto_mask = &mask->hdr.next_proto_id; } + /* If fragmented, then don't HW accelerate - for now. */ + if (match->wc.masks.nw_frag & match->flow.nw_frag) { + return -1; + } + consumed_masks->nw_frag = 0; if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && proto != IPPROTO_SCTP && proto != IPPROTO_TCP && @@ -638,6 +669,10 @@ parse_flow_match(struct flow_patterns *patterns, mask->hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; mask->hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; + consumed_masks->tp_src = 0; + consumed_masks->tp_dst = 0; + consumed_masks->tcp_flags = 0; + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask); /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. */ @@ -656,6 +691,9 @@ parse_flow_match(struct flow_patterns *patterns, mask->hdr.src_port = match->wc.masks.tp_src; mask->hdr.dst_port = match->wc.masks.tp_dst; + consumed_masks->tp_src = 0; + consumed_masks->tp_dst = 0; + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask); /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. 
*/ @@ -674,6 +712,9 @@ parse_flow_match(struct flow_patterns *patterns, mask->hdr.src_port = match->wc.masks.tp_src; mask->hdr.dst_port = match->wc.masks.tp_dst; + consumed_masks->tp_src = 0; + consumed_masks->tp_dst = 0; + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask); /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */ @@ -692,6 +733,9 @@ parse_flow_match(struct flow_patterns *patterns, mask->hdr.icmp_type = (uint8_t) ntohs(match->wc.masks.tp_src); mask->hdr.icmp_code = (uint8_t) ntohs(match->wc.masks.tp_dst); + consumed_masks->tp_src = 0; + consumed_masks->tp_dst = 0; + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask); /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */ @@ -702,6 +746,9 @@ parse_flow_match(struct flow_patterns *patterns, add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL); + if (!is_all_zeros(consumed_masks, sizeof *consumed_masks)) { + return -1; + } return 0; } @@ -1044,7 +1091,7 @@ out: static int netdev_offload_dpdk_add_flow(struct netdev *netdev, - const struct match *match, + struct match *match, struct nlattr *nl_actions, size_t actions_len, const ovs_u128 *ufid, @@ -1057,6 +1104,8 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev, ret = parse_flow_match(&patterns, match); if (ret) { + VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported", + netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid)); goto out; } @@ -1084,78 +1133,6 @@ out: return ret; } -/* - * Check if any unsupported flow patterns are specified. - */ -static int -netdev_offload_dpdk_validate_flow(const struct match *match) -{ - struct match match_zero_wc; - const struct flow *masks = &match->wc.masks; - - /* Create a wc-zeroed version of flow. 
*/ - match_init(&match_zero_wc, &match->flow, &match->wc); - - if (!is_all_zeros(&match_zero_wc.flow.tunnel, - sizeof match_zero_wc.flow.tunnel)) { - goto err; - } - - if (masks->metadata || masks->skb_priority || - masks->pkt_mark || masks->dp_hash) { - goto err; - } - - /* recirc id must be zero. */ - if (match_zero_wc.flow.recirc_id) { - goto err; - } - - if (masks->ct_state || masks->ct_nw_proto || - masks->ct_zone || masks->ct_mark || - !ovs_u128_is_zero(masks->ct_label)) { - goto err; - } - - if (masks->conj_id || masks->actset_output) { - goto err; - } - - /* Unsupported L2. */ - if (!is_all_zeros(masks->mpls_lse, sizeof masks->mpls_lse)) { - goto err; - } - - /* Unsupported L3. */ - if (masks->ipv6_label || masks->ct_nw_src || masks->ct_nw_dst || - !is_all_zeros(&masks->ipv6_src, sizeof masks->ipv6_src) || - !is_all_zeros(&masks->ipv6_dst, sizeof masks->ipv6_dst) || - !is_all_zeros(&masks->ct_ipv6_src, sizeof masks->ct_ipv6_src) || - !is_all_zeros(&masks->ct_ipv6_dst, sizeof masks->ct_ipv6_dst) || - !is_all_zeros(&masks->nd_target, sizeof masks->nd_target) || - !is_all_zeros(&masks->nsh, sizeof masks->nsh) || - !is_all_zeros(&masks->arp_sha, sizeof masks->arp_sha) || - !is_all_zeros(&masks->arp_tha, sizeof masks->arp_tha)) { - goto err; - } - - /* If fragmented, then don't HW accelerate - for now. */ - if (match_zero_wc.flow.nw_frag) { - goto err; - } - - /* Unsupported L4. 
*/ - if (masks->igmp_group_ip4 || masks->ct_tp_src || masks->ct_tp_dst) { - goto err; - } - - return 0; - -err: - VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols"); - return -1; -} - static int netdev_offload_dpdk_destroy_flow(struct netdev *netdev, const ovs_u128 *ufid, @@ -1202,11 +1179,6 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match, } } - ret = netdev_offload_dpdk_validate_flow(match); - if (ret < 0) { - return ret; - } - if (stats) { memset(stats, 0, sizeof *stats); } -- GitLab From 85270e99630530300e579ebd3d52b3a11ffbe2bc Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:26 +0000 Subject: [PATCH 202/432] netdev-offload-dpdk: Add IPv6 pattern matching. Add support for IPv6 pattern matching for offloading flows. Signed-off-by: Eli Britstein Reviewed-by: Roni Bar Yanai Acked-by: Sriharsha Basavapatna Signed-off-by: Ilya Maximets --- Documentation/howto/dpdk.rst | 2 +- NEWS | 1 + lib/netdev-offload-dpdk.c | 75 ++++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst index c40fcafcb..ebde9aeb9 100644 --- a/Documentation/howto/dpdk.rst +++ b/Documentation/howto/dpdk.rst @@ -385,7 +385,7 @@ The validated NICs are: Supported protocols for hardware offload matches are: - L2: Ethernet, VLAN -- L3: IPv4 +- L3: IPv4, IPv6 - L4: TCP, UDP, SCTP, ICMP Supported actions for hardware offload are: diff --git a/NEWS b/NEWS index ac992d17f..4dddbb23b 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,7 @@ Post-v2.13.0 * Deprecated DPDK pdump packet capture support removed. * Deprecated DPDK ring ports (dpdkr) are no longer supported. * Add hardware offload support for VLAN Push/Pop actions (experimental). + * Add hardware offload support for matching IPv6 protocol (experimental). - Linux datapath: * Support for kernel versions up to 5.5.x. 
- AF_XDP: diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 426b8414f..110f6e829 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -16,6 +16,8 @@ */ #include +#include +#include #include #include "cmap.h" @@ -294,6 +296,41 @@ dump_flow_pattern(struct ds *s, const struct rte_flow_item *item) tcp_mask->hdr.tcp_flags); } ds_put_cstr(s, "/ "); + } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6) { + const struct rte_flow_item_ipv6 *ipv6_spec = item->spec; + const struct rte_flow_item_ipv6 *ipv6_mask = item->mask; + + char addr_str[INET6_ADDRSTRLEN]; + char mask_str[INET6_ADDRSTRLEN]; + struct in6_addr addr, mask; + + ds_put_cstr(s, "ipv6 "); + if (ipv6_spec) { + if (!ipv6_mask) { + ipv6_mask = &rte_flow_item_ipv6_mask; + } + memcpy(&addr, ipv6_spec->hdr.src_addr, sizeof addr); + memcpy(&mask, ipv6_mask->hdr.src_addr, sizeof mask); + ipv6_string_mapped(addr_str, &addr); + ipv6_string_mapped(mask_str, &mask); + DUMP_PATTERN_ITEM(mask, "src", "%s", addr_str, mask_str); + + memcpy(&addr, ipv6_spec->hdr.dst_addr, sizeof addr); + memcpy(&mask, ipv6_mask->hdr.dst_addr, sizeof mask); + ipv6_string_mapped(addr_str, &addr); + ipv6_string_mapped(mask_str, &mask); + DUMP_PATTERN_ITEM(mask, "dst", "%s", addr_str, mask_str); + + DUMP_PATTERN_ITEM(ipv6_mask->hdr.proto, "proto", "%"PRIu8, + ipv6_spec->hdr.proto, ipv6_mask->hdr.proto); + DUMP_PATTERN_ITEM(ipv6_mask->hdr.vtc_flow, "tc", "0x%"PRIx32, + ntohl(ipv6_spec->hdr.vtc_flow), + ntohl(ipv6_mask->hdr.vtc_flow)); + DUMP_PATTERN_ITEM(ipv6_mask->hdr.hop_limits, "hop", "%"PRIu8, + ipv6_spec->hdr.hop_limits, + ipv6_mask->hdr.hop_limits); + } + ds_put_cstr(s, "/ "); } else { ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type); } @@ -644,6 +681,44 @@ parse_flow_match(struct flow_patterns *patterns, } consumed_masks->nw_frag = 0; + /* IP v6 */ + if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) { + struct rte_flow_item_ipv6 *spec, *mask; + + spec = xzalloc(sizeof *spec); + mask = 
xzalloc(sizeof *mask); + + spec->hdr.proto = match->flow.nw_proto; + spec->hdr.hop_limits = match->flow.nw_ttl; + spec->hdr.vtc_flow = + htonl((uint32_t) match->flow.nw_tos << RTE_IPV6_HDR_TC_SHIFT); + memcpy(spec->hdr.src_addr, &match->flow.ipv6_src, + sizeof spec->hdr.src_addr); + memcpy(spec->hdr.dst_addr, &match->flow.ipv6_dst, + sizeof spec->hdr.dst_addr); + + mask->hdr.proto = match->wc.masks.nw_proto; + mask->hdr.hop_limits = match->wc.masks.nw_ttl; + mask->hdr.vtc_flow = + htonl((uint32_t) match->wc.masks.nw_tos << RTE_IPV6_HDR_TC_SHIFT); + memcpy(mask->hdr.src_addr, &match->wc.masks.ipv6_src, + sizeof mask->hdr.src_addr); + memcpy(mask->hdr.dst_addr, &match->wc.masks.ipv6_dst, + sizeof mask->hdr.dst_addr); + + consumed_masks->nw_proto = 0; + consumed_masks->nw_ttl = 0; + consumed_masks->nw_tos = 0; + memset(&consumed_masks->ipv6_src, 0, sizeof consumed_masks->ipv6_src); + memset(&consumed_masks->ipv6_dst, 0, sizeof consumed_masks->ipv6_dst); + + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV6, spec, mask); + + /* Save proto for L4 protocol setup. */ + proto = spec->hdr.proto & mask->hdr.proto; + next_proto_mask = &mask->hdr.proto; + } + if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && proto != IPPROTO_SCTP && proto != IPPROTO_TCP && (match->wc.masks.tp_src || -- GitLab From b6207b1d2711933cb4c4f03d6ac91f4a7da71912 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:27 +0000 Subject: [PATCH 203/432] netdev-offload-dpdk: Support offload of set IPv6 actions. Add support for set IPv6 actions. 
Signed-off-by: Eli Britstein Reviewed-by: Roni Bar Yanai Acked-by: Sriharsha Basavapatna Signed-off-by: Ilya Maximets --- Documentation/howto/dpdk.rst | 1 + NEWS | 2 ++ lib/netdev-offload-dpdk.c | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst index ebde9aeb9..fb9bd2d10 100644 --- a/Documentation/howto/dpdk.rst +++ b/Documentation/howto/dpdk.rst @@ -396,6 +396,7 @@ Supported actions for hardware offload are: - Modification of IPv4 (mod_nw_src/mod_nw_dst/mod_nw_ttl). - Modification of TCP/UDP (mod_tp_src/mod_tp_dst). - VLAN Push/Pop (push_vlan/pop_vlan). +- Modification of IPv6 (set_field:->ipv6_src/ipv6_dst/mod_nw_ttl). Further Reading --------------- diff --git a/NEWS b/NEWS index 4dddbb23b..f172362d1 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,8 @@ Post-v2.13.0 * Deprecated DPDK ring ports (dpdkr) are no longer supported. * Add hardware offload support for VLAN Push/Pop actions (experimental). * Add hardware offload support for matching IPv6 protocol (experimental). + * Add hardware offload support for set of IPv6 src/dst/ttl + actions (experimental). - Linux datapath: * Support for kernel versions up to 5.5.x. - AF_XDP: diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 110f6e829..042dccc15 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -437,6 +437,20 @@ dump_flow_action(struct ds *s, const struct rte_flow_action *actions) ds_put_cstr(s, "/ "); } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) { ds_put_cstr(s, "of_pop_vlan / "); + } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC || + actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST) { + const struct rte_flow_action_set_ipv6 *set_ipv6 = actions->conf; + + char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST + ? 
"dst" : "src"; + + ds_put_format(s, "set_ipv6_%s ", dirstr); + if (set_ipv6) { + ds_put_cstr(s, "ipv6_addr "); + ipv6_format_addr((struct in6_addr *) &set_ipv6->ipv6_addr, s); + ds_put_cstr(s, " "); + } + ds_put_cstr(s, "/ "); } else { ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); } @@ -984,6 +998,12 @@ BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) == MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) == MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl)); +BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) == + MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_src)); +BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) == + MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_dst)); +BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) == + MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_hlimit)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == MEMBER_SIZEOF(struct ovs_key_tcp, tcp_src)); BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) == @@ -1033,6 +1053,18 @@ parse_set_actions(struct flow_actions *actions, VLOG_DBG_RL(&rl, "Unsupported IPv4 set action"); return -1; } + } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV6) { + const struct ovs_key_ipv6 *key = nl_attr_get(sa); + const struct ovs_key_ipv6 *mask = masked ? key + 1 : NULL; + + add_set_flow_action(ipv6_src, RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC); + add_set_flow_action(ipv6_dst, RTE_FLOW_ACTION_TYPE_SET_IPV6_DST); + add_set_flow_action(ipv6_hlimit, RTE_FLOW_ACTION_TYPE_SET_TTL); + + if (mask && !is_all_zeros(mask, sizeof *mask)) { + VLOG_DBG_RL(&rl, "Unsupported IPv6 set action"); + return -1; + } } else if (nl_attr_type(sa) == OVS_KEY_ATTR_TCP) { const struct ovs_key_tcp *key = nl_attr_get(sa); const struct ovs_key_tcp *mask = masked ? 
key + 1 : NULL; -- GitLab From 48c1ab5d74ece8d2e6dd332bfebd3a7adce9414b Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 8 Jul 2020 06:38:28 +0000 Subject: [PATCH 204/432] netdev: Allow storing dpif type into netdev structure. Storing of the dpif type of the owning datapath interface will allow us to easily distinguish, for example, userspace tunneling ports from the system ones. This is required in terms of HW offloading to avoid offloading of userspace flows to kernel interfaces that don't belong to the userspace datapath, but have the same dpif_port names. Acked-by: Eli Britstein Acked-by: Roni Bar Yanai Acked-by: Ophir Munk Signed-off-by: Ilya Maximets --- lib/netdev-provider.h | 3 ++- lib/netdev.c | 16 ++++++++++++++++ lib/netdev.h | 2 ++ 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index d9503adb0..73dce2fca 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -96,7 +96,8 @@ struct netdev { /* Functions to control flow offloading. */ OVSRCU_TYPE(const struct netdev_flow_api *) flow_api; - struct netdev_hw_info hw_info; /* offload-capable netdev info */ + const char *dpif_type; /* Type of dpif this netdev belongs to. */ + struct netdev_hw_info hw_info; /* Offload-capable netdev info. */ }; static inline void diff --git a/lib/netdev.c b/lib/netdev.c index 90962eec6..91e91955c 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -1984,6 +1984,22 @@ netdev_get_class(const struct netdev *netdev) return netdev->netdev_class; } +/* Set the type of 'dpif' this 'netdev' belongs to. */ +void +netdev_set_dpif_type(struct netdev *netdev, const char *type) +{ + netdev->dpif_type = type; +} + +/* Returns the type of 'dpif' this 'netdev' belongs to. + * + * The caller must not free the returned value. */ +const char * +netdev_get_dpif_type(const struct netdev *netdev) +{ + return netdev->dpif_type; +} + /* Returns the netdev with 'name' or NULL if there is none.
* * The caller must free the returned netdev with netdev_close(). */ diff --git a/lib/netdev.h b/lib/netdev.h index fdbe0e1f5..fb5073056 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -179,6 +179,8 @@ bool netdev_mtu_is_user_config(struct netdev *); int netdev_get_ifindex(const struct netdev *); int netdev_set_tx_multiq(struct netdev *, unsigned int n_txq); enum netdev_pt_mode netdev_get_pt_mode(const struct netdev *); +void netdev_set_dpif_type(struct netdev *, const char *); +const char *netdev_get_dpif_type(const struct netdev *); /* Packet reception. */ int netdev_rxq_open(struct netdev *, struct netdev_rxq **, int id); -- GitLab From 8842fdf1b318e626ab24025aaa285461d51a1621 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 8 Jul 2020 06:38:29 +0000 Subject: [PATCH 205/432] netdev-offload: Use dpif type instead of class. There is no real difference between the 'class' and 'type' in the context of common lookup operations inside netdev-offload module because it only checks the value of pointers without using the value itself. However, 'type' has some meaning and can be used by offload providers on the initialization phase to check if this type of Flow API in pair with the netdev type could be used in a particular datapath type. For example, this is needed to check if Linux flow API could be used for current tunneling vport because it could be used only if tunneling vport belongs to system datapath, i.e. has backing linux interface. This is needed to unblock tunneling offloads in userspace datapath with DPDK flow API.
Acked-by: Eli Britstein Acked-by: Roni Bar Yanai Acked-by: Ophir Munk Signed-off-by: Ilya Maximets --- lib/dpif-netdev.c | 15 +++++----- lib/dpif-netlink.c | 23 +++++++++------- lib/dpif.c | 21 ++++++++------ lib/netdev-offload-dpdk.c | 17 +++++------- lib/netdev-offload-tc.c | 3 +- lib/netdev-offload.c | 52 +++++++++++++++++------------------ lib/netdev-offload.h | 16 +++++------ ofproto/ofproto-dpif-upcall.c | 5 ++-- 8 files changed, 76 insertions(+), 76 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 0fa5b6fff..d67ebce80 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2383,10 +2383,11 @@ static int mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd, struct dp_netdev_flow *flow) { - int ret = 0; - uint32_t mark = flow->mark; + const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type); struct cmap_node *mark_node = CONST_CAST(struct cmap_node *, &flow->mark_node); + uint32_t mark = flow->mark; + int ret = 0; cmap_remove(&flow_mark.mark_to_flow, mark_node, hash_int(mark, 0)); flow->mark = INVALID_FLOW_MARK; @@ -2399,7 +2400,7 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd, struct netdev *port; odp_port_t in_port = flow->flow.in_port.odp_port; - port = netdev_ports_get(in_port, pmd->dp->class); + port = netdev_ports_get(in_port, dpif_type_str); if (port) { /* Taking a global 'port_mutex' to fulfill thread safety * restrictions for the netdev-offload-dpdk module. 
*/ @@ -2507,9 +2508,9 @@ static int dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload) { struct dp_netdev_pmd_thread *pmd = offload->pmd; - const struct dpif_class *dpif_class = pmd->dp->class; struct dp_netdev_flow *flow = offload->flow; odp_port_t in_port = flow->flow.in_port.odp_port; + const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type); bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD; struct offload_info info; struct netdev *port; @@ -2545,9 +2546,8 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload) } } info.flow_mark = mark; - info.dpif_class = dpif_class; - port = netdev_ports_get(in_port, pmd->dp->class); + port = netdev_ports_get(in_port, dpif_type_str); if (!port || netdev_vport_is_vport_class(port->netdev_class)) { netdev_close(port); goto err_free; @@ -3161,7 +3161,8 @@ dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp, return false; } - netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, dp->class); + netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, + dpif_normalize_type(dp->class->type)); if (!netdev) { return false; } diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 18322e879..7da4fb54d 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -1120,6 +1120,7 @@ dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no) static int dpif_netlink_flow_flush(struct dpif *dpif_) { + const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif_)); const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); struct dpif_netlink_flow flow; @@ -1128,7 +1129,7 @@ dpif_netlink_flow_flush(struct dpif *dpif_) flow.dp_ifindex = dpif->dp_ifindex; if (netdev_is_flow_api_enabled()) { - netdev_ports_flow_flush(dpif_->dpif_class); + netdev_ports_flow_flush(dpif_type_str); } return dpif_netlink_flow_transact(&flow, NULL, NULL); @@ -1445,7 +1446,7 @@ start_netdev_dump(const struct dpif *dpif_, ovs_mutex_lock(&dump->netdev_lock); 
dump->netdev_current_dump = 0; dump->netdev_dumps - = netdev_ports_flow_dump_create(dpif_->dpif_class, + = netdev_ports_flow_dump_create(dpif_normalize_type(dpif_type(dpif_)), &dump->netdev_dumps_num, dump->up.terse); ovs_mutex_unlock(&dump->netdev_lock); @@ -2002,6 +2003,7 @@ dpif_netlink_operate__(struct dpif_netlink *dpif, static int parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get) { + const char *dpif_type_str = dpif_normalize_type(dpif_type(&dpif->dpif)); struct dpif_flow *dpif_flow = get->flow; struct match match; struct nlattr *actions; @@ -2016,8 +2018,8 @@ parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get) int err; ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf); - err = netdev_ports_flow_get(dpif->dpif.dpif_class, &match, - &actions, get->ufid, &stats, &attrs, &buf); + err = netdev_ports_flow_get(dpif_type_str, &match, &actions, get->ufid, + &stats, &attrs, &buf); if (err) { return err; } @@ -2042,8 +2044,8 @@ parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get) static int parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) { + const char *dpif_type_str = dpif_normalize_type(dpif_type(&dpif->dpif)); static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); - const struct dpif_class *dpif_class = dpif->dpif.dpif_class; struct match match; odp_port_t in_port; const struct nlattr *nla; @@ -2065,7 +2067,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) } in_port = match.flow.in_port.odp_port; - dev = netdev_ports_get(in_port, dpif_class); + dev = netdev_ports_get(in_port, dpif_type_str); if (!dev) { return EOPNOTSUPP; } @@ -2078,7 +2080,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) odp_port_t out_port; out_port = nl_attr_get_odp_port(nla); - outdev = netdev_ports_get(out_port, dpif_class); + outdev = netdev_ports_get(out_port, dpif_type_str); if (!outdev) { err = EOPNOTSUPP; goto out; @@ -2094,7 +2096,6 @@ parse_flow_put(struct 
dpif_netlink *dpif, struct dpif_flow_put *put) } } - info.dpif_class = dpif_class; info.tp_dst_port = dst_port; info.tunnel_csum_on = csum_on; info.recirc_id_shared_with_tc = (dpif->user_features @@ -2198,8 +2199,10 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op) break; } - err = netdev_ports_flow_del(dpif->dpif.dpif_class, del->ufid, - del->stats); + err = netdev_ports_flow_del( + dpif_normalize_type(dpif_type(&dpif->dpif)), + del->ufid, + del->stats); log_flow_del_message(&dpif->dpif, &this_module, del, 0); break; } diff --git a/lib/dpif.c b/lib/dpif.c index c529a93f1..7cac3a629 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -347,6 +347,7 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp) error = registered_class->dpif_class->open(registered_class->dpif_class, name, create, &dpif); if (!error) { + const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif)); struct dpif_port_dump port_dump; struct dpif_port dpif_port; @@ -363,7 +364,7 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp) err = netdev_open(dpif_port.name, dpif_port.type, &netdev); if (!err) { - netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); + netdev_ports_insert(netdev, dpif_type_str, &dpif_port); netdev_close(netdev); } else { VLOG_WARN("could not open netdev %s type %s: %s", @@ -427,14 +428,15 @@ dpif_create_and_open(const char *name, const char *type, struct dpif **dpifp) static void dpif_remove_netdev_ports(struct dpif *dpif) { - struct dpif_port_dump port_dump; - struct dpif_port dpif_port; + const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif)); + struct dpif_port_dump port_dump; + struct dpif_port dpif_port; - DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { - if (!dpif_is_tap_port(dpif_port.type)) { - netdev_ports_remove(dpif_port.port_no, dpif->dpif_class); - } + DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { + if (!dpif_is_tap_port(dpif_port.type)) { + 
netdev_ports_remove(dpif_port.port_no, dpif_type_str); } + } } /* Closes and frees the connection to 'dpif'. Does not destroy the datapath @@ -597,12 +599,13 @@ dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop) if (!dpif_is_tap_port(netdev_get_type(netdev))) { + const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif)); struct dpif_port dpif_port; dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev)); dpif_port.name = CONST_CAST(char *, netdev_name); dpif_port.port_no = port_no; - netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); + netdev_ports_insert(netdev, dpif_type_str, &dpif_port); } } else { VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s", @@ -634,7 +637,7 @@ dpif_port_del(struct dpif *dpif, odp_port_t port_no, bool local_delete) } } - netdev_ports_remove(port_no, dpif->dpif_class); + netdev_ports_remove(port_no, dpif_normalize_type(dpif_type(dpif))); return error; } diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 042dccc15..a8b236788 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -934,15 +934,14 @@ add_port_id_action(struct flow_actions *actions, static int add_output_action(struct netdev *netdev, struct flow_actions *actions, - const struct nlattr *nla, - struct offload_info *info) + const struct nlattr *nla) { struct netdev *outdev; odp_port_t port; int ret = 0; port = nl_attr_get_odp_port(nla); - outdev = netdev_ports_get(port, info->dpif_class); + outdev = netdev_ports_get(port, netdev->dpif_type); if (outdev == NULL) { VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port); return -1; @@ -1125,8 +1124,7 @@ static int parse_flow_actions(struct netdev *netdev, struct flow_actions *actions, struct nlattr *nl_actions, - size_t nl_actions_len, - struct offload_info *info) + size_t nl_actions_len) { struct nlattr *nla; size_t left; @@ -1134,7 +1132,7 @@ parse_flow_actions(struct netdev *netdev, add_count_action(actions); 
NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) { if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) { - if (add_output_action(netdev, actions, nla, info)) { + if (add_output_action(netdev, actions, nla)) { return -1; } } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) { @@ -1176,8 +1174,7 @@ static struct rte_flow * netdev_offload_dpdk_actions(struct netdev *netdev, struct flow_patterns *patterns, struct nlattr *nl_actions, - size_t actions_len, - struct offload_info *info) + size_t actions_len) { const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 }; struct flow_actions actions = { .actions = NULL, .cnt = 0 }; @@ -1185,7 +1182,7 @@ netdev_offload_dpdk_actions(struct netdev *netdev, struct rte_flow_error error; int ret; - ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len, info); + ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len); if (ret) { goto out; } @@ -1217,7 +1214,7 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev, } flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions, - actions_len, info); + actions_len); if (!flow) { /* If we failed to offload the rule actions fallback to MARK+RSS * actions. 
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index e50e00f23..2c9c6f4ca 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1714,7 +1714,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, action = &flower.actions[flower.action_count]; if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) { odp_port_t port = nl_attr_get_odp_port(nla); - struct netdev *outdev = netdev_ports_get(port, info->dpif_class); + struct netdev *outdev = netdev_ports_get( + port, netdev_get_dpif_type(netdev)); if (!outdev) { VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port); diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c index ab97a292e..2da3bc701 100644 --- a/lib/netdev-offload.c +++ b/lib/netdev-offload.c @@ -383,11 +383,10 @@ static struct hmap ifindex_to_port OVS_GUARDED_BY(netdev_hmap_rwlock) = HMAP_INITIALIZER(&ifindex_to_port); struct port_to_netdev_data { - struct hmap_node portno_node; /* By (dpif_class, dpif_port.port_no). */ - struct hmap_node ifindex_node; /* By (dpif_class, ifindex). */ + struct hmap_node portno_node; /* By (dpif_type, dpif_port.port_no). */ + struct hmap_node ifindex_node; /* By (dpif_type, ifindex). 
*/ struct netdev *netdev; struct dpif_port dpif_port; - const struct dpif_class *dpif_class; int ifindex; }; @@ -423,13 +422,13 @@ netdev_is_flow_api_enabled(void) } void -netdev_ports_flow_flush(const struct dpif_class *dpif_class) +netdev_ports_flow_flush(const char *dpif_type) { struct port_to_netdev_data *data; ovs_rwlock_rdlock(&netdev_hmap_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { - if (data->dpif_class == dpif_class) { + if (netdev_get_dpif_type(data->netdev) == dpif_type) { netdev_flow_flush(data->netdev); } } @@ -437,8 +436,7 @@ netdev_ports_flow_flush(const struct dpif_class *dpif_class) } struct netdev_flow_dump ** -netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports, - bool terse) +netdev_ports_flow_dump_create(const char *dpif_type, int *ports, bool terse) { struct port_to_netdev_data *data; struct netdev_flow_dump **dumps; @@ -447,7 +445,7 @@ netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports, ovs_rwlock_rdlock(&netdev_hmap_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { - if (data->dpif_class == dpif_class) { + if (netdev_get_dpif_type(data->netdev) == dpif_type) { count++; } } @@ -455,7 +453,7 @@ netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports, dumps = count ? 
xzalloc(sizeof *dumps * count) : NULL; HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { - if (data->dpif_class == dpif_class) { + if (netdev_get_dpif_type(data->netdev) == dpif_type) { if (netdev_flow_dump_create(data->netdev, &dumps[i], terse)) { continue; } @@ -471,15 +469,14 @@ netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports, } int -netdev_ports_flow_del(const struct dpif_class *dpif_class, - const ovs_u128 *ufid, +netdev_ports_flow_del(const char *dpif_type, const ovs_u128 *ufid, struct dpif_flow_stats *stats) { struct port_to_netdev_data *data; ovs_rwlock_rdlock(&netdev_hmap_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { - if (data->dpif_class == dpif_class + if (netdev_get_dpif_type(data->netdev) == dpif_type && !netdev_flow_del(data->netdev, ufid, stats)) { ovs_rwlock_unlock(&netdev_hmap_rwlock); return 0; @@ -491,7 +488,7 @@ netdev_ports_flow_del(const struct dpif_class *dpif_class, } int -netdev_ports_flow_get(const struct dpif_class *dpif_class, struct match *match, +netdev_ports_flow_get(const char *dpif_type, struct match *match, struct nlattr **actions, const ovs_u128 *ufid, struct dpif_flow_stats *stats, struct dpif_flow_attrs *attrs, struct ofpbuf *buf) @@ -500,7 +497,7 @@ netdev_ports_flow_get(const struct dpif_class *dpif_class, struct match *match, ovs_rwlock_rdlock(&netdev_hmap_rwlock); HMAP_FOR_EACH (data, portno_node, &port_to_netdev) { - if (data->dpif_class == dpif_class + if (netdev_get_dpif_type(data->netdev) == dpif_type && !netdev_flow_get(data->netdev, match, actions, ufid, stats, attrs, buf)) { ovs_rwlock_unlock(&netdev_hmap_rwlock); @@ -512,21 +509,21 @@ netdev_ports_flow_get(const struct dpif_class *dpif_class, struct match *match, } static uint32_t -netdev_ports_hash(odp_port_t port, const struct dpif_class *dpif_class) +netdev_ports_hash(odp_port_t port, const char *dpif_type) { - return hash_int(odp_to_u32(port), hash_pointer(dpif_class, 0)); + return hash_int(odp_to_u32(port), 
hash_pointer(dpif_type, 0)); } static struct port_to_netdev_data * -netdev_ports_lookup(odp_port_t port_no, const struct dpif_class *dpif_class) +netdev_ports_lookup(odp_port_t port_no, const char *dpif_type) OVS_REQ_RDLOCK(netdev_hmap_rwlock) { struct port_to_netdev_data *data; HMAP_FOR_EACH_WITH_HASH (data, portno_node, - netdev_ports_hash(port_no, dpif_class), + netdev_ports_hash(port_no, dpif_type), &port_to_netdev) { - if (data->dpif_class == dpif_class + if (netdev_get_dpif_type(data->netdev) == dpif_type && data->dpif_port.port_no == port_no) { return data; } @@ -535,7 +532,7 @@ netdev_ports_lookup(odp_port_t port_no, const struct dpif_class *dpif_class) } int -netdev_ports_insert(struct netdev *netdev, const struct dpif_class *dpif_class, +netdev_ports_insert(struct netdev *netdev, const char *dpif_type, struct dpif_port *dpif_port) { struct port_to_netdev_data *data; @@ -546,19 +543,20 @@ netdev_ports_insert(struct netdev *netdev, const struct dpif_class *dpif_class, } ovs_rwlock_wrlock(&netdev_hmap_rwlock); - if (netdev_ports_lookup(dpif_port->port_no, dpif_class)) { + if (netdev_ports_lookup(dpif_port->port_no, dpif_type)) { ovs_rwlock_unlock(&netdev_hmap_rwlock); return EEXIST; } data = xzalloc(sizeof *data); data->netdev = netdev_ref(netdev); - data->dpif_class = dpif_class; dpif_port_clone(&data->dpif_port, dpif_port); data->ifindex = ifindex; + netdev_set_dpif_type(netdev, dpif_type); + hmap_insert(&port_to_netdev, &data->portno_node, - netdev_ports_hash(dpif_port->port_no, dpif_class)); + netdev_ports_hash(dpif_port->port_no, dpif_type)); hmap_insert(&ifindex_to_port, &data->ifindex_node, ifindex); ovs_rwlock_unlock(&netdev_hmap_rwlock); @@ -568,13 +566,13 @@ netdev_ports_insert(struct netdev *netdev, const struct dpif_class *dpif_class, } struct netdev * -netdev_ports_get(odp_port_t port_no, const struct dpif_class *dpif_class) +netdev_ports_get(odp_port_t port_no, const char *dpif_type) { struct port_to_netdev_data *data; struct netdev *ret = 
NULL; ovs_rwlock_rdlock(&netdev_hmap_rwlock); - data = netdev_ports_lookup(port_no, dpif_class); + data = netdev_ports_lookup(port_no, dpif_type); if (data) { ret = netdev_ref(data->netdev); } @@ -584,13 +582,13 @@ netdev_ports_get(odp_port_t port_no, const struct dpif_class *dpif_class) } int -netdev_ports_remove(odp_port_t port_no, const struct dpif_class *dpif_class) +netdev_ports_remove(odp_port_t port_no, const char *dpif_type) { struct port_to_netdev_data *data; int ret = ENOENT; ovs_rwlock_wrlock(&netdev_hmap_rwlock); - data = netdev_ports_lookup(port_no, dpif_class); + data = netdev_ports_lookup(port_no, dpif_type); if (data) { dpif_port_destroy(&data->dpif_port); netdev_close(data->netdev); /* unref and possibly close */ diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h index 87f5852c8..4c0ed2ae8 100644 --- a/lib/netdev-offload.h +++ b/lib/netdev-offload.h @@ -62,7 +62,6 @@ struct netdev_flow_dump { /* Flow offloading. */ struct offload_info { - const struct dpif_class *dpif_class; ovs_be16 tp_dst_port; /* Destination port for tunnel in SET action */ uint8_t tunnel_csum_on; /* Tunnel header with checksum */ @@ -105,22 +104,21 @@ bool netdev_is_flow_api_enabled(void); void netdev_set_flow_api_enabled(const struct smap *ovs_other_config); bool netdev_is_offload_rebalance_policy_enabled(void); -struct dpif_class; struct dpif_port; -int netdev_ports_insert(struct netdev *, const struct dpif_class *, +int netdev_ports_insert(struct netdev *, const char *dpif_type, struct dpif_port *); -struct netdev *netdev_ports_get(odp_port_t port, const struct dpif_class *); -int netdev_ports_remove(odp_port_t port, const struct dpif_class *); +struct netdev *netdev_ports_get(odp_port_t port, const char *dpif_type); +int netdev_ports_remove(odp_port_t port, const char *dpif_type); odp_port_t netdev_ifindex_to_odp_port(int ifindex); struct netdev_flow_dump **netdev_ports_flow_dump_create( - const struct dpif_class *, + const char *dpif_type, int *ports, bool terse); 
-void netdev_ports_flow_flush(const struct dpif_class *); -int netdev_ports_flow_del(const struct dpif_class *, const ovs_u128 *ufid, +void netdev_ports_flow_flush(const char *dpif_type); +int netdev_ports_flow_del(const char *dpif_type, const ovs_u128 *ufid, struct dpif_flow_stats *stats); -int netdev_ports_flow_get(const struct dpif_class *, struct match *match, +int netdev_ports_flow_get(const char *dpif_type, struct match *match, struct nlattr **actions, const ovs_u128 *ufid, struct dpif_flow_stats *stats, diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 920f29a6f..72751b9b3 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -2501,8 +2501,7 @@ ukey_netdev_unref(struct udpif_key *ukey) static void ukey_to_flow_netdev(struct udpif *udpif, struct udpif_key *ukey) { - const struct dpif *dpif = udpif->dpif; - const struct dpif_class *dpif_class = dpif->dpif_class; + const char *dpif_type_str = dpif_normalize_type(dpif_type(udpif->dpif)); const struct nlattr *k; unsigned int left; @@ -2515,7 +2514,7 @@ ukey_to_flow_netdev(struct udpif *udpif, struct udpif_key *ukey) if (type == OVS_KEY_ATTR_IN_PORT) { ukey->in_netdev = netdev_ports_get(nl_attr_get_odp_port(k), - dpif_class); + dpif_type_str); } else if (type == OVS_KEY_ATTR_TUNNEL) { struct flow_tnl tnl; enum odp_key_fitness res; -- GitLab From 6571965bdfd9d8055a04d97728130adce704750a Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:30 +0000 Subject: [PATCH 206/432] netdev-offload-dpdk: Support offload of clone tnl_push/output actions. Tunnel encapsulation is done by tnl_push and output actions nested in a clone action. Support offloading of such flows with RTE_FLOW_ACTION_TYPE_RAW_ENCAP attribute. 
Signed-off-by: Eli Britstein Reviewed-by: Oz Shlomo Acked-by: Sriharsha Basavapatna Signed-off-by: Ilya Maximets --- Documentation/howto/dpdk.rst | 1 + NEWS | 2 +- lib/netdev-offload-dpdk.c | 89 ++++++++++++++++++++++++++++++------ 3 files changed, 77 insertions(+), 15 deletions(-) diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst index fb9bd2d10..f0d45e47b 100644 --- a/Documentation/howto/dpdk.rst +++ b/Documentation/howto/dpdk.rst @@ -397,6 +397,7 @@ Supported actions for hardware offload are: - Modification of TCP/UDP (mod_tp_src/mod_tp_dst). - VLAN Push/Pop (push_vlan/pop_vlan). - Modification of IPv6 (set_field:->ipv6_src/ipv6_dst/mod_nw_ttl). +- Clone/output (tnl_push and output) for encapsulating over a tunnel. Further Reading --------------- diff --git a/NEWS b/NEWS index f172362d1..e52e862e1 100644 --- a/NEWS +++ b/NEWS @@ -12,7 +12,7 @@ Post-v2.13.0 * Add hardware offload support for VLAN Push/Pop actions (experimental). * Add hardware offload support for matching IPv6 protocol (experimental). * Add hardware offload support for set of IPv6 src/dst/ttl - actions (experimental). + and tunnel push-output actions (experimental). - Linux datapath: * Support for kernel versions up to 5.5.x. 
- AF_XDP: diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index a8b236788..e404bf444 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -337,7 +337,8 @@ dump_flow_pattern(struct ds *s, const struct rte_flow_item *item) } static void -dump_flow_action(struct ds *s, const struct rte_flow_action *actions) +dump_flow_action(struct ds *s, struct ds *s_extra, + const struct rte_flow_action *actions) { if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) { const struct rte_flow_action_mark *mark = actions->conf; @@ -451,13 +452,25 @@ dump_flow_action(struct ds *s, const struct rte_flow_action *actions) ds_put_cstr(s, " "); } ds_put_cstr(s, "/ "); + } else if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) { + const struct rte_flow_action_raw_encap *raw_encap = actions->conf; + + ds_put_cstr(s, "raw_encap index 0 / "); + if (raw_encap) { + ds_put_format(s_extra, "Raw-encap size=%ld set raw_encap 0 raw " + "pattern is ", raw_encap->size); + for (int i = 0; i < raw_encap->size; i++) { + ds_put_format(s_extra, "%02x", raw_encap->data[i]); + } + ds_put_cstr(s_extra, " / end_set;"); + } } else { ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); } } static struct ds * -dump_flow(struct ds *s, +dump_flow(struct ds *s, struct ds *s_extra, const struct rte_flow_attr *attr, const struct rte_flow_item *items, const struct rte_flow_action *actions) @@ -471,7 +484,7 @@ dump_flow(struct ds *s, } ds_put_cstr(s, "end actions "); while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) { - dump_flow_action(s, actions++); + dump_flow_action(s, s_extra, actions++); } ds_put_cstr(s, "end"); return s; @@ -484,18 +497,19 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev, const struct rte_flow_action *actions, struct rte_flow_error *error) { + struct ds s_extra = DS_EMPTY_INITIALIZER; + struct ds s = DS_EMPTY_INITIALIZER; struct rte_flow *flow; - struct ds s; + char *extra_str; flow = netdev_dpdk_rte_flow_create(netdev, attr, 
items, actions, error); if (flow) { if (!VLOG_DROP_DBG(&rl)) { - ds_init(&s); - dump_flow(&s, attr, items, actions); - VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" flow create %d %s", - netdev_get_name(netdev), (intptr_t) flow, + dump_flow(&s, &s_extra, attr, items, actions); + extra_str = ds_cstr(&s_extra); + VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s", + netdev_get_name(netdev), (intptr_t) flow, extra_str, netdev_dpdk_get_port_id(netdev), ds_cstr(&s)); - ds_destroy(&s); } } else { enum vlog_level level = VLL_WARN; @@ -506,14 +520,15 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev, VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).", netdev_get_name(netdev), error->type, error->message); if (!vlog_should_drop(&this_module, level, &rl)) { - ds_init(&s); - dump_flow(&s, attr, items, actions); - VLOG_RL(&rl, level, "Failed flow: %s: flow create %d %s", - netdev_get_name(netdev), + dump_flow(&s, &s_extra, attr, items, actions); + extra_str = ds_cstr(&s_extra); + VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s", + netdev_get_name(netdev), extra_str, netdev_dpdk_get_port_id(netdev), ds_cstr(&s)); - ds_destroy(&s); } } + ds_destroy(&s); + ds_destroy(&s_extra); return flow; } @@ -1120,6 +1135,43 @@ parse_vlan_push_action(struct flow_actions *actions, return 0; } +static int +parse_clone_actions(struct netdev *netdev, + struct flow_actions *actions, + const struct nlattr *clone_actions, + const size_t clone_actions_len) +{ + const struct nlattr *ca; + unsigned int cleft; + + NL_ATTR_FOR_EACH_UNSAFE (ca, cleft, clone_actions, clone_actions_len) { + int clone_type = nl_attr_type(ca); + + if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) { + const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca); + struct rte_flow_action_raw_encap *raw_encap = + xzalloc(sizeof *raw_encap); + + raw_encap->data = (uint8_t *) tnl_push->header; + raw_encap->preserve = NULL; + raw_encap->size = tnl_push->header_len; + + add_flow_action(actions, 
RTE_FLOW_ACTION_TYPE_RAW_ENCAP, + raw_encap); + } else if (clone_type == OVS_ACTION_ATTR_OUTPUT) { + if (add_output_action(netdev, actions, ca)) { + return -1; + } + } else { + VLOG_DBG_RL(&rl, + "Unsupported nested action inside clone(), " + "action type: %d", clone_type); + return -1; + } + } + return 0; +} + static int parse_flow_actions(struct netdev *netdev, struct flow_actions *actions, @@ -1155,6 +1207,15 @@ parse_flow_actions(struct netdev *netdev, } } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) { add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, NULL); + } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE && + left <= NLA_ALIGN(nla->nla_len)) { + const struct nlattr *clone_actions = nl_attr_get(nla); + size_t clone_actions_len = nl_attr_get_size(nla); + + if (parse_clone_actions(netdev, actions, clone_actions, + clone_actions_len)) { + return -1; + } } else { VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla)); return -1; -- GitLab From 4e432d6f8128a03687a669ce3fc4ffd8cb7d855c Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 8 Jul 2020 06:38:31 +0000 Subject: [PATCH 207/432] netdev-offload-dpdk: Support tnl/push using vxlan encap attribute. For DPDK, there is the RAW_ENCAP attribute which gets raw buffer of the encapsulation header. For specific protocol, such as vxlan, there is a more specific attribute, VXLAN_ENCAP, which gets the parsed fields of the outer header. In case tunnel type is vxlan, parse the header and use the specific attribute, with fallback to RAW_ENCAP. 
Signed-off-by: Eli Britstein Reviewed-by: Roni Bar Yanai Acked-by: Sriharsha Basavapatna Signed-off-by: Ilya Maximets --- lib/netdev-offload-dpdk.c | 157 +++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 2 deletions(-) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index e404bf444..de6101e4d 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -336,6 +336,62 @@ dump_flow_pattern(struct ds *s, const struct rte_flow_item *item) } } +static void +dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items) +{ + const struct rte_flow_item_eth *eth = NULL; + const struct rte_flow_item_ipv4 *ipv4 = NULL; + const struct rte_flow_item_ipv6 *ipv6 = NULL; + const struct rte_flow_item_udp *udp = NULL; + const struct rte_flow_item_vxlan *vxlan = NULL; + + for (; items && items->type != RTE_FLOW_ITEM_TYPE_END; items++) { + if (items->type == RTE_FLOW_ITEM_TYPE_ETH) { + eth = items->spec; + } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV4) { + ipv4 = items->spec; + } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV6) { + ipv6 = items->spec; + } else if (items->type == RTE_FLOW_ITEM_TYPE_UDP) { + udp = items->spec; + } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) { + vxlan = items->spec; + } + } + + ds_put_format(s, "set vxlan ip-version %s ", + ipv4 ? "ipv4" : ipv6 ? 
"ipv6" : "ERR"); + if (vxlan) { + ds_put_format(s, "vni %"PRIu32" ", + ntohl(*(ovs_be32 *) vxlan->vni) >> 8); + } + if (udp) { + ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ", + ntohs(udp->hdr.src_port), ntohs(udp->hdr.dst_port)); + } + if (ipv4) { + ds_put_format(s, "ip-src "IP_FMT" ip-dst "IP_FMT" ", + IP_ARGS(ipv4->hdr.src_addr), + IP_ARGS(ipv4->hdr.dst_addr)); + } + if (ipv6) { + struct in6_addr addr; + + ds_put_cstr(s, "ip-src "); + memcpy(&addr, ipv6->hdr.src_addr, sizeof addr); + ipv6_format_mapped(&addr, s); + ds_put_cstr(s, " ip-dst "); + memcpy(&addr, ipv6->hdr.dst_addr, sizeof addr); + ipv6_format_mapped(&addr, s); + ds_put_cstr(s, " "); + } + if (eth) { + ds_put_format(s, "eth-src "ETH_ADDR_FMT" eth-dst "ETH_ADDR_FMT, + ETH_ADDR_BYTES_ARGS(eth->src.addr_bytes), + ETH_ADDR_BYTES_ARGS(eth->dst.addr_bytes)); + } +} + static void dump_flow_action(struct ds *s, struct ds *s_extra, const struct rte_flow_action *actions) @@ -464,6 +520,13 @@ dump_flow_action(struct ds *s, struct ds *s_extra, } ds_put_cstr(s_extra, " / end_set;"); } + } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) { + const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf; + const struct rte_flow_item *items = vxlan_encap->definition; + + ds_put_cstr(s, "vxlan_encap / "); + dump_vxlan_encap(s_extra, items); + ds_put_cstr(s_extra, ";"); } else { ds_put_format(s, "unknown rte flow action (%d)\n", actions->type); } @@ -1111,6 +1174,91 @@ parse_set_actions(struct flow_actions *actions, return 0; } +/* Maximum number of items in struct rte_flow_action_vxlan_encap. 
+ * ETH / IPv4(6) / UDP / VXLAN / END + */ +#define ACTION_VXLAN_ENCAP_ITEMS_NUM 5 + +static int +add_vxlan_encap_action(struct flow_actions *actions, + const void *header) +{ + const struct eth_header *eth; + const struct udp_header *udp; + struct vxlan_data { + struct rte_flow_action_vxlan_encap conf; + struct rte_flow_item items[ACTION_VXLAN_ENCAP_ITEMS_NUM]; + } *vxlan_data; + BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0); + const void *vxlan; + const void *l3; + const void *l4; + int field; + + vxlan_data = xzalloc(sizeof *vxlan_data); + field = 0; + + eth = header; + /* Ethernet */ + vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_ETH; + vxlan_data->items[field].spec = eth; + vxlan_data->items[field].mask = &rte_flow_item_eth_mask; + field++; + + l3 = eth + 1; + /* IP */ + if (eth->eth_type == htons(ETH_TYPE_IP)) { + /* IPv4 */ + const struct ip_header *ip = l3; + + vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV4; + vxlan_data->items[field].spec = ip; + vxlan_data->items[field].mask = &rte_flow_item_ipv4_mask; + + if (ip->ip_proto != IPPROTO_UDP) { + goto err; + } + l4 = (ip + 1); + } else if (eth->eth_type == htons(ETH_TYPE_IPV6)) { + const struct ovs_16aligned_ip6_hdr *ip6 = l3; + + vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_IPV6; + vxlan_data->items[field].spec = ip6; + vxlan_data->items[field].mask = &rte_flow_item_ipv6_mask; + + if (ip6->ip6_nxt != IPPROTO_UDP) { + goto err; + } + l4 = (ip6 + 1); + } else { + goto err; + } + field++; + + udp = l4; + vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_UDP; + vxlan_data->items[field].spec = udp; + vxlan_data->items[field].mask = &rte_flow_item_udp_mask; + field++; + + vxlan = (udp + 1); + vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_VXLAN; + vxlan_data->items[field].spec = vxlan; + vxlan_data->items[field].mask = &rte_flow_item_vxlan_mask; + field++; + + vxlan_data->items[field].type = RTE_FLOW_ITEM_TYPE_END; + + vxlan_data->conf.definition = vxlan_data->items; + + 
add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, vxlan_data); + + return 0; +err: + free(vxlan_data); + return -1; +} + static int parse_vlan_push_action(struct flow_actions *actions, const struct ovs_action_push_vlan *vlan_push) @@ -1149,9 +1297,14 @@ parse_clone_actions(struct netdev *netdev, if (clone_type == OVS_ACTION_ATTR_TUNNEL_PUSH) { const struct ovs_action_push_tnl *tnl_push = nl_attr_get(ca); - struct rte_flow_action_raw_encap *raw_encap = - xzalloc(sizeof *raw_encap); + struct rte_flow_action_raw_encap *raw_encap; + + if (tnl_push->tnl_type == OVS_VPORT_TYPE_VXLAN && + !add_vxlan_encap_action(actions, tnl_push->header)) { + continue; + } + raw_encap = xzalloc(sizeof *raw_encap); raw_encap->data = (uint8_t *) tnl_push->header; raw_encap->preserve = NULL; raw_encap->size = tnl_push->header_len; -- GitLab From fd4d4777600fb420d502b99bf8ce8d93c25d3cd2 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 7 Jul 2020 15:59:49 -0700 Subject: [PATCH 208/432] netdev-linux: Fix broken build on Ubuntu 14.04 Patch 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") uses __virtio16 which is defined in kernel 3.19. Ubuntu 14.04 is using 3.13 kernel that lacks the virtio_types definition. This patch fixes that. Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Acked-by: Greg Rose Signed-off-by: Yi-Hung Wei Signed-off-by: William Tu --- acinclude.m4 | 12 ++++++++++++ configure.ac | 1 + lib/netdev-linux.c | 8 ++++++++ 3 files changed, 21 insertions(+) diff --git a/acinclude.m4 b/acinclude.m4 index 054ec2e3c..863a04349 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -250,6 +250,18 @@ AC_DEFUN([OVS_CHECK_LINUX_SCTP_CT], [ [Define to 1 if SCTP_CONNTRACK_HEARTBEAT_SENT is available.])]) ]) +dnl OVS_CHECK_LINUX_VIRTIO_TYPES +dnl +dnl Checks for kernels that need virtio_types definition. 
+AC_DEFUN([OVS_CHECK_LINUX_VIRTIO_TYPES], [ + AC_COMPILE_IFELSE([ + AC_LANG_PROGRAM([#include <linux/virtio_types.h>], [ + __virtio16 x = 0; + ])], + [AC_DEFINE([HAVE_VIRTIO_TYPES], [1], + [Define to 1 if __virtio16 is available.])]) +]) + dnl OVS_FIND_DEPENDENCY(FUNCTION, SEARCH_LIBS, NAME_TO_PRINT) dnl dnl Check for a function in a library list. diff --git a/configure.ac b/configure.ac index 1877aae56..5ce510c20 100644 --- a/configure.ac +++ b/configure.ac @@ -188,6 +188,7 @@ OVS_CHECK_LINUX OVS_CHECK_LINUX_NETLINK OVS_CHECK_LINUX_TC OVS_CHECK_LINUX_SCTP_CT +OVS_CHECK_LINUX_VIRTIO_TYPES OVS_CHECK_DPDK OVS_CHECK_PRAGMA_MESSAGE AC_SUBST([OVS_CFLAGS]) diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 6269c24ac..fe7fb9b29 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -227,6 +227,14 @@ struct rtnl_link_stats64 { uint64_t tx_compressed; }; +/* Linux 3.19 introduced virtio_types.h. It might be missing + * if we are using old kernel. */ +#ifndef HAVE_VIRTIO_TYPES +typedef __u16 __bitwise__ __virtio16; +typedef __u32 __bitwise__ __virtio32; +typedef __u64 __bitwise__ __virtio64; +#endif + enum { VALID_IFINDEX = 1 << 0, VALID_ETHERADDR = 1 << 1, -- GitLab From 058b80d3de31b2c539d9e6f5f6687bde78ef08e9 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 9 Jun 2020 08:53:40 +0800 Subject: [PATCH 209/432] dpif-netdev: Add check mark to avoid ovs-vswitchd crash. When changing the pmd interfaces attribute, ovs-vswitchd will reload pmd and flush offload flows. reload_affected_pmds may be invoked twice or more. In that case, the flows may be queued to the "dp_netdev_flow_offload" thread again. For example: $ ovs-vsctl -- set interface options:dpdk-lsc-interrupt=true ovs-vswitchd main flow-offload thread append F to queue ... ... append F to queue ... del F ... 
del F (crash [1]) [1]: ovs_assert_failure lib/cmap.c:922 cmap_replace lib/cmap.c:921 cmap_remove lib/cmap.h:295 mark_to_flow_disassociate lib/dpif-netdev.c:2269 dp_netdev_flow_offload_del lib/dpif-netdev.c:2369 dp_netdev_flow_offload_main lib/dpif-netdev.c:2492 Fixes: 02bb2824e51d ("dpif-netdev: do hw flow offload in a thread") Signed-off-by: Tonghao Zhang Signed-off-by: Ilya Maximets --- lib/dpif-netdev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index d67ebce80..c001f18cd 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2389,6 +2389,12 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd, uint32_t mark = flow->mark; int ret = 0; + /* INVALID_FLOW_MARK may mean that the flow has been disassociated or + * never associated. */ + if (OVS_UNLIKELY(mark == INVALID_FLOW_MARK)) { + return EINVAL; + } + cmap_remove(&flow_mark.mark_to_flow, mark_node, hash_int(mark, 0)); flow->mark = INVALID_FLOW_MARK; -- GitLab From fa31efd211143f1adb06a62faad803a5aca1e400 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Tue, 9 Jun 2020 08:53:41 +0800 Subject: [PATCH 210/432] dpif-netdev: Return error code when no mark available. The maximum number of marks is (UINT32_MAX - 1), which is enough in practice. But theoretically, if no mark is available, later flows will all share the mark INVALID_FLOW_MARK, which may break the function. If no mark is available, return an error code. 
Fixes: 02bb2824e51d ("dpif-netdev: do hw flow offload in a thread") Signed-off-by: Tonghao Zhang Signed-off-by: Ilya Maximets --- lib/dpif-netdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index c001f18cd..629a0cb53 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2549,6 +2549,7 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload) mark = flow_mark_alloc(); if (mark == INVALID_FLOW_MARK) { VLOG_ERR("Failed to allocate flow mark!\n"); + return -1; } } info.flow_mark = mark; -- GitLab From e90e115a01afe7001863582b65fdaa278794e610 Mon Sep 17 00:00:00 2001 From: Harry van Haaren Date: Mon, 13 Jul 2020 13:42:10 +0100 Subject: [PATCH 211/432] dpif-netdev: implement subtable lookup validation. This commit refactors the existing dpif subtable function pointer infrastructure, and implements an autovalidator component. The refactoring makes the existing dpcls subtable lookup function handling more generic, and cleans up how more implementations can be enabled in future. In order to ensure all implementations provide identical results, the autovalidator is added. The autovalidator itself implements the subtable lookup function prototype, but internally iterates over all other available implementations. The end result is that testing of each implementation becomes automatic when the autovalidator implementation is selected. 
Signed-off-by: Harry van Haaren Acked-by: William Tu Signed-off-by: Ian Stokes --- acinclude.m4 | 16 ++++ configure.ac | 1 + lib/automake.mk | 3 + lib/dpif-netdev-lookup-autovalidator.c | 110 +++++++++++++++++++++++++ lib/dpif-netdev-lookup-generic.c | 9 +- lib/dpif-netdev-lookup.c | 104 +++++++++++++++++++++++ lib/dpif-netdev-lookup.h | 75 +++++++++++++++++ lib/dpif-netdev-private.h | 15 ---- lib/dpif-netdev.c | 13 ++- 9 files changed, 322 insertions(+), 24 deletions(-) create mode 100644 lib/dpif-netdev-lookup-autovalidator.c create mode 100644 lib/dpif-netdev-lookup.c create mode 100644 lib/dpif-netdev-lookup.h diff --git a/acinclude.m4 b/acinclude.m4 index 863a04349..0f1986184 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -14,6 +14,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +dnl Set OVS DPCLS Autovalidator as default subtable search at compile time? +dnl This enables automatically running all unit tests with all DPCLS +dnl implementations. 
+AC_DEFUN([OVS_CHECK_DPCLS_AUTOVALIDATOR], [ + AC_ARG_ENABLE([autovalidator], + [AC_HELP_STRING([--enable-autovalidator], [Enable DPCLS autovalidator as default subtable search implementation.])], + [autovalidator=yes],[autovalidator=no]) + AC_MSG_CHECKING([whether DPCLS Autovalidator is default implementation]) + if test "$autovalidator" != yes; then + AC_MSG_RESULT([no]) + else + OVS_CFLAGS="$OVS_CFLAGS -DDPCLS_AUTOVALIDATOR_DEFAULT" + AC_MSG_RESULT([yes]) + fi +]) + dnl OVS_ENABLE_WERROR AC_DEFUN([OVS_ENABLE_WERROR], [AC_ARG_ENABLE( diff --git a/configure.ac b/configure.ac index 5ce510c20..4a6995ea8 100644 --- a/configure.ac +++ b/configure.ac @@ -181,6 +181,7 @@ OVS_CONDITIONAL_CC_OPTION([-Wno-unused-parameter], [HAVE_WNO_UNUSED_PARAMETER]) OVS_ENABLE_WERROR OVS_ENABLE_SPARSE OVS_CTAGS_IDENTIFIERS +OVS_CHECK_DPCLS_AUTOVALIDATOR AC_ARG_VAR(KARCH, [Kernel Architecture String]) AC_SUBST(KARCH) diff --git a/lib/automake.mk b/lib/automake.mk index 86940ccd2..1fc1a209e 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -81,6 +81,9 @@ lib_libopenvswitch_la_SOURCES = \ lib/dp-packet.h \ lib/dp-packet.c \ lib/dpdk.h \ + lib/dpif-netdev-lookup.h \ + lib/dpif-netdev-lookup.c \ + lib/dpif-netdev-lookup-autovalidator.c \ lib/dpif-netdev-lookup-generic.c \ lib/dpif-netdev.c \ lib/dpif-netdev.h \ diff --git a/lib/dpif-netdev-lookup-autovalidator.c b/lib/dpif-netdev-lookup-autovalidator.c new file mode 100644 index 000000000..97b59fdd0 --- /dev/null +++ b/lib/dpif-netdev-lookup-autovalidator.c @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2020 Intel Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include "dpif-netdev.h" +#include "dpif-netdev-lookup.h" +#include "dpif-netdev-private.h" +#include "openvswitch/vlog.h" + +VLOG_DEFINE_THIS_MODULE(dpif_lookup_autovalidator); + +/* This file implements an automated validator for subtable search + * implementations. It compares the results of the generic scalar search result + * with ISA optimized implementations. + * + * Note the goal is *NOT* to test the *specialized* versions of subtables, as + * the compiler performs the specialization - and we rely on the correctness of + * the compiler to not break those specialized variants. + * + * The goal is to ensure identical results of the different implementations, + * despite that the implementations may have different methods to get those + * results. + * + * Example: AVX-512 ISA uses different instructions and algorithm to the scalar + * implementation, however the results (rules[] output) must be the same. + */ + +dpcls_subtable_lookup_func +dpcls_subtable_autovalidator_probe(uint32_t u0 OVS_UNUSED, + uint32_t u1 OVS_UNUSED); + +static uint32_t +dpcls_subtable_autovalidator(struct dpcls_subtable *subtable, + uint32_t keys_map, + const struct netdev_flow_key *keys[], + struct dpcls_rule **rules_good) +{ + const uint32_t u0_bit_count = subtable->mf_bits_set_unit0; + const uint32_t u1_bit_count = subtable->mf_bits_set_unit1; + + /* Scalar generic - the "known correct" version. 
*/ + dpcls_subtable_lookup_func lookup_good; + lookup_good = dpcls_subtable_generic_probe(u0_bit_count, u1_bit_count); + + /* Run actual scalar implementation to get known good results. */ + uint32_t matches_good = lookup_good(subtable, keys_map, keys, rules_good); + + struct dpcls_subtable_lookup_info_t *lookup_funcs; + int32_t lookup_func_count = dpcls_subtable_lookup_info_get(&lookup_funcs); + if (lookup_func_count < 0) { + VLOG_ERR("failed to get lookup subtable function implementations\n"); + return 0; + } + + /* Ensure the autovalidator is the 0th item in the lookup_funcs array. */ + ovs_assert(lookup_funcs[0].probe(0, 0) == dpcls_subtable_autovalidator); + + /* Now compare all other implementations against known good results. + * Note we start iterating from array[1], as 0 is the autotester itself. + */ + for (int i = 1; i < lookup_func_count; i++) { + dpcls_subtable_lookup_func lookup_func; + lookup_func = lookup_funcs[i].probe(u0_bit_count, + u1_bit_count); + + /* If its probe returns a function, then test it. */ + if (lookup_func) { + struct dpcls_rule *rules_test[NETDEV_MAX_BURST]; + size_t rules_size = sizeof(struct dpcls_rule *) * NETDEV_MAX_BURST; + memset(rules_test, 0, rules_size); + uint32_t matches_test = lookup_func(subtable, keys_map, keys, + rules_test); + + /* Ensure same packets matched against subtable. */ + if (matches_good != matches_test) { + VLOG_ERR("matches_good 0x%x != matches_test 0x%x in func %s\n", + matches_good, matches_test, lookup_funcs[i].name); + } + + /* Ensure rules matched are the same for scalar / others. */ + int j; + ULLONG_FOR_EACH_1 (j, matches_test) { + ovs_assert(rules_good[j] == rules_test[j]); + } + } + } + + return matches_good; +} + +dpcls_subtable_lookup_func +dpcls_subtable_autovalidator_probe(uint32_t u0 OVS_UNUSED, + uint32_t u1 OVS_UNUSED) +{ + /* Always return the same validator tester, it works for all subtables. 
*/ + return dpcls_subtable_autovalidator; +} diff --git a/lib/dpif-netdev-lookup-generic.c b/lib/dpif-netdev-lookup-generic.c index 89c8be0fa..b1a0cfc36 100644 --- a/lib/dpif-netdev-lookup-generic.c +++ b/lib/dpif-netdev-lookup-generic.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2016, 2017 Nicira, Inc. - * Copyright (c) 2019 Intel Corporation. + * Copyright (c) 2019, 2020 Intel Corporation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include "dpif-netdev.h" #include "dpif-netdev-private.h" +#include "dpif-netdev-lookup.h" #include "bitmap.h" #include "cmap.h" @@ -254,7 +255,7 @@ lookup_generic_impl(struct dpcls_subtable *subtable, } /* Generic lookup function that uses runtime provided mf bits for iterating. */ -uint32_t +static uint32_t dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable, uint32_t keys_map, const struct netdev_flow_key *keys[], @@ -310,6 +311,10 @@ dpcls_subtable_generic_probe(uint32_t u0_bits, uint32_t u1_bits) if (f) { VLOG_DBG("Subtable using Generic Optimized for u0 %d, u1 %d\n", u0_bits, u1_bits); + } else { + /* Always return the generic function. */ + f = dpcls_subtable_lookup_generic; } + return f; } diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c new file mode 100644 index 000000000..530187e9c --- /dev/null +++ b/lib/dpif-netdev-lookup.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2020 Intel Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "dpif-netdev-lookup.h" + +#include "openvswitch/vlog.h" + +VLOG_DEFINE_THIS_MODULE(dpif_netdev_lookup); + +/* Actual list of implementations goes here */ +static struct dpcls_subtable_lookup_info_t subtable_lookups[] = { + /* The autovalidator implementation will not be used by default, it must + * be enabled at compile time to be the default lookup implementation. The + * user may enable it at runtime using the normal "prio-set" command if + * desired. The compile time default switch is here to enable all unit + * tests to transparently run with the autovalidator. + */ +#ifdef DPCLS_AUTOVALIDATOR_DEFAULT + { .prio = 255, +#else + { .prio = 0, +#endif + .probe = dpcls_subtable_autovalidator_probe, + .name = "autovalidator", }, + + /* The default scalar C code implementation. */ + { .prio = 1, + .probe = dpcls_subtable_generic_probe, + .name = "generic", }, +}; + +int32_t +dpcls_subtable_lookup_info_get(struct dpcls_subtable_lookup_info_t **out_ptr) +{ + if (out_ptr == NULL) { + return -1; + } + + *out_ptr = subtable_lookups; + return ARRAY_SIZE(subtable_lookups); +} + +/* sets the priority of the lookup function with "name". */ +int32_t +dpcls_subtable_set_prio(const char *name, uint8_t priority) +{ + for (int i = 0; i < ARRAY_SIZE(subtable_lookups); i++) { + if (strcmp(name, subtable_lookups[i].name) == 0) { + subtable_lookups[i].prio = priority; + VLOG_INFO("Subtable function '%s' set priority to %d\n", + name, priority); + return 0; + } + } + VLOG_WARN("Subtable function '%s' not found, failed to set priority\n", + name); + return -EINVAL; +} + +dpcls_subtable_lookup_func +dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count) +{ + /* Iter over each subtable impl, and get highest priority one. 
*/ + int32_t prio = -1; + const char *name = NULL; + dpcls_subtable_lookup_func best_func = NULL; + + for (int i = 0; i < ARRAY_SIZE(subtable_lookups); i++) { + int32_t probed_prio = subtable_lookups[i].prio; + if (probed_prio > prio) { + dpcls_subtable_lookup_func probed_func; + probed_func = subtable_lookups[i].probe(u0_bit_count, + u1_bit_count); + if (probed_func) { + best_func = probed_func; + prio = probed_prio; + name = subtable_lookups[i].name; + } + } + } + + VLOG_DBG("Subtable lookup function '%s' with units (%d,%d), priority %d\n", + name, u0_bit_count, u1_bit_count, prio); + + /* Programming error - we must always return a valid func ptr. */ + ovs_assert(best_func != NULL); + + return best_func; +} diff --git a/lib/dpif-netdev-lookup.h b/lib/dpif-netdev-lookup.h new file mode 100644 index 000000000..61f44b9e8 --- /dev/null +++ b/lib/dpif-netdev-lookup.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2020 Intel Corporation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DPIF_NETDEV_LOOKUP_H +#define DPIF_NETDEV_LOOKUP_H 1 + +#include +#include "dpif-netdev.h" +#include "dpif-netdev-private.h" + +/* Function to perform a probe for the subtable bit fingerprint. + * Returns NULL if not valid, or a valid function pointer to call for this + * subtable on success. 
+ */ +typedef +dpcls_subtable_lookup_func (*dpcls_subtable_probe_func)(uint32_t u0_bit_count, + uint32_t u1_bit_count); + +/* Prototypes for subtable implementations */ +dpcls_subtable_lookup_func +dpcls_subtable_autovalidator_probe(uint32_t u0_bit_count, + uint32_t u1_bit_count); + +/* Probe function to select a specialized version of the generic lookup + * implementation. This provides performance benefit due to compile-time + * optimizations such as loop-unrolling. These are enabled by the compile-time + * constants in the specific function implementations. + */ +dpcls_subtable_lookup_func +dpcls_subtable_generic_probe(uint32_t u0_bit_count, uint32_t u1_bit_count); + + +/* Subtable registration and iteration helpers */ +struct dpcls_subtable_lookup_info_t { + /* higher priority gets used over lower values. This allows deployments + * to select the best implementation for the use-case. + */ + uint8_t prio; + + /* Probe function: tests if the (u0,u1) combo is supported. If not + * supported, this function returns NULL. If supported, a function pointer + * is returned which when called will perform the lookup on the subtable. + */ + dpcls_subtable_probe_func probe; + + /* Human readable name, used in setting subtable priority commands */ + const char *name; +}; + +int32_t dpcls_subtable_set_prio(const char *name, uint8_t priority); + +dpcls_subtable_lookup_func +dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count); + +/* Retrieve the array of lookup implementations for iteration. + * On error, returns a negative number. + * On success, returns the size of the arrays pointed to by the out parameter. 
+ */ +int32_t +dpcls_subtable_lookup_info_get(struct dpcls_subtable_lookup_info_t **out_ptr); + +#endif /* dpif-netdev-lookup.h */ diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h index 68c33a0f9..bdc150d45 100644 --- a/lib/dpif-netdev-private.h +++ b/lib/dpif-netdev-private.h @@ -60,21 +60,6 @@ uint32_t (*dpcls_subtable_lookup_func)(struct dpcls_subtable *subtable, const struct netdev_flow_key *keys[], struct dpcls_rule **rules); -/* Prototype for generic lookup func, using generic scalar code path. */ -uint32_t -dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable, - uint32_t keys_map, - const struct netdev_flow_key *keys[], - struct dpcls_rule **rules); - -/* Probe function to select a specialized version of the generic lookup - * implementation. This provides performance benefit due to compile-time - * optimizations such as loop-unrolling. These are enabled by the compile-time - * constants in the specific function implementations. - */ -dpcls_subtable_lookup_func -dpcls_subtable_generic_probe(uint32_t u0_bit_count, uint32_t u1_bit_count); - /* A set of rules that all have the same fields wildcarded. */ struct dpcls_subtable { /* The fields are only used by writers. */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 629a0cb53..e023ece6f 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -42,6 +42,7 @@ #include "csum.h" #include "dp-packet.h" #include "dpif.h" +#include "dpif-netdev-lookup.h" #include "dpif-netdev-perf.h" #include "dpif-provider.h" #include "dummy.h" @@ -8428,13 +8429,11 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask) subtable->mf_masks = xmalloc(sizeof(uint64_t) * (unit0 + unit1)); netdev_flow_key_gen_masks(mask, subtable->mf_masks, unit0, unit1); - /* Probe for a specialized generic lookup function. */ - subtable->lookup_func = dpcls_subtable_generic_probe(unit0, unit1); - - /* If not set, assign generic lookup. Generic works for any miniflow. 
*/ - if (!subtable->lookup_func) { - subtable->lookup_func = dpcls_subtable_lookup_generic; - } + /* Get the preferred subtable search function for this (u0,u1) subtable. + * The function is guaranteed to always return a valid implementation, and + * possibly an ISA optimized, and/or specialized implementation. + */ + subtable->lookup_func = dpcls_subtable_get_best_impl(unit0, unit1); cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash); /* Add the new subtable at the end of the pvector (with no hits yet) */ -- GitLab From 3d018c3ea79d751ed3e928a01120edfcbb7bff9d Mon Sep 17 00:00:00 2001 From: Harry van Haaren Date: Mon, 13 Jul 2020 13:42:11 +0100 Subject: [PATCH 212/432] dpif-netdev: add subtable lookup prio set command. This commit adds a command for the dpif-netdev to set a specific lookup function to a particular priority level. The command enables runtime switching of the dpcls subtable lookup implementation. Selection is performed based on a priority. Higher priorities take precedence, e.g. priority 5 will be selected instead of a priority 3. If lookup functions have the same priority, the first one in the list is selected. The two options available are 'autovalidator' and 'generic'. 
The below command will set a new priority for the given function: $ ovs-appctl dpif-netdev/subtable-lookup-prio-set generic 2 The autovalidator implementation can be selected at runtime now: $ ovs-appctl dpif-netdev/subtable-lookup-prio-set autovalidator 5 Signed-off-by: Harry van Haaren Acked-by: William Tu Signed-off-by: Ian Stokes --- lib/dpif-netdev.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index e023ece6f..9ea4207f9 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -261,6 +261,7 @@ struct dp_packet_flow_map { static void dpcls_init(struct dpcls *); static void dpcls_destroy(struct dpcls *); static void dpcls_sort_subtable_vector(struct dpcls *); +static uint32_t dpcls_subtable_lookup_reprobe(struct dpcls *cls); static void dpcls_insert(struct dpcls *, struct dpcls_rule *, const struct netdev_flow_key *mask); static void dpcls_remove(struct dpcls *, struct dpcls_rule *); @@ -896,6 +897,9 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd, bool purge); static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd, struct tx_port *tx); +static inline struct dpcls * +dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd, + odp_port_t in_port); static inline bool emc_entry_alive(struct emc_entry *ce); static void emc_clear_entry(struct emc_entry *ce); @@ -1296,6 +1300,97 @@ sorted_poll_thread_list(struct dp_netdev *dp, *n = k; } +static void +dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc, + const char *argv[], void *aux OVS_UNUSED) +{ + /* This function requires 2 parameters (argv[1] and argv[2]) to execute. 
+ * argv[1] is subtable name + * argv[2] is priority + * argv[3] is the datapath name (optional if only 1 datapath exists) + */ + const char *func_name = argv[1]; + + errno = 0; + char *err_char; + uint32_t new_prio = strtoul(argv[2], &err_char, 10); + if (errno != 0 || new_prio > UINT8_MAX) { + unixctl_command_reply_error(conn, + "error converting priority, use integer in range 0-255\n"); + return; + } + + int32_t err = dpcls_subtable_set_prio(func_name, new_prio); + if (err) { + unixctl_command_reply_error(conn, + "error, subtable lookup function not found\n"); + return; + } + + /* argv[3] is optional datapath instance. If no datapath name is provided + * and only one datapath exists, the one existing datapath is reprobed. + */ + ovs_mutex_lock(&dp_netdev_mutex); + struct dp_netdev *dp = NULL; + + if (argc == 4) { + dp = shash_find_data(&dp_netdevs, argv[3]); + } else if (shash_count(&dp_netdevs) == 1) { + dp = shash_first(&dp_netdevs)->data; + } + + if (!dp) { + ovs_mutex_unlock(&dp_netdev_mutex); + unixctl_command_reply_error(conn, + "please specify an existing datapath"); + return; + } + + /* Get PMD threads list, required to get DPCLS instances. */ + size_t n; + uint32_t lookup_dpcls_changed = 0; + uint32_t lookup_subtable_changed = 0; + struct dp_netdev_pmd_thread **pmd_list; + sorted_poll_thread_list(dp, &pmd_list, &n); + + /* take port mutex as HMAP iters over them. 
*/ + ovs_mutex_lock(&dp->port_mutex); + + for (size_t i = 0; i < n; i++) { + struct dp_netdev_pmd_thread *pmd = pmd_list[i]; + if (pmd->core_id == NON_PMD_CORE_ID) { + continue; + } + + struct dp_netdev_port *port = NULL; + HMAP_FOR_EACH (port, node, &dp->ports) { + odp_port_t in_port = port->port_no; + struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, in_port); + if (!cls) { + continue; + } + uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls); + if (subtbl_changes) { + lookup_dpcls_changed++; + lookup_subtable_changed += subtbl_changes; + } + } + } + + /* release port mutex before netdev mutex. */ + ovs_mutex_unlock(&dp->port_mutex); + ovs_mutex_unlock(&dp_netdev_mutex); + + struct ds reply = DS_EMPTY_INITIALIZER; + ds_put_format(&reply, + "Lookup priority change affected %d dpcls ports and %d subtables.\n", + lookup_dpcls_changed, lookup_subtable_changed); + const char *reply_str = ds_cstr(&reply); + unixctl_command_reply(conn, reply_str); + VLOG_INFO("%s", reply_str); + ds_destroy(&reply); +} + static void dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) @@ -1511,6 +1606,10 @@ dpif_netdev_init(void) unixctl_command_register("dpif-netdev/bond-show", "[dp]", 0, 1, dpif_netdev_bond_show, NULL); + unixctl_command_register("dpif-netdev/subtable-lookup-prio-set", + "[lookup_func] [prio] [dp]", + 2, 3, dpif_netdev_subtable_lookup_set, + NULL); return 0; } @@ -8459,6 +8558,28 @@ dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask) return dpcls_create_subtable(cls, mask); } +/* Checks for the best available implementation for each subtable lookup + * function, and assigns it as the lookup function pointer for each subtable. + * Returns the number of subtables that have changed lookup implementation. 
+ */ +static uint32_t +dpcls_subtable_lookup_reprobe(struct dpcls *cls) +{ + struct pvector *pvec = &cls->subtables; + uint32_t subtables_changed = 0; + struct dpcls_subtable *subtable = NULL; + + PVECTOR_FOR_EACH (subtable, pvec) { + uint32_t u0_bits = subtable->mf_bits_set_unit0; + uint32_t u1_bits = subtable->mf_bits_set_unit1; + void *old_func = subtable->lookup_func; + subtable->lookup_func = dpcls_subtable_get_best_impl(u0_bits, u1_bits); + subtables_changed += (old_func != subtable->lookup_func); + } + pvector_publish(pvec); + + return subtables_changed; +} /* Periodically sort the dpcls subtable vectors according to hit counts */ static void -- GitLab From 9ff7cabfd78dc6736f4094400135f03fa3b9a3e3 Mon Sep 17 00:00:00 2001 From: Harry van Haaren Date: Mon, 13 Jul 2020 13:42:12 +0100 Subject: [PATCH 213/432] dpif-netdev: add subtable-lookup-prio-get command. This commit adds a new command, "dpif-netdev/subtable-lookup-prio-get" which prints the available subtable lookup functions in this OVS binary. Example output from the command: Available lookup functions (priority : name) 0 : autovalidator 1 : generic Signed-off-by: Harry van Haaren Acked-by: William Tu Signed-off-by: Ian Stokes --- lib/dpif-netdev.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 9ea4207f9..5bb392cba 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1300,6 +1300,30 @@ sorted_poll_thread_list(struct dp_netdev *dp, *n = k; } +static void +dpif_netdev_subtable_lookup_get(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, + void *aux OVS_UNUSED) +{ + /* Get a list of all lookup functions. */ + struct dpcls_subtable_lookup_info_t *lookup_funcs = NULL; + int32_t count = dpcls_subtable_lookup_info_get(&lookup_funcs); + if (count < 0) { + unixctl_command_reply_error(conn, "error getting lookup names"); + return; + } + + /* Add all lookup functions to reply string. 
*/ + struct ds reply = DS_EMPTY_INITIALIZER; + ds_put_cstr(&reply, "Available lookup functions (priority : name)\n"); + for (int i = 0; i < count; i++) { + ds_put_format(&reply, " %d : %s\n", lookup_funcs[i].prio, + lookup_funcs[i].name); + } + unixctl_command_reply(conn, ds_cstr(&reply)); + ds_destroy(&reply); +} + static void dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc, const char *argv[], void *aux OVS_UNUSED) @@ -1610,6 +1634,9 @@ dpif_netdev_init(void) "[lookup_func] [prio] [dp]", 2, 3, dpif_netdev_subtable_lookup_set, NULL); + unixctl_command_register("dpif-netdev/subtable-lookup-prio-get", "", + 0, 0, dpif_netdev_subtable_lookup_get, + NULL); return 0; } -- GitLab From b250b39a7aa61881ded34ef1a0fffb6768fd7a49 Mon Sep 17 00:00:00 2001 From: Harry van Haaren Date: Mon, 13 Jul 2020 13:42:13 +0100 Subject: [PATCH 214/432] dpdk: enable CPU feature detection. This commit implements a method to retrieve the CPU ISA capabilities. These ISA capabilities can be used in OVS to at runtime select a function implementation to make the best use of the available ISA on the CPU. 
Signed-off-by: Harry van Haaren Acked-by: William Tu Signed-off-by: Ian Stokes --- lib/dpdk-stub.c | 9 +++++++++ lib/dpdk.c | 30 ++++++++++++++++++++++++++++++ lib/dpdk.h | 2 ++ 3 files changed, 41 insertions(+) diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c index c332c217c..b7d577870 100644 --- a/lib/dpdk-stub.c +++ b/lib/dpdk-stub.c @@ -79,6 +79,15 @@ print_dpdk_version(void) { } +bool +dpdk_get_cpu_has_isa(const char *arch OVS_UNUSED, + const char *feature OVS_UNUSED) +{ + VLOG_ERR_ONCE("DPDK not supported in this version of Open vSwitch, " + "cannot use CPU flag based optimizations"); + return false; +} + void dpdk_status(const struct ovsrec_open_vswitch *cfg) { diff --git a/lib/dpdk.c b/lib/dpdk.c index 31450d470..e46f56ba6 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -513,6 +514,35 @@ print_dpdk_version(void) puts(rte_version()); } +#define CHECK_CPU_FEATURE(feature, name_str, RTE_CPUFLAG) \ + do { \ + if (strncmp(feature, name_str, strlen(name_str)) == 0) { \ + int has_isa = rte_cpu_get_flag_enabled(RTE_CPUFLAG); \ + VLOG_DBG("CPU flag %s, available %s\n", name_str, \ + has_isa ? "yes" : "no"); \ + return true; \ + } \ + } while (0) + +bool +dpdk_get_cpu_has_isa(const char *arch, const char *feature) +{ + /* Ensure Arch is x86_64. */ + if (strncmp(arch, "x86_64", 6) != 0) { + return false; + } + +#if __x86_64__ + /* CPU flags only defined for the architecture that support it. */ + CHECK_CPU_FEATURE(feature, "avx512f", RTE_CPUFLAG_AVX512F); + CHECK_CPU_FEATURE(feature, "bmi2", RTE_CPUFLAG_BMI2); +#endif + + VLOG_WARN("Unknown CPU arch,feature: %s,%s. 
Returning not supported.\n", + arch, feature); + return false; +} + void dpdk_status(const struct ovsrec_open_vswitch *cfg) { diff --git a/lib/dpdk.h b/lib/dpdk.h index 736a64279..445a51d06 100644 --- a/lib/dpdk.h +++ b/lib/dpdk.h @@ -44,4 +44,6 @@ bool dpdk_per_port_memory(void); bool dpdk_available(void); void print_dpdk_version(void); void dpdk_status(const struct ovsrec_open_vswitch *); +bool dpdk_get_cpu_has_isa(const char *arch, const char *feature); + #endif /* dpdk.h */ -- GitLab From 352b6c7116cdc096c879fc4fa9ed5fe9c2ccef3b Mon Sep 17 00:00:00 2001 From: Harry van Haaren Date: Mon, 13 Jul 2020 13:42:14 +0100 Subject: [PATCH 215/432] dpif-lookup: add avx512 gather implementation. This commit adds an AVX-512 dpcls lookup implementation. It uses the AVX-512 SIMD ISA to perform multiple miniflow operations in parallel. To run this implementation, the "avx512f" and "bmi2" ISAs are required. These ISA checks are performed at runtime while probing the subtable implementation. If a CPU does not provide both "avx512f" and "bmi2", then this code does not execute. The avx512 code is built as a separate static library, with added CFLAGS to enable the required ISA features. By building only this static library with avx512 enabled, it is ensured that the main OVS core library is *not* using avx512, and that OVS continues to run as before on CPUs that do not support avx512. The approach taken in this implementation is to use the gather instruction to access the packet miniflow, allowing any miniflow blocks to be loaded into an AVX-512 register. This maximizes the usefulness of the register, and hence this implementation handles any subtable with up to 8 miniflow bits. Note that specialization of these avx512 lookup routines still provides performance value, as the hashing of the resulting data is performed in scalar code, and compile-time loop unrolling occurs when specialized to miniflow bits.
This commit checks at configure time if the assembler in use has a known bug in assembling AVX512 code. If this bug is present, all AVX512 code is disabled. Checking the version string of the binutils or assembler is not a good method to detect the issue, as backported fixes would not be reflected. Signed-off-by: Harry van Haaren Acked-by: William Tu Signed-off-by: Ian Stokes --- configure.ac | 3 + lib/automake.mk | 21 ++ lib/dpif-netdev-lookup-avx512-gather.c | 264 +++++++++++++++++++++++++ lib/dpif-netdev-lookup.c | 20 ++ lib/dpif-netdev-lookup.h | 4 + m4/openvswitch.m4 | 30 +++ 6 files changed, 342 insertions(+) create mode 100644 lib/dpif-netdev-lookup-avx512-gather.c diff --git a/configure.ac b/configure.ac index 4a6995ea8..da76cd8a5 100644 --- a/configure.ac +++ b/configure.ac @@ -178,10 +178,13 @@ OVS_ENABLE_OPTION([-Wno-null-pointer-arithmetic]) OVS_ENABLE_OPTION([-Warray-bounds-pointer-arithmetic]) OVS_CONDITIONAL_CC_OPTION([-Wno-unused], [HAVE_WNO_UNUSED]) OVS_CONDITIONAL_CC_OPTION([-Wno-unused-parameter], [HAVE_WNO_UNUSED_PARAMETER]) +OVS_CONDITIONAL_CC_OPTION([-mavx512f], [HAVE_AVX512F]) +OVS_CHECK_CC_OPTION([-mavx512f], [CFLAGS="$CFLAGS -DHAVE_AVX512F"]) OVS_ENABLE_WERROR OVS_ENABLE_SPARSE OVS_CTAGS_IDENTIFIERS OVS_CHECK_DPCLS_AUTOVALIDATOR +OVS_CHECK_BINUTILS_AVX512 AC_ARG_VAR(KARCH, [Kernel Architecture String]) AC_SUBST(KARCH) diff --git a/lib/automake.mk b/lib/automake.mk index 1fc1a209e..eca448a5a 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -11,6 +11,7 @@ lib_libopenvswitch_la_LIBADD = $(SSL_LIBS) lib_libopenvswitch_la_LIBADD += $(CAPNG_LDADD) lib_libopenvswitch_la_LIBADD += $(LIBBPF_LDADD) + if WIN32 lib_libopenvswitch_la_LIBADD += ${PTHREAD_LIBS} endif @@ -20,6 +21,26 @@ lib_libopenvswitch_la_LDFLAGS = \ -Wl,--version-script=$(top_builddir)/lib/libopenvswitch.sym \ $(AM_LDFLAGS) +if HAVE_AVX512F +# Build library of avx512 code with CPU ISA CFLAGS enabled.
This allows the +# compiler to use the ISA features required for the ISA optimized code-paths. +# Use LDFLAGS to compile only static library of this code, as it should be +# statically linked into vswitchd even if vswitchd is a shared build. +lib_LTLIBRARIES += lib/libopenvswitchavx512.la +lib_libopenvswitch_la_LIBADD += lib/libopenvswitchavx512.la +lib_libopenvswitchavx512_la_CFLAGS = \ + -mavx512f \ + -mavx512bw \ + -mavx512dq \ + -mbmi2 \ + $(AM_CFLAGS) +lib_libopenvswitchavx512_la_SOURCES = \ + lib/dpif-netdev-lookup-avx512-gather.c +lib_libopenvswitchavx512_la_LDFLAGS = \ + -static +endif + +# Build core vswitch libraries as before lib_libopenvswitch_la_SOURCES = \ lib/aes128.c \ lib/aes128.h \ diff --git a/lib/dpif-netdev-lookup-avx512-gather.c b/lib/dpif-netdev-lookup-avx512-gather.c new file mode 100644 index 000000000..12a01a34a --- /dev/null +++ b/lib/dpif-netdev-lookup-avx512-gather.c @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2020, Intel Corperation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifdef __x86_64__ +#if !defined(__CHECKER__) + +#include + +#include "dpif-netdev.h" +#include "dpif-netdev-lookup.h" +#include "dpif-netdev-private.h" +#include "cmap.h" +#include "flow.h" +#include "pvector.h" +#include "openvswitch/vlog.h" + +#include "immintrin.h" + +/* Each AVX512 register (zmm register in assembly notation) can contain up to + * 512 bits, which is equivalent to 8 uint64_t variables. 
This is the maximum + * number of miniflow blocks that can be processed in a single pass of the + * AVX512 code at a time. + */ +#define NUM_U64_IN_ZMM_REG (8) +#define BLOCKS_CACHE_SIZE (NETDEV_MAX_BURST * NUM_U64_IN_ZMM_REG) + + +VLOG_DEFINE_THIS_MODULE(dpif_lookup_avx512_gather); + +static inline __m512i +_mm512_popcnt_epi64_manual(__m512i v_in) +{ + static const uint8_t pop_lut[64] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + }; + __m512i v_pop_lut = _mm512_loadu_si512(pop_lut); + + __m512i v_in_srl8 = _mm512_srli_epi64(v_in, 4); + __m512i v_nibble_mask = _mm512_set1_epi8(0xF); + __m512i v_in_lo = _mm512_and_si512(v_in, v_nibble_mask); + __m512i v_in_hi = _mm512_and_si512(v_in_srl8, v_nibble_mask); + + __m512i v_lo_pop = _mm512_shuffle_epi8(v_pop_lut, v_in_lo); + __m512i v_hi_pop = _mm512_shuffle_epi8(v_pop_lut, v_in_hi); + __m512i v_u8_pop = _mm512_add_epi8(v_lo_pop, v_hi_pop); + + return _mm512_sad_epu8(v_u8_pop, _mm512_setzero_si512()); +} + +static inline uint64_t +netdev_rule_matches_key(const struct dpcls_rule *rule, + const uint32_t mf_bits_total, + const uint64_t * block_cache) +{ + const uint64_t *keyp = miniflow_get_values(&rule->flow.mf); + const uint64_t *maskp = miniflow_get_values(&rule->mask->mf); + const uint32_t lane_mask = (1 << mf_bits_total) - 1; + + /* Always load a full cache line from blocks_cache. Other loads must be + * trimmed to the amount of data required for mf_bits_total blocks. + */ + __m512i v_blocks = _mm512_loadu_si512(&block_cache[0]); + __m512i v_mask = _mm512_maskz_loadu_epi64(lane_mask, &maskp[0]); + __m512i v_key = _mm512_maskz_loadu_epi64(lane_mask, &keyp[0]); + + __m512i v_data = _mm512_and_si512(v_blocks, v_mask); + uint32_t res_mask = _mm512_mask_cmpeq_epi64_mask(lane_mask, v_data, v_key); + + /* returns 1 assuming result of SIMD compare is all blocks. 
*/ + return res_mask == lane_mask; +} + +static inline uint32_t ALWAYS_INLINE +avx512_lookup_impl(struct dpcls_subtable *subtable, + uint32_t keys_map, + const struct netdev_flow_key *keys[], + struct dpcls_rule **rules, + const uint32_t bit_count_u0, + const uint32_t bit_count_u1) +{ + OVS_ALIGNED_VAR(CACHE_LINE_SIZE)uint64_t block_cache[BLOCKS_CACHE_SIZE]; + + const uint32_t bit_count_total = bit_count_u0 + bit_count_u1; + int i; + uint32_t hashes[NETDEV_MAX_BURST]; + const uint32_t n_pkts = __builtin_popcountll(keys_map); + ovs_assert(NETDEV_MAX_BURST >= n_pkts); + + const uint64_t tbl_u0 = subtable->mask.mf.map.bits[0]; + const uint64_t tbl_u1 = subtable->mask.mf.map.bits[1]; + + /* Load subtable blocks for masking later. */ + const uint64_t *tbl_blocks = miniflow_get_values(&subtable->mask.mf); + const __m512i v_tbl_blocks = _mm512_loadu_si512(&tbl_blocks[0]); + + /* Load pre-created subtable masks for each block in subtable. */ + const __mmask8 bit_count_total_mask = (1 << bit_count_total) - 1; + const __m512i v_mf_masks = _mm512_maskz_loadu_epi64(bit_count_total_mask, + subtable->mf_masks); + + ULLONG_FOR_EACH_1 (i, keys_map) { + const uint64_t pkt_mf_u0_bits = keys[i]->mf.map.bits[0]; + const uint64_t pkt_mf_u0_pop = __builtin_popcountll(pkt_mf_u0_bits); + + /* Pre-create register with *PER PACKET* u0 offset. */ + const __mmask8 u1_bcast_mask = (UINT8_MAX << bit_count_u0); + const __m512i v_idx_u0_offset = _mm512_maskz_set1_epi64(u1_bcast_mask, + pkt_mf_u0_pop); + + /* Broadcast u0, u1 bitmasks to 8x u64 lanes. */ + __m512i v_u0 = _mm512_set1_epi64(pkt_mf_u0_bits); + __m512i v_pkt_bits = _mm512_mask_set1_epi64(v_u0, u1_bcast_mask, + keys[i]->mf.map.bits[1]); + + /* Bitmask by pre-created masks. */ + __m512i v_masks = _mm512_and_si512(v_pkt_bits, v_mf_masks); + + /* Manual AVX512 popcount for u64 lanes. */ + __m512i v_popcnts = _mm512_popcnt_epi64_manual(v_masks); + + /* Offset popcounts for u1 with pre-created offset register. 
*/ + __m512i v_indexes = _mm512_add_epi64(v_popcnts, v_idx_u0_offset); + + /* Gather u64 blocks from packet miniflow. */ + const __m512i v_zeros = _mm512_setzero_si512(); + const void *pkt_data = miniflow_get_values(&keys[i]->mf); + __m512i v_all_blocks = _mm512_mask_i64gather_epi64(v_zeros, + bit_count_total_mask, v_indexes, + pkt_data, 8); + + /* Zero out bits that pkt doesn't have: + * - 2x pext() to extract bits from packet miniflow as needed by TBL + * - Shift u1 over by bit_count of u0, OR to create zero bitmask + */ + uint64_t u0_to_zero = _pext_u64(keys[i]->mf.map.bits[0], tbl_u0); + uint64_t u1_to_zero = _pext_u64(keys[i]->mf.map.bits[1], tbl_u1); + uint64_t zero_mask = (u1_to_zero << bit_count_u0) | u0_to_zero; + + /* Mask blocks using AND with subtable blocks, use k-mask to zero + * where lanes as required for this packet. + */ + __m512i v_masked_blocks = _mm512_maskz_and_epi64(zero_mask, + v_all_blocks, v_tbl_blocks); + + /* Store to blocks cache, full cache line aligned. */ + _mm512_storeu_si512(&block_cache[i * 8], v_masked_blocks); + } + + /* Hash the now linearized blocks of packet metadata. */ + ULLONG_FOR_EACH_1 (i, keys_map) { + uint64_t *block_ptr = &block_cache[i * 8]; + uint32_t hash = hash_add_words64(0, block_ptr, bit_count_total); + hashes[i] = hash_finish(hash, bit_count_total * 8); + } + + /* Lookup: this returns a bitmask of packets where the hash table had + * an entry for the given hash key. Presence of a hash key does not + * guarantee matching the key, as there can be hash collisions. + */ + uint32_t found_map; + const struct cmap_node *nodes[NETDEV_MAX_BURST]; + found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, nodes); + + /* Verify that packet actually matched rule. If not found, a hash + * collision has taken place, so continue searching with the next node. 
+ */ + ULLONG_FOR_EACH_1 (i, found_map) { + struct dpcls_rule *rule; + + CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) { + const uint32_t cidx = i * 8; + uint32_t match = netdev_rule_matches_key(rule, bit_count_total, + &block_cache[cidx]); + if (OVS_LIKELY(match)) { + rules[i] = rule; + subtable->hit_cnt++; + goto next; + } + } + + /* None of the found rules was a match. Clear the i-th bit to + * search for this key in the next subtable. */ + ULLONG_SET0(found_map, i); + next: + ; /* Keep Sparse happy. */ + } + + return found_map; +} + +/* Expand out specialized functions with U0 and U1 bit attributes. */ +#define DECLARE_OPTIMIZED_LOOKUP_FUNCTION(U0, U1) \ + static uint32_t \ + dpcls_avx512_gather_mf_##U0##_##U1(struct dpcls_subtable *subtable, \ + uint32_t keys_map, \ + const struct netdev_flow_key *keys[], \ + struct dpcls_rule **rules) \ + { \ + return avx512_lookup_impl(subtable, keys_map, keys, rules, U0, U1); \ + } \ + +DECLARE_OPTIMIZED_LOOKUP_FUNCTION(5, 1) +DECLARE_OPTIMIZED_LOOKUP_FUNCTION(4, 1) +DECLARE_OPTIMIZED_LOOKUP_FUNCTION(4, 0) + +/* Check if a specialized function is valid for the required subtable. 
*/ +#define CHECK_LOOKUP_FUNCTION(U0, U1) \ + ovs_assert((U0 + U1) <= NUM_U64_IN_ZMM_REG); \ + if (!f && u0_bits == U0 && u1_bits == U1) { \ + f = dpcls_avx512_gather_mf_##U0##_##U1; \ + } + +static uint32_t +dpcls_avx512_gather_mf_any(struct dpcls_subtable *subtable, uint32_t keys_map, + const struct netdev_flow_key *keys[], + struct dpcls_rule **rules) +{ + return avx512_lookup_impl(subtable, keys_map, keys, rules, + subtable->mf_bits_set_unit0, + subtable->mf_bits_set_unit1); +} + +dpcls_subtable_lookup_func +dpcls_subtable_avx512_gather_probe(uint32_t u0_bits, uint32_t u1_bits) +{ + dpcls_subtable_lookup_func f = NULL; + + int avx512f_available = dpdk_get_cpu_has_isa("x86_64", "avx512f"); + int bmi2_available = dpdk_get_cpu_has_isa("x86_64", "bmi2"); + if (!avx512f_available || !bmi2_available) { + return NULL; + } + + CHECK_LOOKUP_FUNCTION(5, 1); + CHECK_LOOKUP_FUNCTION(4, 1); + CHECK_LOOKUP_FUNCTION(4, 0); + + if (!f && (u0_bits + u1_bits) < NUM_U64_IN_ZMM_REG) { + f = dpcls_avx512_gather_mf_any; + VLOG_INFO("Using avx512_gather_mf_any for subtable (%d,%d)\n", + u0_bits, u1_bits); + } + + return f; +} + +#endif /* CHECKER */ +#endif /* __x86_64__ */ diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c index 530187e9c..bd0a99abe 100644 --- a/lib/dpif-netdev-lookup.c +++ b/lib/dpif-netdev-lookup.c @@ -42,6 +42,26 @@ static struct dpcls_subtable_lookup_info_t subtable_lookups[] = { { .prio = 1, .probe = dpcls_subtable_generic_probe, .name = "generic", }, + +#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__) + /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */ + { .prio = 0, + .probe = dpcls_subtable_avx512_gather_probe, + .name = "avx512_gather", }, +#else + /* Disabling AVX512 at compile time, as compile time requirements not met. + * This could be due to a number of reasons: + * 1) core OVS is not compiled with SSE4.2 instruction set. 
+ * The SSE4.2 instructions are required to use CRC32 ISA for high- + * performance hashing. Consider ./configure of OVS with -msse4.2 (or + * newer) to enable CRC32 hashing and higher performance. + * 2) The assembler in binutils versions 2.30 and 2.31 has bugs in AVX512 + * assembly. Compile time probes check for this assembler issue, and + * disable the HAVE_LD_AVX512_GOOD check if an issue is detected. + * Please upgrade binutils, or backport this binutils fix commit: + * 2069ccaf8dc28ea699bd901fdd35d90613e4402a + */ +#endif }; int32_t diff --git a/lib/dpif-netdev-lookup.h b/lib/dpif-netdev-lookup.h index 61f44b9e8..bd72aa29b 100644 --- a/lib/dpif-netdev-lookup.h +++ b/lib/dpif-netdev-lookup.h @@ -42,6 +42,10 @@ dpcls_subtable_autovalidator_probe(uint32_t u0_bit_count, dpcls_subtable_lookup_func dpcls_subtable_generic_probe(uint32_t u0_bit_count, uint32_t u1_bit_count); +/* Probe function for AVX-512 gather implementation */ +dpcls_subtable_lookup_func +dpcls_subtable_avx512_gather_probe(uint32_t u0_bit_cnt, uint32_t u1_bit_cnt); + /* Subtable registration and iteration helpers */ struct dpcls_subtable_lookup_info_t { diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4 index add3aabcc..7c9a507e5 100644 --- a/m4/openvswitch.m4 +++ b/m4/openvswitch.m4 @@ -404,6 +404,36 @@ AC_DEFUN([OVS_CHECK_SPHINX], AC_ARG_VAR([SPHINXBUILD]) AM_CONDITIONAL([HAVE_SPHINX], [test "$SPHINXBUILD" != none])]) +dnl Checks for binutils/assembler known issue with AVX512. +dnl Due to backports, we probe assembling a reproducer instead of checking +dnl binutils version string.
More details, including ASM dumps and debug here: +dnl GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90028 +dnl The checking of binutils functionality instead of LD version is similar +dnl to how DPDK proposes to solve this issue: +dnl http://patches.dpdk.org/patch/71723/ +AC_DEFUN([OVS_CHECK_BINUTILS_AVX512], + [AC_CACHE_CHECK( + [binutils avx512 assembler checks passing], + [ovs_cv_binutils_avx512_good], + [dnl Assemble a short snippet to test for issue in "build-aux" dir: + mkdir -p build-aux + OBJFILE=build-aux/binutils_avx512_check.o + GATHER_PARAMS='0x8(,%ymm1,1),%ymm0{%k2}' + echo "vpgatherqq $GATHER_PARAMS" | as --64 -o $OBJFILE - + if ($CC -dumpmachine | grep x86_64) >/dev/null 2>&1; then + if (objdump -d --no-show-raw-insn $OBJFILE | grep -q $GATHER_PARAMS) >/dev/null 2>&1; then + ovs_cv_binutils_avx512_good=yes + CFLAGS="$CFLAGS -DHAVE_LD_AVX512_GOOD" + else + ovs_cv_binutils_avx512_good=no + fi + else + ovs_cv_binutils_avx512_good=no + fi]) + rm $OBJFILE + AM_CONDITIONAL([HAVE_LD_AVX512_GOOD], + [test "$ovs_cv_binutils_avx512_good" = yes])]) + dnl Checks for dot. AC_DEFUN([OVS_CHECK_DOT], [AC_CACHE_CHECK( -- GitLab From 842c363050b1099f272c8626ff1c89536e5f5c75 Mon Sep 17 00:00:00 2001 From: Harry van Haaren Date: Mon, 13 Jul 2020 13:42:15 +0100 Subject: [PATCH 216/432] docs/dpdk/bridge: add datapath performance section. This commit adds a section to the dpdk/bridge.rst netdev documentation, detailing the added DPCLS functionality. The newly added commands are documented, and sample output is provided. Running the DPCLS autovalidator with unit tests by default is possible through re-compiling the autovalidator to have the highest priority at startup time. This avoids making changes to all tests, and enables debug and CI builds to validate every lookup implementation with all unit tests. Add NEWS updates for CPU ISA, dynamic subtables, and AVX512 lookup.
Signed-off-by: Harry van Haaren Acked-by: William Tu Signed-off-by: Ian Stokes --- Documentation/intro/install/dpdk.rst | 30 +++++++++++ Documentation/topics/dpdk/bridge.rst | 77 ++++++++++++++++++++++++++++ NEWS | 3 ++ 3 files changed, 110 insertions(+) diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst index dbf88ec43..4d858304c 100644 --- a/Documentation/intro/install/dpdk.rst +++ b/Documentation/intro/install/dpdk.rst @@ -136,6 +136,16 @@ has to be configured to build against the DPDK library (``--with-dpdk``). While ``--with-dpdk`` is required, you can pass any other configuration option described in :ref:`general-configuring`. + It is strongly recommended to build OVS with at least ``-msse4.2`` and + ``-mpopcnt`` optimization flags. If these flags are not enabled, the AVX512 + optimized DPCLS implementation is not available in the resulting binary. + For technical details see the subtable registration code in the + ``lib/dpif-netdev-lookup.c`` file. + + An example that enables the AVX512 optimizations is:: + + $ ./configure --with-dpdk=$DPDK_BUILD CFLAGS="-Ofast -msse4.2 -mpopcnt" + #. Build and install OVS, as described in :ref:`general-building` Additional information can be found in :doc:`general`. @@ -147,6 +157,26 @@ Additional information can be found in :doc:`general`. __ https://github.com/openvswitch/ovs/blob/master/rhel/README.RHEL.rst + +Possible issues when enabling AVX512 +++++++++++++++++++++++++++++++++++++ + +The enabling of ISA optimized builds requires build-system support. +Certain versions of the assembler provided by binutils are known to have +AVX512 assembling issues. The binutils versions affected are 2.30 and 2.31. +As many distros backport fixes to previous versions of a package, checking +the version output of ``as -v`` can err on the side of disabling AVX512. To +remedy this, the OVS build system uses a build-time check to see if ``as`` +will correctly assemble the AVX512 code.
The output of a good version when +running the ``./configure`` step of the build process is as follows:: + + $ checking binutils avx512 assembler checks passing... yes + +If a bug is detected in the binutils assembler, it would indicate ``no``. +Build an updated binutils, or request a backport of this binutils +fix commit ``2069ccaf8dc28ea699bd901fdd35d90613e4402a`` to fix the issue. + + Setup ----- diff --git a/Documentation/topics/dpdk/bridge.rst b/Documentation/topics/dpdk/bridge.rst index f0ef42ecc..526d5c959 100644 --- a/Documentation/topics/dpdk/bridge.rst +++ b/Documentation/topics/dpdk/bridge.rst @@ -137,3 +137,80 @@ currently turned off by default. To turn on SMC:: $ ovs-vsctl --no-wait set Open_vSwitch . other_config:smc-enable=true + +Datapath Classifier Performance +------------------------------- + +The datapath classifier (dpcls) performs wildcard rule matching, a compute +intensive process of matching a packet ``miniflow`` to a rule ``miniflow``. The +code that does this compute work impacts datapath performance, and optimizing +it can provide higher switching performance. + +Modern CPUs provide extensive SIMD instructions which can be used to get higher +performance. The CPU OVS is being deployed on must be capable of running these +SIMD instructions in order to take advantage of the performance benefits. +In OVS v2.14 runtime CPU detection was introduced to enable identifying if +these CPU ISA additions are available, and to allow the user to enable them. + +OVS provides multiple implementations of dpcls. 
The following command enables +the user to check what implementations are available in a running instance :: + + $ ovs-appctl dpif-netdev/subtable-lookup-prio-get + Available lookup functions (priority : name) + 0 : autovalidator + 1 : generic + 0 : avx512_gather + +To set the priority of a lookup function, run the ``prio-set`` command :: + + $ ovs-appctl dpif-netdev/subtable-lookup-prio-set avx512_gather 5 + Lookup priority change affected 1 dpcls ports and 1 subtables. + +The highest priority lookup function is used for classification, and the output +above indicates that one subtable of one DPCLS port has changed its lookup +function due to the command being run. To verify the prioritization, re-run the +get command, note the updated priority of the ``avx512_gather`` function :: + + $ ovs-appctl dpif-netdev/subtable-lookup-prio-get + Available lookup functions (priority : name) + 0 : autovalidator + 1 : generic + 5 : avx512_gather + +If two lookup functions have the same priority, the first one in the list is +chosen, and the 2nd occurrence of that priority is not used. Put in logical +terms, a lookup function is chosen if its priority is greater than the +previous best candidate. + +CPU ISA Testing and Validation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +As multiple versions of DPCLS can co-exist, each with different CPU ISA +optimizations, it is important to validate that they all give the exact same +results. To easily test all DPCLS implementations, an ``autovalidator`` +implementation of the DPCLS exists. This implementation runs all other +available DPCLS implementations, and verifies that the results are identical. + +Running the OVS unit tests with the autovalidator enabled ensures all +implementations provide the same results. Note that the performance of the +autovalidator is lower than all other implementations, as it tests the scalar +implementation against itself, and against all other enabled DPCLS +implementations.
+ +To adjust the DPCLS autovalidator priority, use this command :: + + $ ovs-appctl dpif-netdev/subtable-lookup-prio-set autovalidator 7 + +Running Unit Tests with Autovalidator ++++++++++++++++++++++++++++++++++++++ + +To run the OVS unit test suite with the DPCLS autovalidator as the default +implementation, it is required to recompile OVS. During the recompilation, +the default priority of the ``autovalidator`` implementation is set to the +maximum priority, ensuring every test will be run with every lookup +implementation :: + + $ ./configure --enable-autovalidator + +Compile OVS in debug mode to have ``ovs_assert`` statements error out if +there is a mismatch in the DPCLS lookup implementation. diff --git a/NEWS b/NEWS index e52e862e1..a88fc5462 100644 --- a/NEWS +++ b/NEWS @@ -26,6 +26,9 @@ Post-v2.13.0 * New configuration knob 'other_config:lb-output-action' for bond ports that enables new datapath action 'lb_output' to avoid recirculation in balance-tcp mode. Disabled by default. + * Add runtime CPU ISA detection to allow optimized ISA functions + * Add support for dynamically changing DPCLS subtable lookup functions + * Add ISA optimized DPCLS lookup function using AVX512 - Tunnels: TC Flower offload * Tunnel Local endpoint address masked match are supported. * Tunnel Romte endpoint address masked match are supported. -- GitLab From 02abe831c32215fa562d85b66d7300ff6e7af31e Mon Sep 17 00:00:00 2001 From: Ian Stokes Date: Thu, 2 Jul 2020 16:09:27 +0100 Subject: [PATCH 217/432] dpdk: Use DPDK 19.11.2 release. Modify travis linux build script to use DPDK 19.11.2 stable release and update docs to reference 19.11.2 stable release. Update release faq to reflect latest validated DPDK versions for all branches.
Signed-off-by: Ian Stokes Acked-by: Kevin Traynor --- .travis/linux-build.sh | 2 +- Documentation/faq/releases.rst | 16 ++++++++-------- Documentation/intro/install/dpdk.rst | 8 ++++---- Documentation/topics/dpdk/vhost-user.rst | 6 +++--- Documentation/topics/userspace-tso.rst | 9 --------- NEWS | 3 +++ 6 files changed, 19 insertions(+), 25 deletions(-) diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index 02615a8ec..e0a065291 100755 --- a/.travis/linux-build.sh +++ b/.travis/linux-build.sh @@ -170,7 +170,7 @@ fi if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then if [ -z "$DPDK_VER" ]; then - DPDK_VER="19.11" + DPDK_VER="19.11.2" fi install_dpdk $DPDK_VER if [ "$CC" = "clang" ]; then diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index e5cef3915..ac93e6e97 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -180,9 +180,9 @@ Q: What DPDK version does each Open vSwitch release work with? A: The following table lists the DPDK version against which the given versions of Open vSwitch will successfully build. - ============ ======= + ============ ======== Open vSwitch DPDK - ============ ======= + ============ ======== 2.2.x 1.6 2.3.x 1.6 2.4.x 2.0 @@ -190,12 +190,12 @@ Q: What DPDK version does each Open vSwitch release work with? 2.6.x 16.07.2 2.7.x 16.11.9 2.8.x 17.05.2 - 2.9.x 17.11.4 - 2.10.x 17.11.4 - 2.11.x 18.11.6 - 2.12.x 18.11.6 - 2.13.x 19.11.0 - ============ ======= + 2.9.x 17.11.10 + 2.10.x 17.11.10 + 2.11.x 18.11.9 + 2.12.x 18.11.9 + 2.13.x 19.11.2 + ============ ======== Q: Are all the DPDK releases that OVS versions work with maintained? 
diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst index 4d858304c..39544f835 100644 --- a/Documentation/intro/install/dpdk.rst +++ b/Documentation/intro/install/dpdk.rst @@ -42,7 +42,7 @@ Build requirements In addition to the requirements described in :doc:`general`, building Open vSwitch with DPDK will require the following: -- DPDK 19.11 +- DPDK 19.11.2 - A `DPDK supported NIC`_ @@ -71,9 +71,9 @@ Install DPDK #. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``:: $ cd /usr/src/ - $ wget https://fast.dpdk.org/rel/dpdk-19.11.tar.xz - $ tar xf dpdk-19.11.tar.xz - $ export DPDK_DIR=/usr/src/dpdk-19.11 + $ wget https://fast.dpdk.org/rel/dpdk-19.11.2.tar.xz + $ tar xf dpdk-19.11.2.tar.xz + $ export DPDK_DIR=/usr/src/dpdk-stable-19.11.2 $ cd $DPDK_DIR #. (Optional) Configure DPDK as a shared library diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst index c6c6fd8bd..4bc5aef59 100644 --- a/Documentation/topics/dpdk/vhost-user.rst +++ b/Documentation/topics/dpdk/vhost-user.rst @@ -392,9 +392,9 @@ To begin, instantiate a guest as described in :ref:`dpdk-vhost-user` or DPDK sources to VM and build DPDK:: $ cd /root/dpdk/ - $ wget https://fast.dpdk.org/rel/dpdk-19.11.tar.xz - $ tar xf dpdk-19.11.tar.xz - $ export DPDK_DIR=/root/dpdk/dpdk-19.11 + $ wget https://fast.dpdk.org/rel/dpdk-19.11.2.tar.xz + $ tar xf dpdk-19.11.2.tar.xz + $ export DPDK_DIR=/root/dpdk/dpdk-stable-19.11.2 $ export DPDK_TARGET=x86_64-native-linuxapp-gcc $ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET $ cd $DPDK_DIR diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst index 0fbac93a5..aafa4a1bf 100644 --- a/Documentation/topics/userspace-tso.rst +++ b/Documentation/topics/userspace-tso.rst @@ -104,15 +104,6 @@ on ports without TSO support. 
That also means guests using vhost-user in client mode will receive TSO packet regardless of TSO being enabled or disabled within the guest. -When the NIC performing the segmentation is using the i40e DPDK PMD, a fix -must be included in the DPDK build, otherwise TSO will not work. The fix can -be found on `DPDK patchwork`__. - -__ https://patches.dpdk.org/patch/64136/ - -This fix is expected to be included in the 19.11.1 release. When OVS migrates -to this DPDK release, this limitation can be removed. - ~~~~~~~~~~~~~~~~~~ Performance Tuning ~~~~~~~~~~~~~~~~~~ diff --git a/NEWS b/NEWS index a88fc5462..015facff5 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,9 @@ Post-v2.13.0 * Add hardware offload support for matching IPv6 protocol (experimental). * Add hardware offload support for set of IPv6 src/dst/ttl and tunnel push-output actions (experimental). + * OVS validated with DPDK 19.11.2, due to the inclusion of fixes for + CVE-2020-10722, CVE-2020-10723, CVE-2020-10724, CVE-2020-10725 and + CVE-2020-10726, this DPDK version is strongly recommended to be used. - Linux datapath: * Support for kernel versions up to 5.5.x. - AF_XDP: -- GitLab From e3ca911fc116714466e06de25901978cba8f9718 Mon Sep 17 00:00:00 2001 From: Gowrishankar Muthukrishnan Date: Mon, 20 Apr 2020 19:13:42 +0530 Subject: [PATCH 218/432] ofproto: report coverage on hitting datapath flow limit Whenever the number of flows in the datapath crosses above the flow limit set/autoconfigured, it is helpful to report this event through coverage counter for an operator/devops engineer to know and take proactive corrections in the switch configuration. Today, these events are reported in ovs vswitch log when a new flow can not be inserted in upcall processing in which case ovs writes a warning, otherwise an auto correction made by ovs to flush old flows without any intimation at all. 
Signed-off-by: Gowrishankar Muthukrishnan Signed-off-by: William Tu --- ofproto/ofproto-dpif-upcall.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 72751b9b3..72a5b4d73 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -56,6 +56,7 @@ COVERAGE_DEFINE(handler_duplicate_upcall); COVERAGE_DEFINE(upcall_ukey_contention); COVERAGE_DEFINE(upcall_ukey_replace); COVERAGE_DEFINE(revalidate_missed_dp_flow); +COVERAGE_DEFINE(upcall_flow_limit_hit); /* A thread that reads upcalls from dpif, forwards each upcall's packet, * and possibly sets up a kernel flow as a cache. */ @@ -1281,6 +1282,7 @@ should_install_flow(struct udpif *udpif, struct upcall *upcall) atomic_read_relaxed(&udpif->flow_limit, &flow_limit); if (udpif_get_n_flows(udpif) >= flow_limit) { + COVERAGE_INC(upcall_flow_limit_hit); VLOG_WARN_RL(&rl, "upcall: datapath flow limit reached"); return false; } @@ -2642,6 +2644,10 @@ revalidate(struct revalidator *revalidator) * datapath flows, so we will recover before all the flows are * gone.) */ n_dp_flows = udpif_get_n_flows(udpif); + if (n_dp_flows >= flow_limit) { + COVERAGE_INC(upcall_flow_limit_hit); + } + kill_them_all = n_dp_flows > flow_limit * 2; max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle; -- GitLab From 46ee18f8845b1dfd86963289030e8456d1e46185 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 10 Jul 2020 08:57:51 +0800 Subject: [PATCH 219/432] lib: Remove duplicated includes Remove duplicated includes. 
Acked-by: Greg Rose Signed-off-by: Yunjian Wang Signed-off-by: William Tu --- lib/netdev-native-tnl.c | 1 - lib/tnl-ports.c | 1 - 2 files changed, 2 deletions(-) diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c index 0acc87953..b89dfdd52 100644 --- a/lib/netdev-native-tnl.c +++ b/lib/netdev-native-tnl.c @@ -29,7 +29,6 @@ #include #include -#include #include #include diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c index 446b40763..58269d3b1 100644 --- a/lib/tnl-ports.c +++ b/lib/tnl-ports.c @@ -30,7 +30,6 @@ #include "openvswitch/ofpbuf.h" #include "ovs-thread.h" #include "odp-util.h" -#include "ovs-thread.h" #include "unixctl.h" #include "util.h" -- GitLab From 240207eef89eee954879989b8924237e9f2ea374 Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 10 Jul 2020 08:58:02 +0800 Subject: [PATCH 220/432] ofproto: Remove duplicated includes Remove duplicated includes. Acked-by: Greg Rose Signed-off-by: Yunjian Wang Signed-off-by: William Tu --- ofproto/ofproto-dpif.h | 1 - ofproto/tunnel.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index 4e5ae0c9e..1f5794f03 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -54,7 +54,6 @@ #include "ovs-thread.h" #include "ofproto-provider.h" #include "util.h" -#include "ovs-thread.h" struct dpif_flow_stats; struct ofproto_async_msg; diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c index 03f0ab765..3455ed233 100644 --- a/ofproto/tunnel.c +++ b/ofproto/tunnel.c @@ -13,8 +13,6 @@ * limitations under the License. */ #include -#include "tunnel.h" - #include #include "byte-order.h" -- GitLab From 252c24a61774cac1f593569dcce31e524725676c Mon Sep 17 00:00:00 2001 From: Yunjian Wang Date: Fri, 10 Jul 2020 08:58:12 +0800 Subject: [PATCH 221/432] datapath: Remove duplicated includes Remove duplicated includes. 
Acked-by: Greg Rose Signed-off-by: Yunjian Wang Signed-off-by: William Tu --- datapath/linux/compat/lisp.c | 1 - datapath/vport-stt.c | 1 - 2 files changed, 2 deletions(-) diff --git a/datapath/linux/compat/lisp.c b/datapath/linux/compat/lisp.c index 6dc066de8..49c60f4ed 100644 --- a/datapath/linux/compat/lisp.c +++ b/datapath/linux/compat/lisp.c @@ -38,7 +38,6 @@ #include "datapath.h" #include "gso.h" #include "vport.h" -#include "gso.h" #include "vport-netdev.h" #define LISP_UDP_PORT 4341 diff --git a/datapath/vport-stt.c b/datapath/vport-stt.c index 35c4942c5..71bbeda63 100644 --- a/datapath/vport-stt.c +++ b/datapath/vport-stt.c @@ -23,7 +23,6 @@ #include #include #include -#include #include "datapath.h" #include "vport.h" -- GitLab From c57e02cfd7978368dd90bba52cd20f75934ed8ed Mon Sep 17 00:00:00 2001 From: William Tu Date: Mon, 13 Jul 2020 13:34:32 -0700 Subject: [PATCH 222/432] ovs-bugtool: Fix Python3 bytes str issue. The patch fixes two errors due to type mismatched, when converting between str and bytes: File "/usr/local/sbin/ovs-bugtool", line 649, in main cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'dump-flows', '-m', d]) File "/usr/local/sbin/ovs-bugtool", line 278, in cmd_output label = ' '.join(a) TypeError: sequence item 3: expected str instance, bytes found And File "/usr/sbin/ovs-bugtool", line 721, in main collect_data() File "/usr/sbin/ovs-bugtool", line 366, in collect_data run_procs(process_lists.values()) File "/usr/sbin/ovs-bugtool", line 1354, in run_procs p.inst.write("\n** timeout **\n") File "/usr/sbin/ovs-bugtool", line 1403, in write BytesIO.write(self, s) TypeError: a bytes-like object is required, not 'str' VMware-BZ: #2602135 Fixed: 9e6c00bca9af ("bugtool: Fix for Python3.") Acked-by: Yi-Hung Wei Signed-off-by: William Tu --- utilities/bugtool/ovs-bugtool.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utilities/bugtool/ovs-bugtool.in b/utilities/bugtool/ovs-bugtool.in index 1a5170d8c..ddb5bc8dc 100755 --- 
a/utilities/bugtool/ovs-bugtool.in +++ b/utilities/bugtool/ovs-bugtool.in @@ -643,7 +643,8 @@ exclude those logs from the archive. if os.path.exists(OPENVSWITCH_VSWITCHD_PID): cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'show', '-s']) for d in dp_list(): - cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'dump-flows', '-m', d]) + cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'dump-flows', '-m', + d.decode()]) cmd_output(CAP_PROCESS_LIST, [PS, 'wwwaxf', '-eo', 'pid,tty,stat,time,nice,psr,pcpu,pmem,nwchan,wchan:25,args'], @@ -1351,7 +1352,7 @@ def run_procs(procs): if p.running and now > p.timeout: output_ts("'%s' timed out" % p.cmdAsStr()) if p.inst: - p.inst.write("\n** timeout **\n") + p.inst.write("\n** timeout **\n".encode()) p.timed_out = True p.terminate() -- GitLab From 12d0edd75eba3e9262c44fa77938b27bda491e79 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 9 Jul 2020 12:09:29 +0200 Subject: [PATCH 223/432] dpif-netdev: Avoid deadlock with offloading during PMD thread deletion. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Main thread will try to pause/stop all revalidators during datapath reconfiguration via datapath purge callback (dp_purge_cb) while holding 'dp->port_mutex'. And deadlock happens in case any of revalidator threads is already waiting on 'dp->port_mutex' while dumping offloaded flows: main thread revalidator --------------------------------- ---------------------------------- ovs_mutex_lock(&dp->port_mutex) dpif_netdev_flow_dump_next() -> dp_netdev_flow_to_dpif_flow -> get_dpif_flow_status -> dpif_netdev_get_flow_offload_status() -> ovs_mutex_lock(&dp->port_mutex) reconfigure_datapath() -> reconfigure_pmd_threads() -> dp_netdev_del_pmd() -> dp_purge_cb() -> udpif_pause_revalidators() -> ovs_barrier_block(&udpif->pause_barrier) We're not allowed to call offloading API without holding global port mutex from the userspace datapath due to thread safety restrictions on netdev-offload-dpdk module. 
And it's also not easy to rework datapath reconfiguration process in order to move actual PMD removal and datapath purge out of the port mutex. So, for now, not sleeping on a mutex if it's not immediately available seem like an easiest workaround. This will have impact on flow statistics update rate and on ability to get the latest statistics before removing the flow (latest stats will be lost in case we were not able to take the mutex). However, this will allow us to operate normally avoiding the deadlock. The last bit is that to avoid flapping of flow attributes and statistics we're not failing the operation, but returning last statistics and attributes returned by offload provider. Since those might be updated in different threads, stores and reads are atomic. Reported-by: Frank Wang (王培辉) Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-June/371753.html Fixes: a309e4f52660 ("dpif-netdev: Update offloaded flows statistics.") Acked-by: Kevin Traynor Acked-by: Ian Stokes Tested-by: Eli Britstein Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + lib/dpif-netdev.c | 93 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 88 insertions(+), 6 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 8e6a0769f..eb36a01d0 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -504,6 +504,7 @@ Edwin Chiu echiu@vmware.com Eivind Bulie Haanaes Enas Ahmad enas.ahmad@kaust.edu.sa Eric Lopez +Frank Wang (王培辉) wangpeihui@inspur.com Frido Roose fr.roose@gmail.com Gaetano Catalli gaetano.catalli@gmail.com Gavin Remaley gavin_remaley@selinc.com diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 5bb392cba..2aad24511 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -492,6 +492,12 @@ struct dp_netdev_flow_stats { atomic_uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */ }; +/* Contained by struct dp_netdev_flow's 'last_attrs' member. */ +struct dp_netdev_flow_attrs { + atomic_bool offloaded; /* True if flow is offloaded to HW. 
*/ + ATOMIC(const char *) dp_layer; /* DP layer the flow is handled in. */ +}; + /* A flow in 'dp_netdev_pmd_thread's 'flow_table'. * * @@ -552,6 +558,11 @@ struct dp_netdev_flow { /* Statistics. */ struct dp_netdev_flow_stats stats; + /* Statistics and attributes received from the netdev offload provider. */ + atomic_int netdev_flow_get_result; + struct dp_netdev_flow_stats last_stats; + struct dp_netdev_flow_attrs last_attrs; + /* Actions. */ OVSRCU_TYPE(struct dp_netdev_actions *) actions; @@ -3277,9 +3288,56 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd, return NULL; } +static void +dp_netdev_flow_set_last_stats_attrs(struct dp_netdev_flow *netdev_flow, + const struct dpif_flow_stats *stats, + const struct dpif_flow_attrs *attrs, + int result) +{ + struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats; + struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs; + + atomic_store_relaxed(&netdev_flow->netdev_flow_get_result, result); + if (result) { + return; + } + + atomic_store_relaxed(&last_stats->used, stats->used); + atomic_store_relaxed(&last_stats->packet_count, stats->n_packets); + atomic_store_relaxed(&last_stats->byte_count, stats->n_bytes); + atomic_store_relaxed(&last_stats->tcp_flags, stats->tcp_flags); + + atomic_store_relaxed(&last_attrs->offloaded, attrs->offloaded); + atomic_store_relaxed(&last_attrs->dp_layer, attrs->dp_layer); + +} + +static void +dp_netdev_flow_get_last_stats_attrs(struct dp_netdev_flow *netdev_flow, + struct dpif_flow_stats *stats, + struct dpif_flow_attrs *attrs, + int *result) +{ + struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats; + struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs; + + atomic_read_relaxed(&netdev_flow->netdev_flow_get_result, result); + if (*result) { + return; + } + + atomic_read_relaxed(&last_stats->used, &stats->used); + atomic_read_relaxed(&last_stats->packet_count, &stats->n_packets); + 
atomic_read_relaxed(&last_stats->byte_count, &stats->n_bytes); + atomic_read_relaxed(&last_stats->tcp_flags, &stats->tcp_flags); + + atomic_read_relaxed(&last_attrs->offloaded, &attrs->offloaded); + atomic_read_relaxed(&last_attrs->dp_layer, &attrs->dp_layer); +} + static bool dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp, - const struct dp_netdev_flow *netdev_flow, + struct dp_netdev_flow *netdev_flow, struct dpif_flow_stats *stats, struct dpif_flow_attrs *attrs) { @@ -3302,11 +3360,31 @@ dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp, } ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf); /* Taking a global 'port_mutex' to fulfill thread safety - * restrictions for the netdev-offload-dpdk module. */ - ovs_mutex_lock(&dp->port_mutex); - ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid, - stats, attrs, &buf); - ovs_mutex_unlock(&dp->port_mutex); + * restrictions for the netdev-offload-dpdk module. + * + * XXX: Main thread will try to pause/stop all revalidators during datapath + * reconfiguration via datapath purge callback (dp_purge_cb) while + * holding 'dp->port_mutex'. So we're not waiting for mutex here. + * Otherwise, deadlock is possible, because revalidators might sleep + * waiting for the main thread to release the lock and main thread + * will wait for them to stop processing. + * This workaround might make statistics less accurate. Especially + * for flow deletion case, since there will be no other attempt. */ + if (!ovs_mutex_trylock(&dp->port_mutex)) { + ret = netdev_flow_get(netdev, &match, &actions, + &netdev_flow->mega_ufid, stats, attrs, &buf); + /* Storing statistics and attributes from the last request for + * later use on mutex contention.
*/ + dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs, ret); + ovs_mutex_unlock(&dp->port_mutex); + } else { + dp_netdev_flow_get_last_stats_attrs(netdev_flow, stats, attrs, &ret); + if (!ret && !attrs->dp_layer) { + /* Flow was never reported as 'offloaded' so it's harmless + * to continue to think so. */ + ret = EAGAIN; + } + } netdev_close(netdev); if (ret) { return false; @@ -3575,6 +3653,9 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd, /* Do not allocate extra space. */ flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len); memset(&flow->stats, 0, sizeof flow->stats); + atomic_init(&flow->netdev_flow_get_result, 0); + memset(&flow->last_stats, 0, sizeof flow->last_stats); + memset(&flow->last_attrs, 0, sizeof flow->last_attrs); flow->dead = false; flow->batch = NULL; flow->mark = INVALID_FLOW_MARK; -- GitLab From b4e50218a0f8da43ffe7c420826ddb19985b0b03 Mon Sep 17 00:00:00 2001 From: Jeff Squyres Date: Thu, 9 Jul 2020 16:57:47 -0700 Subject: [PATCH 224/432] bond: Add 'primary' interface concept for active-backup mode. In AB bonding, if the current active slave becomes disabled, a replacement slave is arbitrarily picked from the remaining set of enabled slaves. This commit adds the concept of a "primary" slave: an interface that will always be (or become) the current active slave if it is enabled. The rationale for this functionality is to allow the designation of a preferred interface for a given bond. For example: 1. Bond is created with interfaces p1 (primary) and p2, both enabled. 2. p1 becomes the current active slave (because it was designated as the primary). 3. Later, p1 fails/becomes disabled. 4. p2 is chosen to become the current active slave. 5. Later, p1 becomes re-enabled. 6. p1 is chosen to become the current active slave (because it was designated as the primary) Note that p1 becomes the active slave once it becomes re-enabled, even if nothing has happened to p2. 
This "primary" concept exists in Linux kernel network interface bonding, but did not previously exist in OVS bonding. Only one primary slave interface is supported per bond, and is only supported for active/backup bonding. The primary slave interface is designated via "other_config:bond-primary" when creating a bond. Also, while adding tests for the "primary" concept, make a few small improvements to the non-primary AB bonding test. Signed-off-by: Jeff Squyres Reviewed-by: Aaron Conole Tested-by: Greg Rose Acked-by: Greg Rose Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- NEWS | 2 + ofproto/bond.c | 53 +++++++-- ofproto/bond.h | 2 + tests/lacp.at | 9 ++ tests/ofproto-dpif.at | 251 +++++++++++++++++++++++++++++++++++++++--- vswitchd/bridge.c | 5 + vswitchd/vswitch.xml | 8 ++ 7 files changed, 309 insertions(+), 21 deletions(-) diff --git a/NEWS b/NEWS index 015facff5..0de9ee1af 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,8 @@ Post-v2.13.0 * Add runtime CPU ISA detection to allow optimized ISA functions * Add support for dynamically changing DPCLS subtable lookup functions * Add ISA optimized DPCLS lookup function using AVX512 + - New configuration knob 'other_config:bond-primary' for AB bonds + that specifies interface will be the preferred port if it is active. - Tunnels: TC Flower offload * Tunnel Local endpoint address masked match are supported. * Tunnel Romte endpoint address masked match are supported. diff --git a/ofproto/bond.c b/ofproto/bond.c index 2466c4d02..40c9408bc 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -89,6 +89,7 @@ struct bond_slave { /* Link status. */ bool enabled; /* May be chosen for flows? */ bool may_enable; /* Client considers this slave bondable. */ + bool is_primary; /* This slave is preferred over others. */ long long delay_expires; /* Time after which 'enabled' may change. */ /* Rebalancing info. Used only by bond_rebalance(). */ @@ -124,6 +125,7 @@ struct bond { uint32_t basis; /* Basis for flow hash function. 
*/ bool use_lb_output_action; /* Use lb_output action to avoid recirculation. Applicable only for Balance TCP mode. */ + char *primary; /* Name of the primary slave interface. */ /* SLB specific bonding info. */ struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */ @@ -241,6 +243,7 @@ bond_create(const struct bond_settings *s, struct ofproto_dpif *ofproto) bond->active_slave_mac = eth_addr_zero; bond->active_slave_changed = false; + bond->primary = NULL; bond_reconfigure(bond, s); return bond; @@ -294,6 +297,7 @@ bond_unref(struct bond *bond) update_recirc_rules__(bond); hmap_destroy(&bond->pr_rule_ops); + free(bond->primary); free(bond->name); free(bond); } @@ -471,6 +475,12 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s) bond->bond_revalidate = false; } + if (!nullable_string_is_equal(bond->primary, s->primary)) { + free(bond->primary); + bond->primary = nullable_xstrdup(s->primary); + revalidate = true; + } + if (bond->balance != BM_AB) { if (!bond->recirc_id) { bond->recirc_id = recirc_alloc_id(bond->ofproto); @@ -586,6 +596,11 @@ bond_slave_register(struct bond *bond, void *slave_, free(slave->name); slave->name = xstrdup(netdev_get_name(netdev)); + if (bond->primary && !strcmp(bond->primary, slave->name)) { + slave->is_primary = true; + } else { + slave->is_primary = false; + } ovs_rwlock_unlock(&rwlock); } @@ -671,7 +686,7 @@ bond_slave_set_may_enable(struct bond *bond, void *slave_, bool may_enable) bool bond_run(struct bond *bond, enum lacp_status lacp_status) { - struct bond_slave *slave; + struct bond_slave *slave, *primary; bool revalidate; ovs_rwlock_wrlock(&rwlock); @@ -688,11 +703,19 @@ bond_run(struct bond *bond, enum lacp_status lacp_status) } /* Enable slaves based on link status and LACP feedback. */ + primary = NULL; HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { bond_link_status_update(slave); slave->change_seq = seq_read(connectivity_seq_get()); + + /* Discover if there is an active slave marked 'primary'. 
*/ + if (bond->balance == BM_AB && slave->is_primary && slave->enabled) { + primary = slave; + } } - if (!bond->active_slave || !bond->active_slave->enabled) { + + if (!bond->active_slave || !bond->active_slave->enabled || + (primary && bond->active_slave != primary)) { bond_choose_active_slave(bond); } @@ -1440,16 +1463,25 @@ bond_print_details(struct ds *ds, const struct bond *bond) ds_put_format(ds, "lacp_fallback_ab: %s\n", bond->lacp_fallback_ab ? "true" : "false"); + bool found_primary = false; + HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { + if (slave->is_primary) { + found_primary = true; + } + shash_add(&slave_shash, slave->name, slave); + } + + ds_put_format(ds, "active-backup primary: %s%s\n", + bond->primary ? bond->primary : "", + (!found_primary && bond->primary) + ? " (no such slave)" : ""); + + slave = bond_find_slave_by_mac(bond, bond->active_slave_mac); ds_put_cstr(ds, "active slave mac: "); ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(bond->active_slave_mac)); - slave = bond_find_slave_by_mac(bond, bond->active_slave_mac); ds_put_format(ds,"(%s)\n", slave ? slave->name : "none"); - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - shash_add(&slave_shash, slave->name, slave); - } sorted_slaves = shash_sort(&slave_shash); - for (i = 0; i < shash_count(&slave_shash); i++) { struct bond_entry *be; @@ -1909,6 +1941,13 @@ bond_choose_slave(const struct bond *bond) { struct bond_slave *slave, *best; + /* If there's a primary and it's active, return that. */ + HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { + if (slave->is_primary && slave->enabled) { + return slave; + } + } + /* Find the last active slave. */ slave = bond_find_slave_by_mac(bond, bond->active_slave_mac); if (slave && slave->enabled) { diff --git a/ofproto/bond.h b/ofproto/bond.h index 40c3258dc..ecb90919c 100644 --- a/ofproto/bond.h +++ b/ofproto/bond.h @@ -48,6 +48,8 @@ struct bond_settings { int rebalance_interval; /* Milliseconds between rebalances. 
Zero to disable rebalancing. */ + const char *primary; /* For AB mode, primary interface name. */ + /* Link status detection. */ int up_delay; /* ms before enabling an up slave. */ int down_delay; /* ms before disabling a down slave. */ diff --git a/tests/lacp.at b/tests/lacp.at index df1691731..5257f0cce 100644 --- a/tests/lacp.at +++ b/tests/lacp.at @@ -126,6 +126,7 @@ updelay: 0 ms downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false +active-backup primary: active slave mac: 00:00:00:00:00:00(none) slave p1: disabled @@ -292,6 +293,7 @@ updelay: 0 ms downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false +active-backup primary: slave p0: enabled may_enable: true @@ -308,6 +310,7 @@ updelay: 0 ms downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false +active-backup primary: slave p2: enabled may_enable: true @@ -431,6 +434,7 @@ updelay: 0 ms downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false +active-backup primary: slave p0: disabled @@ -449,6 +453,7 @@ updelay: 0 ms downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false +active-backup primary: slave p2: disabled @@ -565,6 +570,7 @@ updelay: 0 ms downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false +active-backup primary: slave p0: disabled @@ -583,6 +589,7 @@ updelay: 0 ms downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false +active-backup primary: slave p2: disabled @@ -704,6 +711,7 @@ updelay: 0 ms downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false +active-backup primary: slave p0: enabled @@ -722,6 +730,7 @@ updelay: 0 ms downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false +active-backup primary: slave p2: enabled diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index c1455d8aa..feabb7380 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -29,12 +29,16 @@ AT_CHECK([ovs-appctl revalidator/wait]) OVS_VSWITCHD_STOP AT_CLEANUP -AT_SETUP([ofproto-dpif - active-backup bonding]) -# Create br0 
with interfaces p1, p2 and p7, creating bond0 with p1 and p2 -# and br1 with interfaces p3, p4 and p8. -# toggle p1,p2 of bond0 up and down to test bonding in active-backup mode. +AT_SETUP([ofproto-dpif - active-backup bonding (with primary)]) + +dnl Create br0 with interfaces p1, p2 and p7, creating bond0 with p1 and +dnl p2 (p1 as primary) and br1 with interfaces p3, p4 and p8. +dnl toggle p1,p2 of bond0 up and down to test bonding in active-backup mode. +dnl With p1 down and p2 up/active, bring p1 back up. Since p1 is the primary, +dnl it should become active. OVS_VSWITCHD_START( - [add-bond br0 bond0 p1 p2 bond_mode=active-backup --\ + [add-bond br0 bond0 p1 p2 bond_mode=active-backup \ + other_config:bond-primary=p1 -- \ set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \ add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \ @@ -45,8 +49,228 @@ OVS_VSWITCHD_START( add-port br1 p3 -- set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 -- \ add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 -- \ add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --]) +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) WAIT_FOR_DUMMY_PORTS([p3], [p4]) +OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | grep 'active-backup primary: p1'`"]) + + +AT_CHECK([ovs-ofctl add-flow br0 action=normal]) +AT_CHECK([ovs-ofctl add-flow br1 action=normal]) +ovs-appctl netdev-dummy/set-admin-state up +ovs-appctl time/warp 100 +ovs-appctl netdev-dummy/set-admin-state p2 down +ovs-appctl time/stop +ovs-appctl time/warp 100 +AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +AT_CHECK([ovs-appctl 
netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +ovs-appctl time/warp 100 +ovs-appctl netdev-dummy/set-admin-state p2 up +ovs-appctl netdev-dummy/set-admin-state p1 down +ovs-appctl time/warp 100 +AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +ovs-appctl time/warp 2000 100 +AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'in_port([[348]])' | strip_xout], [0], [dnl +recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: +]) + +ovs-appctl netdev-dummy/set-admin-state p1 up +ovs-appctl time/warp 100 +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | 
STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +---- bond0 ---- +bond_mode: active-backup +bond may use recirculation: no, +bond-hash-basis: 0 +updelay: 0 ms +downdelay: 0 ms +lacp_status: off +lacp_fallback_ab: false +active-backup primary: p1 + + +slave p1: enabled + active slave + may_enable: true + +slave p2: enabled + may_enable: true + +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ofproto-dpif - active-backup bonding (primary validation)]) +dnl Make a switch with 3 ports in a bond, so that when we delete one of +dnl the ports from the bond, there are still 2 ports left and the bond +dnl remains functional. +OVS_VSWITCHD_START( + [add-bond br0 bond0 p1 p2 p3 bond_mode=active-backup \ + other_config:bond-primary=p1 -- \ + set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ + set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \ + set interface p3 type=dummy options:pstream=punix:$OVS_RUNDIR/p3.sock ofport_request=3 -- \ + add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy --]) +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) + +dnl Make sure the initial primary interface is set +OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | grep 'active-backup primary: p1'`"]) + +dnl Down the primary interface and verify that we switched. Then +dnl bring the primary back and verify that we switched back to the +dnl primary. 
+ovs-appctl netdev-dummy/set-admin-state p1 down +ovs-appctl time/warp 100 +OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'slave p1: disabled'`"]) +ovs-appctl netdev-dummy/set-admin-state p1 up +ovs-appctl time/warp 100 +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +---- bond0 ---- +bond_mode: active-backup +bond may use recirculation: no, +bond-hash-basis: 0 +updelay: 0 ms +downdelay: 0 ms +lacp_status: off +lacp_fallback_ab: false +active-backup primary: p1 + + +slave p1: enabled + active slave + may_enable: true + +slave p2: enabled + may_enable: true + +slave p3: enabled + may_enable: true + +]) + +dnl Now delete the primary and verify that the output shows that the +dnl primary is no longer enslaved +ovs-vsctl --id=@p1 get Interface p1 -- remove Port bond0 interfaces @p1 +ovs-appctl time/warp 100 +OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'active-backup primary: p1 (no such slave)'`"]) + +dnl Now re-add the primary and verify that the output shows that the +dnl primary is available again. +dnl +dnl First, get the UUIDs of the interfaces that exist on bond0. +dnl Strip the trailing ] so that we can add a new UUID to the end. +uuids=`ovs-vsctl get Port bond0 interfaces | sed -e 's/]//'` +dnl Create a new port "p1" and add its UUID to the set of interfaces +dnl on bond0. 
+ovs-vsctl \ + --id=@p1 create Interface name=p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ + set Port bond0 interfaces="$uuids, @p1]" +ovs-appctl time/warp 100 +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +---- bond0 ---- +bond_mode: active-backup +bond may use recirculation: no, +bond-hash-basis: 0 +updelay: 0 ms +downdelay: 0 ms +lacp_status: off +lacp_fallback_ab: false +active-backup primary: p1 + + +slave p1: enabled + active slave + may_enable: true + +slave p2: enabled + may_enable: true + +slave p3: enabled + may_enable: true + +]) + +dnl Switch to another primary +ovs-vsctl set port bond0 other_config:bond-primary=p2 +ovs-appctl time/warp 100 +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +---- bond0 ---- +bond_mode: active-backup +bond may use recirculation: no, +bond-hash-basis: 0 +updelay: 0 ms +downdelay: 0 ms +lacp_status: off +lacp_fallback_ab: false +active-backup primary: p2 + + +slave p1: enabled + active slave + may_enable: true + +slave p2: enabled + may_enable: true + +slave p3: enabled + may_enable: true + +]) + +dnl Remove the "bond-primary" config directive from the bond. +AT_CHECK([ovs-vsctl remove Port bond0 other_config bond-primary]) +ovs-appctl time/warp 100 +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +---- bond0 ---- +bond_mode: active-backup +bond may use recirculation: no, +bond-hash-basis: 0 +updelay: 0 ms +downdelay: 0 ms +lacp_status: off +lacp_fallback_ab: false +active-backup primary: + + +slave p1: enabled + active slave + may_enable: true + +slave p2: enabled + may_enable: true + +slave p3: enabled + may_enable: true + +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([ofproto-dpif - active-backup bonding (without primary)]) +dnl Create br0 with interfaces p1, p2 and p7, creating bond0 with p1 and p2 +dnl and br1 with interfaces p3, p4 and p8. 
+dnl toggle p1,p2 of bond0 up and down to test bonding in active-backup mode. +OVS_VSWITCHD_START( + [add-bond br0 bond0 p1 p2 bond_mode=active-backup --\ + set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ + set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \ + add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \ + add-br br1 -- \ + set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \ + set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \ + fail-mode=secure -- \ + add-port br1 p3 -- set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 -- \ + add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 -- \ + add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --]) AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) +WAIT_FOR_DUMMY_PORTS([p3], [p4]) +OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | grep 'active-backup primary: '`"]) AT_CHECK([ovs-ofctl add-flow br0 action=normal]) AT_CHECK([ovs-ofctl add-flow br1 action=normal]) @@ -63,15 +287,14 @@ ovs-appctl netdev-dummy/set-admin-state p1 down ovs-appctl time/warp 100 AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) -ovs-appctl time/warp 200 100 -sleep 1 -AT_CHECK([grep 'in_port([[348]])' ovs-vswitchd.log | filter_flow_install | strip_xout], [0], [dnl -recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(frag=no), actions: 
-recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(frag=no), actions: -recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(frag=no), actions: -recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(frag=no), actions: -recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), actions: -recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), actions: +ovs-appctl time/warp 2000 100 +AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'in_port([[348]])' | strip_xout], [0], [dnl +recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: +recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions: ]) OVS_VSWITCHD_STOP AT_CLEANUP diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 0bb4fa652..a3e7facd3 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -4565,6 +4565,11 @@ port_configure_bond(struct port 
*port, struct bond_settings *s) port->name); } + s->primary = NULL; + if (s->balance == BM_AB || s->lacp_fallback_ab_cfg) { + s->primary = smap_get(&port->cfg->other_config, "bond-primary"); + } + miimon_interval = smap_get_int(&port->cfg->other_config, "bond-miimon-interval", 0); if (miimon_interval <= 0) { diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index b6acb34ca..5fd15ce4f 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2003,6 +2003,14 @@ This knob does not affect other balancing modes. + + If a slave interface with this name exists in the bond and + is up, it will be made active. Relevant only when bond_mode is + active-backup. + +

    An important part of link bonding is detecting that links are down so -- GitLab From 55ec5df3b56700eec4b3e8312cff989f1fef1597 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 17 Jul 2020 01:44:15 +0200 Subject: [PATCH 225/432] AUTHORS: Add Jeff Squyres. Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index eb36a01d0..763f199ec 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -192,6 +192,7 @@ Jason Kölker jason@koelker.net Jason Wessel jason.wessel@windriver.com Jasper Capel jasper@capel.tv Jean Tourrilhes jt@hpl.hp.com +Jeff Squyres jsquyres@cisco.com Jeremy Stribling Jeroen van Bemmel jvb127@gmail.com Jesse Gross jesse@kernel.org -- GitLab From 9ecaa5cb71b6e4eedfa3566fa2c5b0f537198ac9 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 15 Jul 2020 15:40:49 +0300 Subject: [PATCH 226/432] rhel: openvswitch-fedora.spec.in: Fix installed but not packaged. With the cited commit, we get an error from rpmbuild about installed but not packaged /usr/lib64/libopenvswitchavx512.a. Fix it by treating it as the other la files. Fixes: 352b6c7116cd ("dpif-lookup: add avx512 gather implementation.") Signed-off-by: Roi Dayan Acked-by: Ian Stokes Tested-by: Greg Rose Acked-by: Greg Rose Signed-off-by: Ilya Maximets --- rhel/openvswitch-fedora.spec.in | 1 + 1 file changed, 1 insertion(+) diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in index 7bc8c34b8..e3e0d8acf 100644 --- a/rhel/openvswitch-fedora.spec.in +++ b/rhel/openvswitch-fedora.spec.in @@ -402,6 +402,7 @@ fi %{_includedir}/openvswitch/* %{_includedir}/openflow/* %exclude %{_libdir}/*.la +%exclude %{_libdir}/*.a %if 0%{?rhel} > 7 || 0%{?fedora} > 28 %files -n network-scripts-%{name} -- GitLab From 9af9dbcec933401ae95b4f51c47cc9db21a7b351 Mon Sep 17 00:00:00 2001 From: David Marchand Date: Mon, 13 Jul 2020 10:06:21 +0200 Subject: [PATCH 227/432] dpdk: Add commands to configure log levels. 
Enabling debug logs in dpdk can be a challenge to be sure of what is actually enabled, add commands to list and change those log levels. However, these commands do not help when tracking issues in dpdk init itself: dump log levels right after init. Example: $ ovs-appctl dpdk/log-list global log level is debug id 0: lib.eal, level is info id 1: lib.malloc, level is info id 2: lib.ring, level is info id 3: lib.mempool, level is info id 4: lib.timer, level is info id 5: pmd, level is info [...] id 37: pmd.net.bnxt.driver, level is notice id 38: pmd.net.e1000.init, level is notice id 39: pmd.net.e1000.driver, level is notice id 40: pmd.net.enic, level is info [...] $ ovs-appctl dpdk/log-set debug pmd.*:notice $ ovs-appctl dpdk/log-list global log level is debug id 0: lib.eal, level is debug id 1: lib.malloc, level is debug id 2: lib.ring, level is debug id 3: lib.mempool, level is debug id 4: lib.timer, level is debug id 5: pmd, level is debug [...] id 37: pmd.net.bnxt.driver, level is notice id 38: pmd.net.e1000.init, level is notice id 39: pmd.net.e1000.driver, level is notice id 40: pmd.net.enic, level is notice [...] Signed-off-by: David Marchand Signed-off-by: Ilya Maximets --- NEWS | 2 + lib/automake.mk | 1 + lib/dpdk-unixctl.man | 14 +++++ lib/dpdk.c | 110 +++++++++++++++++++++++++++++++++++-- vswitchd/ovs-vswitchd.8.in | 1 + 5 files changed, 123 insertions(+), 5 deletions(-) create mode 100644 lib/dpdk-unixctl.man diff --git a/NEWS b/NEWS index 0de9ee1af..9a3afc98f 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,8 @@ Post-v2.13.0 * OVS validated with DPDK 19.11.2, due to the inclusion of fixes for CVE-2020-10722, CVE-2020-10723, CVE-2020-10724, CVE-2020-10725 and CVE-2020-10726, this DPDK version is strongly recommended to be used. + * New 'ovs-appctl dpdk/log-list' and 'ovs-appctl dpdk/log-set' commands + to list and change log levels in DPDK components. - Linux datapath: * Support for kernel versions up to 5.5.x. 
- AF_XDP: diff --git a/lib/automake.mk b/lib/automake.mk index eca448a5a..920c958e3 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -543,6 +543,7 @@ MAN_FRAGMENTS += \ lib/daemon-syn.man \ lib/db-ctl-base.man \ lib/dpctl.man \ + lib/dpdk-unixctl.man \ lib/memory-unixctl.man \ lib/netdev-dpdk-unixctl.man \ lib/dpif-netdev-unixctl.man \ diff --git a/lib/dpdk-unixctl.man b/lib/dpdk-unixctl.man new file mode 100644 index 000000000..2d6d576f2 --- /dev/null +++ b/lib/dpdk-unixctl.man @@ -0,0 +1,14 @@ +.SS "DPDK COMMANDS" +These commands manage DPDK components. +.IP "\fBdpdk/log-list\fR" +Lists all DPDK components that emit logs and their logging levels. +.IP "\fBdpdk/log-set\fR [\fIspec\fR]" +Sets DPDK components logging level. Without any \fIspec\fR, sets the logging +\fBlevel\fR for all DPDK components to \fBdebug\fR. Otherwise, \fIspec\fR is a +list of words separated by spaces: a word can be either a logging \fBlevel\fR +(\fBemergency\fR, \fBalert\fR, \fBcritical\fR, \fBerror\fR, \fBwarning\fR, +\fBnotice\fR, \fBinfo\fR or \fBdebug\fR) or a \fBpattern\fR matching DPDK +components (see \fBdpdk/log-list\fR command on \fBovs\-appctl\fR(8)) separated +by a colon from the logging \fBlevel\fR to apply. +.RE +. 
diff --git a/lib/dpdk.c b/lib/dpdk.c index e46f56ba6..2f235a742 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -37,6 +37,7 @@ #include "ovs-numa.h" #include "smap.h" #include "svec.h" +#include "unixctl.h" #include "util.h" #include "vswitch-idl.h" @@ -262,6 +263,99 @@ static cookie_io_functions_t dpdk_log_func = { .write = dpdk_log_write, }; +static void +dpdk_unixctl_mem_stream(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[] OVS_UNUSED, void *aux) +{ + void (*callback)(FILE *) = aux; + char *response = NULL; + FILE *stream; + size_t size; + + stream = open_memstream(&response, &size); + if (!stream) { + response = xasprintf("Unable to open memstream: %s.", + ovs_strerror(errno)); + unixctl_command_reply_error(conn, response); + goto out; + } + + callback(stream); + fclose(stream); + unixctl_command_reply(conn, response); +out: + free(response); +} + +static int +dpdk_parse_log_level(const char *s) +{ + static const char * const levels[] = { + [RTE_LOG_EMERG] = "emergency", + [RTE_LOG_ALERT] = "alert", + [RTE_LOG_CRIT] = "critical", + [RTE_LOG_ERR] = "error", + [RTE_LOG_WARNING] = "warning", + [RTE_LOG_NOTICE] = "notice", + [RTE_LOG_INFO] = "info", + [RTE_LOG_DEBUG] = "debug", + }; + int i; + + for (i = 1; i < ARRAY_SIZE(levels); ++i) { + if (!strcmp(s, levels[i])) { + return i; + } + } + return -1; +} + +static void +dpdk_unixctl_log_set(struct unixctl_conn *conn, int argc, const char *argv[], + void *aux OVS_UNUSED) +{ + int i; + + /* With no argument, set all components level to 'debug'. 
*/ + if (argc == 1) { + rte_log_set_level_pattern("*", RTE_LOG_DEBUG); + } + for (i = 1; i < argc; i++) { + char *err_msg = NULL; + char *level_string; + char *pattern; + char *s; + int level; + + s = xstrdup(argv[i]); + level_string = strchr(s, ':'); + if (level_string == NULL) { + pattern = "*"; + level_string = s; + } else { + pattern = s; + level_string[0] = '\0'; + level_string++; + } + + level = dpdk_parse_log_level(level_string); + if (level == -1) { + err_msg = xasprintf("invalid log level: '%s'", level_string); + } else if (rte_log_set_level_pattern(pattern, level) < 0) { + err_msg = xasprintf("cannot set log level for '%s'", argv[i]); + } + + if (err_msg) { + unixctl_command_reply_error(conn, err_msg); + free(err_msg); + free(s); + return; + } + free(s); + } + unixctl_command_reply(conn, NULL); +} + static bool dpdk_init__(const struct smap *ovs_other_config) { @@ -414,18 +508,24 @@ dpdk_init__(const struct smap *ovs_other_config) FILE *stream = open_memstream(&response, &size); if (stream) { + fprintf(stream, "rte_memzone_dump:\n"); rte_memzone_dump(stream); + fprintf(stream, "rte_log_dump:\n"); + rte_log_dump(stream); fclose(stream); - if (size) { - VLOG_DBG("rte_memzone_dump:\n%s", response); - } + VLOG_DBG("%s", response); free(response); } else { - VLOG_DBG("Could not dump memzone. Unable to open memstream: %s.", - ovs_strerror(errno)); + VLOG_DBG("Could not dump memzone and log levels. " + "Unable to open memstream: %s.", ovs_strerror(errno)); } } + unixctl_command_register("dpdk/log-list", "", 0, 0, + dpdk_unixctl_mem_stream, rte_log_dump); + unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0, + INT_MAX, dpdk_unixctl_log_set, NULL); + /* We are called from the main thread here */ RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID; diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in index 0ad8bd2bc..c06452928 100644 --- a/vswitchd/ovs-vswitchd.8.in +++ b/vswitchd/ovs-vswitchd.8.in @@ -272,6 +272,7 @@ type). .. 
.so lib/dpctl.man . +.so lib/dpdk-unixctl.man .so lib/dpif-netdev-unixctl.man .so lib/netdev-dpdk-unixctl.man .so ofproto/ofproto-dpif-unixctl.man -- GitLab From 8231c9f624b3e0c4f0b28f78767240292a65d5ef Mon Sep 17 00:00:00 2001 From: Timothy Redaelli Date: Tue, 23 Jun 2020 18:48:38 +0200 Subject: [PATCH 228/432] acinclude: Remove libmnl for MLX5 PMD. libmnl is not used anymore for MLX5 PMD since DPDK 19.08. Signed-off-by: Timothy Redaelli Acked-by: Numan Siddique Reviewed-by: David Marchand Signed-off-by: Ilya Maximets --- acinclude.m4 | 1 - 1 file changed, 1 deletion(-) diff --git a/acinclude.m4 b/acinclude.m4 index 0f1986184..4bac9dbdd 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -396,7 +396,6 @@ AC_DEFUN([OVS_CHECK_DPDK], [ ], [], [[#include ]]) AC_CHECK_DECL([RTE_LIBRTE_MLX5_PMD], [dnl found - OVS_FIND_DEPENDENCY([mnl_attr_put], [mnl], [libmnl]) AC_CHECK_DECL([RTE_IBVERBS_LINK_DLOPEN], [], [dnl not found OVS_FIND_DEPENDENCY([mlx5dv_create_wq], [mlx5], [libmlx5]) OVS_FIND_DEPENDENCY([verbs_init_cq], [ibverbs], [libibverbs]) -- GitLab From 68a95c9ca7cea83f91c7fd152608e8ee6621013d Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 14 Jul 2020 10:24:41 +0300 Subject: [PATCH 229/432] checkpatch: Add argument to skip gerrit change id check. This arg can be used internally by groups using gerrit for code reviews. 
Acked-by: Flavio Leitner Signed-off-by: Roi Dayan Signed-off-by: Ilya Maximets --- utilities/checkpatch.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py index fc9e20bf1..7f1d21a40 100755 --- a/utilities/checkpatch.py +++ b/utilities/checkpatch.py @@ -182,6 +182,7 @@ __regex_if_macros = re.compile(r'^ +(%s) \([\S]([\s\S]+[\S])*\) { +\\' % skip_leading_whitespace_check = False skip_trailing_whitespace_check = False +skip_gerrit_change_id_check = False skip_block_whitespace_check = False skip_signoff_check = False @@ -814,7 +815,8 @@ def ovs_checkpatch_parse(text, filename, author=None, committer=None): elif is_co_author.match(line): m = is_co_author.match(line) co_authors.append(m.group(2)) - elif is_gerrit_change_id.match(line): + elif (is_gerrit_change_id.match(line) and + not skip_gerrit_change_id_check): print_error( "Remove Gerrit Change-Id's before submitting upstream.") print("%d: %s\n" % (lineno, line)) @@ -885,7 +887,8 @@ Check options: -s|--skip-signoff-lines Tolerate missing Signed-off-by line -S|--spellcheck Check C comments and commit-message for possible spelling mistakes --t|--skip-trailing-whitespace Skips the trailing whitespace test""" +-t|--skip-trailing-whitespace Skips the trailing whitespace test + --skip-gerrit-change-id Skips the gerrit change id test""" % sys.argv[0]) @@ -942,6 +945,7 @@ if __name__ == '__main__': "skip-leading-whitespace", "skip-signoff-lines", "skip-trailing-whitespace", + "skip-gerrit-change-id", "spellcheck", "quiet"]) except: @@ -960,6 +964,8 @@ if __name__ == '__main__': skip_signoff_check = True elif o in ("-t", "--skip-trailing-whitespace"): skip_trailing_whitespace_check = True + elif o in ("--skip-gerrit-change-id"): + skip_gerrit_change_id_check = True elif o in ("-f", "--check-file"): checking_file = True elif o in ("-S", "--spellcheck"): -- GitLab From a5cef5eb82c8696b568208de9448fb62500cdc81 Mon Sep 17 00:00:00 2001 From: Ilya 
Maximets Date: Fri, 17 Jul 2020 03:38:31 +0200 Subject: [PATCH 230/432] Prepare for 2.14.0. Acked-by: Ian Stokes Signed-off-by: Ilya Maximets --- NEWS | 2 +- configure.ac | 2 +- debian/changelog | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 9a3afc98f..43dde98a3 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Post-v2.13.0 +v2.14.0 - xx xxx xxxx --------------------- - ovs-vswitchd no longer deletes datapath flows on exit by default. - OpenFlow: diff --git a/configure.ac b/configure.ac index da76cd8a5..e9c4e5503 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. AC_PREREQ(2.63) -AC_INIT(openvswitch, 2.13.90, bugs@openvswitch.org) +AC_INIT(openvswitch, 2.14.0, bugs@openvswitch.org) AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/debian/changelog b/debian/changelog index d5c1db839..1fab6e04b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,8 @@ -openvswitch (2.13.90-1) unstable; urgency=low +openvswitch (2.14.0-1) unstable; urgency=low * New upstream version - -- Open vSwitch team Tue, 21 Jan 2020 12:44:30 -0700 + -- Open vSwitch team Fri, 17 Jul 2020 03:36:19 +0200 openvswitch (2.13.0-1) unstable; urgency=low [ Open vSwitch team] -- GitLab From 9cfb1d0f7d65b52cc8f14d05c9e9ea8b64f90773 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 17 Jul 2020 03:41:32 +0200 Subject: [PATCH 231/432] Prepare for post-2.14.0 (2.14.90). Acked-by: Ian Stokes Signed-off-by: Ilya Maximets --- NEWS | 4 ++++ configure.ac | 2 +- debian/changelog | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 43dde98a3..dceda95a3 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +Post-v2.14.0 +--------------------- + + v2.14.0 - xx xxx xxxx --------------------- - ovs-vswitchd no longer deletes datapath flows on exit by default. 
diff --git a/configure.ac b/configure.ac index e9c4e5503..8d37af9db 100644 --- a/configure.ac +++ b/configure.ac @@ -13,7 +13,7 @@ # limitations under the License. AC_PREREQ(2.63) -AC_INIT(openvswitch, 2.14.0, bugs@openvswitch.org) +AC_INIT(openvswitch, 2.14.90, bugs@openvswitch.org) AC_CONFIG_SRCDIR([datapath/datapath.c]) AC_CONFIG_MACRO_DIR([m4]) AC_CONFIG_AUX_DIR([build-aux]) diff --git a/debian/changelog b/debian/changelog index 1fab6e04b..fd88fec3b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +openvswitch (2.14.90-1) unstable; urgency=low + + * New upstream version + + -- Open vSwitch team Fri, 17 Jul 2020 03:40:20 +0200 + openvswitch (2.14.0-1) unstable; urgency=low * New upstream version -- GitLab From 9e11517e6ca6814c8927cea78df98503890a21e2 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 21 Jul 2020 14:47:32 +0200 Subject: [PATCH 232/432] ovs-router: Fix flushing of local routes. Since commit 8e4e45887ec3, priority of 'local' route entries no longer matches with 'plen'. 
This should be taken into account while flushing cached routes, otherwise they will remain in OVS even after removing them from the system: # ifconfig eth0 11.0.0.1 # ovs-appctl ovs/route/show --- A new route synchronized from kernel route table --- Cached: 11.0.0.1/32 dev eth0 SRC 11.0.0.1 local # ifconfig eth0 0 # ovs-appctl ovs/route/show -- the new route entry is still in ovs route table --- Cached: 11.0.0.1/32 dev eth0 SRC 11.0.0.1 local CC: wenxu Fixes: 8e4e45887ec3 ("ofproto-dpif-xlate: makes OVS native tunneling honor tunnel-specified source addresses") Reported-by: Zheng Jingzhou Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-July/373093.html Acked-by: William Tu Signed-off-by: Ilya Maximets Signed-off-by: William Tu --- AUTHORS.rst | 1 + lib/ovs-router.c | 2 +- tests/automake.mk | 3 ++- tests/system-route.at | 28 ++++++++++++++++++++++++++++ tests/system-userspace-testsuite.at | 1 + 5 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 tests/system-route.at diff --git a/AUTHORS.rst b/AUTHORS.rst index 763f199ec..10ce012ba 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -666,6 +666,7 @@ Ying Chen yingchen@vmware.com Yongqiang Liu liuyq7809@gmail.com ZHANG Zhiming zhangzhiming@yunshan.net.cn Zhangguanghui zhang.guanghui@h3c.com +Zheng Jingzhou glovejmm@163.com Ziyou Wang ziyouw@vmware.com ankur dwivedi ankurengg2003@gmail.com chen zhang 3zhangchen9211@gmail.com diff --git a/lib/ovs-router.c b/lib/ovs-router.c index bfb2b7071..09b81c6e5 100644 --- a/lib/ovs-router.c +++ b/lib/ovs-router.c @@ -505,7 +505,7 @@ ovs_router_flush(void) ovs_mutex_lock(&mutex); classifier_defer(&cls); CLS_FOR_EACH(rt, cr, &cls) { - if (rt->priority == rt->plen) { + if (rt->priority == rt->plen || rt->local) { rt_entry_delete__(&rt->cr); } } diff --git a/tests/automake.mk b/tests/automake.mk index cbba5b170..677b99a6b 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -154,7 +154,8 @@ SYSTEM_KMOD_TESTSUITE_AT = \ SYSTEM_USERSPACE_TESTSUITE_AT = 
\ tests/system-userspace-testsuite.at \ tests/system-userspace-macros.at \ - tests/system-userspace-packet-type-aware.at + tests/system-userspace-packet-type-aware.at \ + tests/system-route.at SYSTEM_TSO_TESTSUITE_AT = \ tests/system-tso-testsuite.at \ diff --git a/tests/system-route.at b/tests/system-route.at new file mode 100644 index 000000000..1714273e3 --- /dev/null +++ b/tests/system-route.at @@ -0,0 +1,28 @@ +AT_BANNER([system-route]) + +dnl Add an interface, add/del ip address, check that OVS catches route updates. +AT_SETUP([ovs-route - add/remove system route]) +AT_KEYWORDS([route]) +OVS_TRAFFIC_VSWITCHD_START() + +dnl Create tap port. +AT_CHECK([ip tuntap add name p1-route mode tap]) +AT_CHECK([ip link set p1-route up]) +on_exit 'ip link del p1-route' + +dnl Add ip address. +AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout]) + +dnl Check that OVS catches route updates. +OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [0], [dnl +Cached: 10.0.0.17/24 dev p1-route SRC 10.0.0.17 +Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local +]) + +dnl Delete ip address. +AT_CHECK([ip addr del 10.0.0.17/24 dev p1-route], [0], [stdout]) +dnl Check that routes was removed from OVS. 
+OVS_WAIT_UNTIL([test `ovs-appctl ovs/route/show | grep -c 'p1-route'` -eq 0 ]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP diff --git a/tests/system-userspace-testsuite.at b/tests/system-userspace-testsuite.at index b40da9579..2e9659a67 100644 --- a/tests/system-userspace-testsuite.at +++ b/tests/system-userspace-testsuite.at @@ -26,3 +26,4 @@ m4_include([tests/system-traffic.at]) m4_include([tests/system-layer3-tunnels.at]) m4_include([tests/system-interface.at]) m4_include([tests/system-userspace-packet-type-aware.at]) +m4_include([tests/system-route.at]) -- GitLab From 05062e814c466b4714deaa09d64f98241bebef5a Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Fri, 5 Jun 2020 16:24:53 -0300 Subject: [PATCH 233/432] docs: Remove duplicate word from vhost-user doc. Fixes: 49df3c0fe779 ("docs: DPDK isn't a datapath, so don't use the term.") Acked-by: Greg Rose Signed-off-by: Flavio Leitner Signed-off-by: William Tu --- Documentation/topics/dpdk/vhost-user.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst index 4bc5aef59..b1eb5d9da 100644 --- a/Documentation/topics/dpdk/vhost-user.rst +++ b/Documentation/topics/dpdk/vhost-user.rst @@ -25,9 +25,9 @@ DPDK vHost User Ports ===================== -OVS userspace switching supports supports vHost user ports as a -primary way to interact with guests. For more information on vHost -User, refer to the `QEMU documentation`_ on same. +OVS userspace switching supports vHost user ports as a primary way to +interact with guests. For more information on vHost User, refer to +the `QEMU documentation`_ on same. .. important:: -- GitLab From d1c507a18b86ee8bad0e5fee5b22438083d31c73 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Thu, 16 Jul 2020 19:14:44 +0800 Subject: [PATCH 234/432] dpctl: Fix memory leak in dpctl_dump_flows() Goto label accurately to avoid memleak. 
Fixes: a692410af0f7 ("dpctl: Expand the flow dump type filter") Cc: Gavi Teitz Reviewed-by: Roi Dayan Signed-off-by: Tonghao Zhang Signed-off-by: William Tu --- lib/dpctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dpctl.c b/lib/dpctl.c index db2b1f896..09ae97f25 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -1031,7 +1031,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p) memset(&dump_types, 0, sizeof dump_types); error = populate_dump_types(types_list, &dump_types, dpctl_p); if (error) { - goto out_free; + goto out_dpifclose; } determine_dpif_flow_dump_types(&dump_types, &dpif_dump_types); -- GitLab From 275f78f95a827c80e2f17988f3e452fbbbb7a09a Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 23 Jul 2020 17:17:24 +0200 Subject: [PATCH 235/432] dpif-netdev.at: Wait for miss upcall log. Some tests check for 'miss upcall' log in a log file immediately after sending the packet, this causes test failures while running them under valgrind or on an overloaded system. Fix that by waiting for appearance of the actual string in the log file. Some other tests use 'sleep 1' to fix that, but it's better to wait for event than sleep for a specific amount of time. 
Signed-off-by: Ilya Maximets Signed-off-by: William Tu --- tests/dpif-netdev.at | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index ec5ffc290..2862a3c9b 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -70,11 +70,13 @@ AT_CHECK([ovs-ofctl add-flow br0 action=normal]) ovs-appctl time/stop ovs-appctl time/warp 5000 AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:01,dst=50:54:00:00:02:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9),tcp_flags(ack)']) + OVS_WAIT_UNTIL([grep "miss upcall" ovs-vswitchd.log]) AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:01,dst=50:54:00:00:02:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9),tcp_flags(ack) ]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:06:00),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.6,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9),tcp_flags(ack)' --len 1024]) + OVS_WAIT_UNTIL([test `grep -c "miss upcall" ovs-vswitchd.log` -ge 2]) AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:06:00),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.6,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9),tcp_flags(ack) ]) @@ -134,8 +136,8 @@ m4_define([DPIF_NETDEV_MISS_FLOW_INSTALL], AT_CHECK([ovs-ofctl add-flow br0 action=normal]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 
'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) ovs-appctl ofproto/trace 'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)' - sleep 1 + OVS_WAIT_UNTIL([grep "miss upcall" ovs-vswitchd.log]) AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) ]) @@ -147,8 +149,8 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50: AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [megaflows disabled ]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) - sleep 1 + OVS_WAIT_UNTIL([test `grep -c "miss upcall" ovs-vswitchd.log` -ge 2]) AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) ]) @@ -231,11 +233,12 @@ m4_define([DPIF_NETDEV_MISS_FLOW_DUMP], AT_CHECK([ovs-ofctl add-flow br0 action=normal]) AT_CHECK([ovs-appctl netdev-dummy/receive p1 
'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) - sleep 1 + OVS_WAIT_UNTIL([grep "miss upcall" ovs-vswitchd.log]) AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) ]) + ovs-appctl revalidator/wait AT_CHECK([filter_flow_dump < ovs-vswitchd.log | strip_xout], [0], [dnl skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),recirc_id(0),dp_hash(0/0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions: ]) @@ -246,11 +249,12 @@ skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label AT_CHECK([ovs-appctl upcall/disable-ufid], [0], [Datapath dumping tersely using UFID disabled ], []) AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) - sleep 1 + OVS_WAIT_UNTIL([test `grep -c "miss upcall" ovs-vswitchd.log` -ge 2]) AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) ]) + ovs-appctl revalidator/wait 
AT_CHECK([filter_flow_dump < ovs-vswitchd.log | strip_xout], [0], [dnl skb_priority(0),skb_mark(0),ct_state(0/0xff),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions: skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),recirc_id(0),dp_hash(0/0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions: -- GitLab From 850e834fa0ff0e40fb72140f0a7c40491c926e23 Mon Sep 17 00:00:00 2001 From: Toms Atteka Date: Wed, 22 Jul 2020 14:25:00 -0700 Subject: [PATCH 236/432] debian: Fixed openvswitch-test package dependency. Python3 does not have python3-twisted-web. Required codebase is inside python3-twisted. Fixes: 1ca0323e7c29 ("Require Python 3 and remove support for Python 2.") Signed-off-by: Toms Atteka Acked-by: Greg Rose Signed-off-by: Ilya Maximets --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 0646b22a1..6420b9d3e 100644 --- a/debian/control +++ b/debian/control @@ -188,7 +188,7 @@ Description: Python bindings for Open vSwitch Package: openvswitch-test Architecture: all Depends: python3, - python3-twisted-web, + python3-twisted, ${misc:Depends}, ${python3:Depends} Description: Open vSwitch test package -- GitLab From a4953c5fe37fbd108d276ca75c31d4ae7d06a62b Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Sat, 23 May 2020 18:33:20 +0800 Subject: [PATCH 237/432] Revert "dpif-netdev: includes microsecond delta in meter bucket calculation". This reverts commit 5c41c31ebd64fda821fb733a5784a7a440a794f8. 
Use the pktgen-dpdk to test the commit 5c41c31ebd64 ("dpif-netdev: includes microsecond delta in meter bucket calculation"), it doesn't work as expected. And it broke the meter function (e.g. set rate 200Mbps, the rate observed was 400Mbps). To reproduce it: $ ovs-vsctl add-br br-int -- set bridge br-int datapath_type=netdev $ ovs-ofctl -O OpenFlow13 add-meter br-int \ "meter=100 kbps burst stats bands=type=drop rate=200000 burst_size=200000" $ ovs-ofctl -O OpenFlow13 add-flow br-int \ "in_port=dpdk0 action=meter:100,output:dpdk1" $ pktgen -l 1,3,5,7,9,11,13,15,17,19 -n 8 --socket-mem 4096 \ --file-prefix pg1 -w 0000:82:00.0 -w 0000:82:00.1 -- \ -T -P -m "[3/5/7/9/11/13/15].[0-1]" -f meter-test.pkt meter-test.pkt: | set 0 count 0 | set 0 size 1500 | set 0 rate 100 | set 0 burst 64 | set 0 sport 1234 | set 0 dport 5678 | set 0 prime 1 | set 0 type ipv4 | set 0 proto udp | set 0 dst ip 1.1.1.2 | set 0 src ip 1.1.1.1/24 | set 0 dst mac ec:0d:9a:ab:54:0a | set 0 src mac ec:0d:9a:bf:df:bb | set 0 vlanid 0 | start 0 Note that the issue that patch 5c41c31ebd64 was intended to fix was already fixed by commit: 42697ca7757b ("dpif-netdev: fix meter at high packet rate.") Signed-off-by: Tonghao Zhang Signed-off-by: Ilya Maximets --- lib/dpif-netdev.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 2aad24511..02df8f11e 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -6095,7 +6095,6 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, struct dp_packet *packet; long long int long_delta_t; /* msec */ uint32_t delta_t; /* msec */ - uint32_t delta_in_us; /* usec */ const size_t cnt = dp_packet_batch_size(packets_); uint32_t bytes, volume; int exceeded_band[NETDEV_MAX_BURST]; @@ -6126,9 +6125,6 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, Assuming that all racing threads received packets at the same time to avoid overflow. 
*/ long_delta_t = 0; - delta_in_us = 0; - } else { - delta_in_us = (now - meter->used) % 1000; } /* Make sure delta_t will not be too large, so that bucket will not @@ -6164,7 +6160,6 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct dp_packet_batch *packets_, /* Update band's bucket. */ band->bucket += delta_t * band->up.rate; - band->bucket += delta_in_us * band->up.rate / 1000; if (band->bucket > band->up.burst_size) { band->bucket = band->up.burst_size; } -- GitLab From 5e06e7ac99dcbeb4ce0de94991e1d08f1a3a21b1 Mon Sep 17 00:00:00 2001 From: William Tu Date: Thu, 23 Jul 2020 09:32:06 -0700 Subject: [PATCH 238/432] tests: Refactor the iptables accept rule. Certain Linux distributions, like CentOS, have default iptable rules to reject input traffic from br-underlay. Refactor by creating a macro 'IPTABLES_ACCEPT([bridge])' for adding the accept rule to the iptable input chain. Signed-off-by: William Tu --- tests/ovs-macros.at | 7 +++++++ tests/system-traffic.at | 12 ++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/tests/ovs-macros.at b/tests/ovs-macros.at index fee509015..b1f666f4e 100644 --- a/tests/ovs-macros.at +++ b/tests/ovs-macros.at @@ -333,3 +333,10 @@ m4_ifndef([AT_FAIL_IF], [m4_define([AT_FAIL_IF], [AT_CHECK([($1) \ && exit 99 || exit 0], [0], [ignore], [ignore])])]) + +dnl Certain Linux distributions, like CentOS, have default iptable rules +dnl to reject input traffic from bridges such as br-underlay. +dnl Add a rule to always accept the traffic. +m4_define([IPTABLES_ACCEPT], + [AT_CHECK([iptables -I INPUT 1 -i $1 -j ACCEPT]) + on_exit 'iptables -D INPUT 1 -i $1']) diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 2a0fbadff..02f0e2716 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -688,11 +688,7 @@ AT_CHECK([ip link set dev br-underlay up]) dnl Set up tunnel endpoints on OVS outside the namespace. 
ADD_OVS_TUNNEL([gre], [br0], [at_gre0], [172.31.1.1], [10.1.1.100/24]) -dnl Certain Linux distributions, like CentOS, have default iptable rules -dnl to reject input traffic from br-underlay. Here we add a rule to walk -dnl around it. -iptables -I INPUT 1 -i br-underlay -j ACCEPT -on_exit 'iptables -D INPUT 1' +IPTABLES_ACCEPT([br-underlay]) ip netns exec at_ns0 tcpdump -n -i p0 dst host 172.31.1.1 -l > p0.pcap & sleep 1 @@ -739,11 +735,7 @@ dnl Set up tunnel endpoints on OVS outside the namespace and emulate a native dnl linux device inside the namespace. ADD_OVS_TUNNEL([erspan], [br0], [at_erspan0], [172.31.1.1], [10.1.1.100/24], [options:key=1 options:erspan_ver=1 options:erspan_idx=7]) -dnl Certain Linux distributions, like CentOS, have default iptable rules -dnl to reject input traffic from br-underlay. Here we add a rule to walk -dnl around it. -iptables -I INPUT 1 -i br-underlay -j ACCEPT -on_exit 'iptables -D INPUT 1' +IPTABLES_ACCEPT([br-underlay]) ip netns exec at_ns0 tcpdump -n -x -i p0 dst host 172.31.1.1 -l > p0.pcap & sleep 1 -- GitLab From b793a65c1667c028d94d91e3cd5e36ca26e38d8b Mon Sep 17 00:00:00 2001 From: Yifeng Sun Date: Mon, 27 Jul 2020 12:27:23 -0700 Subject: [PATCH 239/432] bfd: Support overlay BFD Current OVS intercepts and processes all BFD packets, thus VM-2-VM BFD packets get lost and the recipient VM never sees them. This patch fixes it by only intercepting and processing BFD packets destined to a configured BFD instance, and other BFD packets are made available to the OVS flow table for forwarding. This patch keeps BFD's backward compatibility. 
VMware-BZ: #2579326 Signed-off-by: Yifeng Sun Signed-off-by: William Tu --- lib/bfd.c | 16 +++++++++++++--- tests/bfd.at | 32 ++++++++++++++++++++++++++++++++ vswitchd/vswitch.xml | 7 +++++++ 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/lib/bfd.c b/lib/bfd.c index cc8c6857a..3c965699a 100644 --- a/lib/bfd.c +++ b/lib/bfd.c @@ -149,6 +149,9 @@ BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg)); #define FLAGS_MASK 0x3f #define DEFAULT_MULT 3 +#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */ +#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */ + struct bfd { struct hmap_node node; /* In 'all_bfds'. */ uint32_t disc; /* bfd.LocalDiscr. Key in 'all_bfds' hmap. */ @@ -457,9 +460,9 @@ bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg, &bfd->rmt_eth_dst); bfd_lookup_ip(smap_get_def(cfg, "bfd_src_ip", ""), - htonl(0xA9FE0101) /* 169.254.1.1 */, &bfd->ip_src); + htonl(BFD_DEFAULT_SRC_IP), &bfd->ip_src); bfd_lookup_ip(smap_get_def(cfg, "bfd_dst_ip", ""), - htonl(0xA9FE0100) /* 169.254.1.0 */, &bfd->ip_dst); + htonl(BFD_DEFAULT_DST_IP), &bfd->ip_dst); forwarding_if_rx = smap_get_bool(cfg, "forwarding_if_rx", false); if (bfd->forwarding_if_rx != forwarding_if_rx) { @@ -674,7 +677,14 @@ bfd_should_process_flow(const struct bfd *bfd_, const struct flow *flow, memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto); if (flow->nw_proto == IPPROTO_UDP && !(flow->nw_frag & FLOW_NW_FRAG_LATER) - && tp_dst_equals(flow, BFD_DEST_PORT, wc)) { + && tp_dst_equals(flow, BFD_DEST_PORT, wc) + && (bfd->ip_src == htonl(BFD_DEFAULT_SRC_IP) + || bfd->ip_src == flow->nw_dst)) { + + if (bfd->ip_src == flow->nw_dst) { + memset(&wc->masks.nw_dst, 0xffffffff, sizeof wc->masks.nw_dst); + } + bool check_tnl_key; atomic_read_relaxed(&bfd->check_tnl_key, &check_tnl_key); diff --git a/tests/bfd.at b/tests/bfd.at index 8654ca5db..f5c6409f6 100644 --- a/tests/bfd.at +++ b/tests/bfd.at @@ -1101,3 +1101,35 @@ BFD_CHECK_MULT([p1], [3], [3]) 
OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([bfd - overlay]) +OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ + options:remote_ip=2.2.2.2 ofport_request=1 -- \ + set interface p1 bfd:enable=true bfd:bfd_src_ip=2.2.2.1 -- \ + set bridge br0 fail-mode=standalone]) + +# Userspace slow path handles normal BFD packets. +AT_CHECK([ovs-appctl ofproto/trace --l7-len 0 ovs-dummy 'tunnel(tun_id=0x0,src=2.2.2.2,dst=2.2.2.1,tos=0x0,ttl=64,tp_src=0,tp_dst=0,flags()),in_port(1),skb_mark(0/0),eth(src=00:11:22:33:44:55,dst=00:23:20:00:00:01),eth_type(0x0800),ipv4(src=2.2.2.2/0.0.0.0,dst=2.2.2.1/0.0.0.0,proto=17/0xff,tos=0/0,ttl=255/0,frag=no),udp(src=49152/0,dst=3784/0xffff)' -generate], [0], [stdout]) +# check that the packet should be handled as BFD packet. +AT_CHECK([tail -2 stdout], [0], [dnl +This flow is handled by the userspace slow path because it: + - Consists of BFD packets. +], []) + +# Userspace slow path won't handle overlay BFD packets. Instead, other OVS flows, if configured, will handle them. +AT_CHECK([ovs-appctl ofproto/trace --l7-len 0 ovs-dummy 'tunnel(tun_id=0x0,src=2.2.2.2,dst=2.2.2.1,tos=0x0,ttl=64,tp_src=0,tp_dst=0,flags()),in_port(1),skb_mark(0/0),eth(src=00:11:22:33:44:66,dst=00:23:20:00:00:77),eth_type(0x0800),ipv4(src=192.168.2.2/0.0.0.0,dst=192.168.2.1/0.0.0.0,proto=17/0xff,tos=0/0,ttl=255/0,frag=no),udp(src=49152/0,dst=3784/0xffff)' -generate], [0], [stdout]) +AT_CHECK([tail -10 stdout], [0], [dnl +bridge("br0") +------------- + 0. 
priority 0 + NORMAL + -> learned that 00:11:22:33:44:66 is on port p1 in VLAN 0 + -> no learned MAC for destination, flooding + +Final flow: unchanged +Megaflow: recirc_id=0,eth,udp,tun_id=0,tun_src=2.2.2.2,tun_dst=2.2.2.1,tun_tos=0,tun_flags=-df-csum+key,in_port=1,dl_src=00:11:22:33:44:66,dl_dst=00:23:20:00:00:77,nw_frag=no,tp_dst=3784 +Datapath actions: 100 +], []) + +OVS_VSWITCHD_STOP +AT_CLEANUP diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 5fd15ce4f..81c84927f 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -3670,6 +3670,13 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ optional Authentication or ``Echo Mode'' features.

    +

    + OVS 2.13 and earlier intercepted and processed all BFD packets. + OVS 2.14 and later only intercept and process BFD packets destined + to a configured BFD instance, and other BFD packets are made available + to the OVS flow table for forwarding. +

    +

    A controller sets up key-value pairs in the -- GitLab From 3f5dff904f468754ecbf1d2ff121169d21593184 Mon Sep 17 00:00:00 2001 From: Jinjun Gao Date: Thu, 23 Jul 2020 12:05:51 +0800 Subject: [PATCH 240/432] datapath-windows: Reset ct_mark/ct_label to support ALG The ct_mark/ct_label setting on related connection keep the same behavior with Linux datapath. If one CT entry has parent/master entry, its ct_mark and ct_label should inherit from the corresponding part of parent/master entry at initialization. Signed-off-by: Jinjun Gao Acked-by: Alin Gabriel Serdean Signed-off-by: Alin Gabriel Serdean --- datapath-windows/ovsext/Conntrack.c | 86 ++++++++++++++++++----------- 1 file changed, 54 insertions(+), 32 deletions(-) diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c index d0655911b..2610d626a 100644 --- a/datapath-windows/ovsext/Conntrack.c +++ b/datapath-windows/ovsext/Conntrack.c @@ -789,60 +789,82 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx, static __inline VOID OvsConntrackSetMark(OvsFlowKey *key, POVS_CT_ENTRY entry, - UINT32 value, - UINT32 mask, + MD_MARK *mark, BOOLEAN *markChanged) { - UINT32 newMark; - newMark = value | (entry->mark & ~(mask)); - if (entry->mark != newMark) { + POVS_CT_ENTRY parent = entry->parent; + BOOLEAN changed = FALSE; + UINT32 newMark = 0; + + if (parent && parent->mark) { + newMark = parent->mark; + changed = TRUE; + } else if (mark) { + newMark = mark->value | (entry->mark & ~(mark->mask)); + changed = TRUE; + } + + if (changed && entry->mark != newMark) { entry->mark = newMark; key->ct.mark = newMark; *markChanged = TRUE; } } +static __inline BOOLEAN +OvsConntrackIsLabelsNonZero(const struct ovs_key_ct_labels *labels) +{ + UINT8 i; + + for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) { + if (labels->ct_labels_32[i]) { + return TRUE; + } + } + + return FALSE; +} + static __inline void OvsConntrackSetLabels(OvsFlowKey *key, POVS_CT_ENTRY entry, - struct ovs_key_ct_labels *val, - struct 
ovs_key_ct_labels *mask, + MD_LABELS *labels, BOOLEAN *labelChanged) { - ovs_u128 v, m, pktMdLabel = {0}; - memcpy(&v, val, sizeof v); - memcpy(&m, mask, sizeof m); - memcpy(&pktMdLabel, &entry->labels, sizeof(struct ovs_key_ct_labels)); + POVS_CT_ENTRY parent = entry->parent; - pktMdLabel.u64.lo = v.u64.lo | (pktMdLabel.u64.lo & ~(m.u64.lo)); - pktMdLabel.u64.hi = v.u64.hi | (pktMdLabel.u64.hi & ~(m.u64.hi)); + /* Inherit master's labels at labels initialization, if any. */ + if (!OvsConntrackIsLabelsNonZero(&entry->labels) && + parent && OvsConntrackIsLabelsNonZero(&parent->labels)) { + RtlCopyMemory(&entry->labels, &parent->labels, OVS_CT_LABELS_LEN); + *labelChanged = TRUE; + } + + /* Update labels according to value of ct_label in ct commit */ + if (labels && OvsConntrackIsLabelsNonZero(&labels->mask)) { + UINT8 i; + UINT32 *dst = entry->labels.ct_labels_32; + for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) { + dst[i] = (dst[i] & ~(labels->mask.ct_labels_32[i])) | + (labels->value.ct_labels_32[i] & labels->mask.ct_labels_32[i]); + } - if (!NdisEqualMemory(&entry->labels, &pktMdLabel, - sizeof(struct ovs_key_ct_labels))) { *labelChanged = TRUE; } - NdisMoveMemory(&entry->labels, &pktMdLabel, - sizeof(struct ovs_key_ct_labels)); - NdisMoveMemory(&key->ct.labels, &pktMdLabel, - sizeof(struct ovs_key_ct_labels)); + + /* Update flow key's ct labels */ + NdisMoveMemory(&key->ct.labels, &entry->labels, OVS_CT_LABELS_LEN); } static void OvsCtSetMarkLabel(OvsFlowKey *key, - POVS_CT_ENTRY entry, - MD_MARK *mark, - MD_LABELS *labels, - BOOLEAN *triggerUpdateEvent) + POVS_CT_ENTRY entry, + MD_MARK *mark, + MD_LABELS *labels, + BOOLEAN *triggerUpdateEvent) { - if (mark) { - OvsConntrackSetMark(key, entry, mark->value, mark->mask, - triggerUpdateEvent); - } - - if (labels) { - OvsConntrackSetLabels(key, entry, &labels->value, &labels->mask, - triggerUpdateEvent); - } + OvsConntrackSetMark(key, entry, mark, triggerUpdateEvent); + OvsConntrackSetLabels(key, entry, labels, 
triggerUpdateEvent); } /* -- GitLab From ba73001b6e1d1048df149d7d024e5fecd8ca618d Mon Sep 17 00:00:00 2001 From: Jinjun Gao Date: Wed, 29 Jul 2020 11:33:18 +0800 Subject: [PATCH 241/432] datapath-windows: Update flow key in SET action The flow key is not updated when process OVS_ACTION_ATTR_SET action. It will impact follow-up actions, such as, conntrack module cannot find created conntrack entry if passing old flow key to it. Reported-by: Rui Cao Signed-off-by: Jinjun Gao Signed-off-by: Alin Gabriel Serdean --- datapath-windows/ovsext/Actions.c | 31 ++++++++++++++++++++++++------- datapath-windows/ovsext/Actions.h | 3 +++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c index 4a11cea5e..4f4336984 100644 --- a/datapath-windows/ovsext/Actions.c +++ b/datapath-windows/ovsext/Actions.c @@ -1259,6 +1259,7 @@ OvsActionMplsPush(OvsForwardingContext *ovsFwdCtx, */ static __inline NDIS_STATUS OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *key, const struct ovs_key_ethernet *ethAttr) { PNET_BUFFER curNb; @@ -1285,9 +1286,11 @@ OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx, } ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb)); - RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst, - sizeof ethHdr->Destination); - RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source); + RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst, ETH_ADDR_LENGTH); + RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, ETH_ADDR_LENGTH); + /* Update l2 flow key */ + RtlCopyMemory(key->l2.dlDst, ethAttr->eth_dst, ETH_ADDR_LENGTH); + RtlCopyMemory(key->l2.dlSrc, ethAttr->eth_src, ETH_ADDR_LENGTH); return NDIS_STATUS_SUCCESS; } @@ -1376,6 +1379,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx, */ NDIS_STATUS OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *key, const struct ovs_key_udp *udpAttr) { PUINT8 bufferStart; @@ -1400,15 +1404,19 
@@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, udpHdr->check = ChecksumUpdate16(udpHdr->check, udpHdr->source, udpAttr->udp_src); udpHdr->source = udpAttr->udp_src; + key->ipKey.l4.tpSrc = udpAttr->udp_src; } if (udpHdr->dest != udpAttr->udp_dst) { udpHdr->check = ChecksumUpdate16(udpHdr->check, udpHdr->dest, udpAttr->udp_dst); udpHdr->dest = udpAttr->udp_dst; + key->ipKey.l4.tpDst = udpAttr->udp_dst; } } else { udpHdr->source = udpAttr->udp_src; + key->ipKey.l4.tpSrc = udpAttr->udp_src; udpHdr->dest = udpAttr->udp_dst; + key->ipKey.l4.tpDst = udpAttr->udp_dst; } return NDIS_STATUS_SUCCESS; @@ -1423,6 +1431,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, */ NDIS_STATUS OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *key, const struct ovs_key_tcp *tcpAttr) { PUINT8 bufferStart; @@ -1447,11 +1456,13 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, tcpHdr->check = ChecksumUpdate16(tcpHdr->check, tcpHdr->source, tcpAttr->tcp_src); tcpHdr->source = tcpAttr->tcp_src; + key->ipKey.l4.tpSrc = tcpAttr->tcp_src; } if (tcpHdr->dest != tcpAttr->tcp_dst) { tcpHdr->check = ChecksumUpdate16(tcpHdr->check, tcpHdr->dest, tcpAttr->tcp_dst); tcpHdr->dest = tcpAttr->tcp_dst; + key->ipKey.l4.tpDst = tcpAttr->tcp_dst; } return NDIS_STATUS_SUCCESS; @@ -1579,6 +1590,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx, */ NDIS_STATUS OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *key, const struct ovs_key_ipv4 *ipAttr) { PUINT8 bufferStart; @@ -1632,6 +1644,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, ipAttr->ipv4_src); } ipHdr->saddr = ipAttr->ipv4_src; + key->ipKey.nwSrc = ipAttr->ipv4_src; } if (ipHdr->daddr != ipAttr->ipv4_dst) { if (tcpHdr) { @@ -1647,6 +1660,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, ipAttr->ipv4_dst); } ipHdr->daddr = ipAttr->ipv4_dst; + key->ipKey.nwDst = ipAttr->ipv4_dst; } if (ipHdr->protocol != ipAttr->ipv4_proto) { UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00; @@ 
-1661,6 +1675,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto); } ipHdr->protocol = ipAttr->ipv4_proto; + key->ipKey.nwProto = ipAttr->ipv4_proto; } if (ipHdr->ttl != ipAttr->ipv4_ttl) { UINT16 oldTtl = (ipHdr->ttl) & 0xff; @@ -1669,6 +1684,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl); } ipHdr->ttl = ipAttr->ipv4_ttl; + key->ipKey.nwTtl = ipAttr->ipv4_ttl; } return NDIS_STATUS_SUCCESS; @@ -1691,12 +1707,12 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, switch (type) { case OVS_KEY_ATTR_ETHERNET: - status = OvsUpdateEthHeader(ovsFwdCtx, + status = OvsUpdateEthHeader(ovsFwdCtx, key, NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet))); break; case OVS_KEY_ATTR_IPV4: - status = OvsUpdateIPv4Header(ovsFwdCtx, + status = OvsUpdateIPv4Header(ovsFwdCtx, key, NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4))); break; @@ -1709,16 +1725,17 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx, status = SUCCEEDED(convertStatus) ? 
NDIS_STATUS_SUCCESS : NDIS_STATUS_FAILURE; ASSERT(status == NDIS_STATUS_SUCCESS); RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey); + RtlCopyMemory(&key->tunKey, &tunKey, sizeof key->tunKey); break; } case OVS_KEY_ATTR_UDP: - status = OvsUpdateUdpPorts(ovsFwdCtx, + status = OvsUpdateUdpPorts(ovsFwdCtx, key, NlAttrGetUnspec(a, sizeof(struct ovs_key_udp))); break; case OVS_KEY_ATTR_TCP: - status = OvsUpdateTcpPorts(ovsFwdCtx, + status = OvsUpdateTcpPorts(ovsFwdCtx, key, NlAttrGetUnspec(a, sizeof(struct ovs_key_tcp))); break; diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h index fd050d5dd..bc12e1166 100644 --- a/datapath-windows/ovsext/Actions.h +++ b/datapath-windows/ovsext/Actions.h @@ -115,14 +115,17 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx, NDIS_STATUS OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *key, const struct ovs_key_udp *udpAttr); NDIS_STATUS OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *key, const struct ovs_key_tcp *tcpAttr); NDIS_STATUS OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx, + OvsFlowKey *key, const struct ovs_key_ipv4 *ipAttr); NDIS_STATUS -- GitLab From e8bf77748ab8661391d6e00f5e51df5b02faeefe Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 27 Jul 2020 17:41:35 +0200 Subject: [PATCH 242/432] odp-util: Fix clearing match mask if set action is partially unnecessary. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While committing set() actions, commit() could wildcard all the fields that are same in match key and in the set action. This leads to situation where mask after commit could actually contain less bits than it was before. And if set action was partially committed, all the fields that were the same will be cleared out from the matching key resulting in the incorrect (too wide) flow. 
For example, for the flow that matches on both src and dst mac addresses, if the dst mac is the same and only src should be changed by the set() action, destination address will be wildcarded in the match key and will never be matched, i.e. flows with any destination mac will match, which is not correct. Setting OF rule: in_port=1,dl_src=50:54:00:00:00:09 actions=mod_dl_dst(50:54:00:00:00:0a),output(2) Sending following packets on port 1: 1. eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800) 2. eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0c),eth_type(0x0800) 3. eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800) Resulted datapath flows: eth(dst=50:54:00:00:00:0c),<...>, actions:set(eth(dst=50:54:00:00:00:0a)),2 eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),<...>, actions:2 The first flow doesn't have any match on source MAC address and the third packet successfully matched on it while it must be dropped. Fix that by updating the match mask with only the new bits set by commit(), but keeping those that were cleared (OR operation). With fix applied, resulted correct flows are: eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),<...>, actions:2 eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0c),<...>, actions:set(eth(dst=50:54:00:00:00:0a)),2 eth(src=50:54:00:00:00:0b),<...>, actions:drop The code before commit dbf4a92800d0 was not able to reduce the mask, it was only possible to expand it to exact match, so it was OK to update original matching mask with the new value in all cases. 
Fixes: dbf4a92800d0 ("odp-util: Do not rewrite fields with the same values as matched") Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1854376 Acked-by: Eli Britstein Tested-by: Adrián Moreno Signed-off-by: Ilya Maximets --- lib/odp-util.c | 67 ++++++++++++++++++++++++++++++++----------- lib/util.c | 13 +++++++++ lib/util.h | 1 + tests/ofproto-dpif.at | 23 +++++++++++++++ 4 files changed, 88 insertions(+), 16 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index 011db9ebb..e54a78b43 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -7701,6 +7701,28 @@ struct offsetof_sizeof { int size; }; + +/* Performs bitwise OR over the fields in 'dst_' and 'src_' specified in + * 'offsetof_sizeof_arr' array. Result is stored in 'dst_'. */ +static void +or_masks(void *dst_, const void *src_, + struct offsetof_sizeof *offsetof_sizeof_arr) +{ + int field, size, offset; + const uint8_t *src = src_; + uint8_t *dst = dst_; + + for (field = 0; ; field++) { + size = offsetof_sizeof_arr[field].size; + offset = offsetof_sizeof_arr[field].offset; + + if (!size) { + return; + } + or_bytes(dst + offset, src + offset, size); + } +} + /* Compares each of the fields in 'key0' and 'key1'. The fields are specified * in 'offsetof_sizeof_arr', which is an array terminated by a 0-size field. * Returns true if all of the fields are equal, false if at least one differs. 
@@ -7779,9 +7801,10 @@ commit_set_ether_action(const struct flow *flow, struct flow *base_flow, struct flow_wildcards *wc, bool use_masked) { - struct ovs_key_ethernet key, base, mask; + struct ovs_key_ethernet key, base, mask, orig_mask; struct offsetof_sizeof ovs_key_ethernet_offsetof_sizeof_arr[] = OVS_KEY_ETHERNET_OFFSETOF_SIZEOF_ARR; + if (flow->packet_type != htonl(PT_ETH)) { return; } @@ -7789,11 +7812,13 @@ commit_set_ether_action(const struct flow *flow, struct flow *base_flow, get_ethernet_key(flow, &key); get_ethernet_key(base_flow, &base); get_ethernet_key(&wc->masks, &mask); + memcpy(&orig_mask, &mask, sizeof mask); if (commit(OVS_KEY_ATTR_ETHERNET, use_masked, &key, &base, &mask, sizeof key, ovs_key_ethernet_offsetof_sizeof_arr, odp_actions)) { put_ethernet_key(&base, base_flow); + or_masks(&mask, &orig_mask, ovs_key_ethernet_offsetof_sizeof_arr); put_ethernet_key(&mask, &wc->masks); } } @@ -7917,7 +7942,7 @@ commit_set_ipv4_action(const struct flow *flow, struct flow *base_flow, struct ofpbuf *odp_actions, struct flow_wildcards *wc, bool use_masked) { - struct ovs_key_ipv4 key, mask, base; + struct ovs_key_ipv4 key, mask, orig_mask, base; struct offsetof_sizeof ovs_key_ipv4_offsetof_sizeof_arr[] = OVS_KEY_IPV4_OFFSETOF_SIZEOF_ARR; @@ -7928,6 +7953,7 @@ commit_set_ipv4_action(const struct flow *flow, struct flow *base_flow, get_ipv4_key(flow, &key, false); get_ipv4_key(base_flow, &base, false); get_ipv4_key(&wc->masks, &mask, true); + memcpy(&orig_mask, &mask, sizeof mask); mask.ipv4_proto = 0; /* Not writeable. */ mask.ipv4_frag = 0; /* Not writable. */ @@ -7939,9 +7965,8 @@ commit_set_ipv4_action(const struct flow *flow, struct flow *base_flow, if (commit(OVS_KEY_ATTR_IPV4, use_masked, &key, &base, &mask, sizeof key, ovs_key_ipv4_offsetof_sizeof_arr, odp_actions)) { put_ipv4_key(&base, base_flow, false); - if (mask.ipv4_proto != 0) { /* Mask was changed by commit(). 
*/ - put_ipv4_key(&mask, &wc->masks, true); - } + or_masks(&mask, &orig_mask, ovs_key_ipv4_offsetof_sizeof_arr); + put_ipv4_key(&mask, &wc->masks, true); } } @@ -7974,7 +7999,7 @@ commit_set_ipv6_action(const struct flow *flow, struct flow *base_flow, struct ofpbuf *odp_actions, struct flow_wildcards *wc, bool use_masked) { - struct ovs_key_ipv6 key, mask, base; + struct ovs_key_ipv6 key, mask, orig_mask, base; struct offsetof_sizeof ovs_key_ipv6_offsetof_sizeof_arr[] = OVS_KEY_IPV6_OFFSETOF_SIZEOF_ARR; @@ -7985,6 +8010,7 @@ commit_set_ipv6_action(const struct flow *flow, struct flow *base_flow, get_ipv6_key(flow, &key, false); get_ipv6_key(base_flow, &base, false); get_ipv6_key(&wc->masks, &mask, true); + memcpy(&orig_mask, &mask, sizeof mask); mask.ipv6_proto = 0; /* Not writeable. */ mask.ipv6_frag = 0; /* Not writable. */ mask.ipv6_label &= htonl(IPV6_LABEL_MASK); /* Not writable. */ @@ -7997,9 +8023,8 @@ commit_set_ipv6_action(const struct flow *flow, struct flow *base_flow, if (commit(OVS_KEY_ATTR_IPV6, use_masked, &key, &base, &mask, sizeof key, ovs_key_ipv6_offsetof_sizeof_arr, odp_actions)) { put_ipv6_key(&base, base_flow, false); - if (mask.ipv6_proto != 0) { /* Mask was changed by commit(). 
*/ - put_ipv6_key(&mask, &wc->masks, true); - } + or_masks(&mask, &orig_mask, ovs_key_ipv6_offsetof_sizeof_arr); + put_ipv6_key(&mask, &wc->masks, true); } } @@ -8031,17 +8056,19 @@ static enum slow_path_reason commit_set_arp_action(const struct flow *flow, struct flow *base_flow, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { - struct ovs_key_arp key, mask, base; + struct ovs_key_arp key, mask, orig_mask, base; struct offsetof_sizeof ovs_key_arp_offsetof_sizeof_arr[] = OVS_KEY_ARP_OFFSETOF_SIZEOF_ARR; get_arp_key(flow, &key); get_arp_key(base_flow, &base); get_arp_key(&wc->masks, &mask); + memcpy(&orig_mask, &mask, sizeof mask); if (commit(OVS_KEY_ATTR_ARP, true, &key, &base, &mask, sizeof key, ovs_key_arp_offsetof_sizeof_arr, odp_actions)) { put_arp_key(&base, base_flow); + or_masks(&mask, &orig_mask, ovs_key_arp_offsetof_sizeof_arr); put_arp_key(&mask, &wc->masks); return SLOW_ACTION; } @@ -8068,7 +8095,7 @@ static enum slow_path_reason commit_set_icmp_action(const struct flow *flow, struct flow *base_flow, struct ofpbuf *odp_actions, struct flow_wildcards *wc) { - struct ovs_key_icmp key, mask, base; + struct ovs_key_icmp key, mask, orig_mask, base; struct offsetof_sizeof ovs_key_icmp_offsetof_sizeof_arr[] = OVS_KEY_ICMP_OFFSETOF_SIZEOF_ARR; enum ovs_key_attr attr; @@ -8084,10 +8111,12 @@ commit_set_icmp_action(const struct flow *flow, struct flow *base_flow, get_icmp_key(flow, &key); get_icmp_key(base_flow, &base); get_icmp_key(&wc->masks, &mask); + memcpy(&orig_mask, &mask, sizeof mask); if (commit(attr, false, &key, &base, &mask, sizeof key, ovs_key_icmp_offsetof_sizeof_arr, odp_actions)) { put_icmp_key(&base, base_flow); + or_masks(&mask, &orig_mask, ovs_key_icmp_offsetof_sizeof_arr); put_icmp_key(&mask, &wc->masks); return SLOW_ACTION; } @@ -8135,17 +8164,19 @@ commit_set_nd_action(const struct flow *flow, struct flow *base_flow, struct ofpbuf *odp_actions, struct flow_wildcards *wc, bool use_masked) { - struct ovs_key_nd key, mask, base; + 
struct ovs_key_nd key, mask, orig_mask, base; struct offsetof_sizeof ovs_key_nd_offsetof_sizeof_arr[] = OVS_KEY_ND_OFFSETOF_SIZEOF_ARR; get_nd_key(flow, &key); get_nd_key(base_flow, &base); get_nd_key(&wc->masks, &mask); + memcpy(&orig_mask, &mask, sizeof mask); if (commit(OVS_KEY_ATTR_ND, use_masked, &key, &base, &mask, sizeof key, ovs_key_nd_offsetof_sizeof_arr, odp_actions)) { put_nd_key(&base, base_flow); + or_masks(&mask, &orig_mask, ovs_key_nd_offsetof_sizeof_arr); put_nd_key(&mask, &wc->masks); return SLOW_ACTION; } @@ -8159,18 +8190,20 @@ commit_set_nd_extensions_action(const struct flow *flow, struct ofpbuf *odp_actions, struct flow_wildcards *wc, bool use_masked) { - struct ovs_key_nd_extensions key, mask, base; + struct ovs_key_nd_extensions key, mask, orig_mask, base; struct offsetof_sizeof ovs_key_nd_extensions_offsetof_sizeof_arr[] = OVS_KEY_ND_EXTENSIONS_OFFSETOF_SIZEOF_ARR; get_nd_extensions_key(flow, &key); get_nd_extensions_key(base_flow, &base); get_nd_extensions_key(&wc->masks, &mask); + memcpy(&orig_mask, &mask, sizeof mask); if (commit(OVS_KEY_ATTR_ND_EXTENSIONS, use_masked, &key, &base, &mask, sizeof key, ovs_key_nd_extensions_offsetof_sizeof_arr, odp_actions)) { put_nd_extensions_key(&base, base_flow); + or_masks(&mask, &orig_mask, ovs_key_nd_extensions_offsetof_sizeof_arr); put_nd_extensions_key(&mask, &wc->masks); return SLOW_ACTION; } @@ -8385,7 +8418,7 @@ commit_set_port_action(const struct flow *flow, struct flow *base_flow, bool use_masked) { enum ovs_key_attr key_type; - union ovs_key_tp key, mask, base; + union ovs_key_tp key, mask, orig_mask, base; struct offsetof_sizeof ovs_key_tp_offsetof_sizeof_arr[] = OVS_KEY_TCP_OFFSETOF_SIZEOF_ARR; @@ -8411,10 +8444,12 @@ commit_set_port_action(const struct flow *flow, struct flow *base_flow, get_tp_key(flow, &key); get_tp_key(base_flow, &base); get_tp_key(&wc->masks, &mask); + memcpy(&orig_mask, &mask, sizeof mask); if (commit(key_type, use_masked, &key, &base, &mask, sizeof key, 
ovs_key_tp_offsetof_sizeof_arr, odp_actions)) { put_tp_key(&base, base_flow); + or_masks(&mask, &orig_mask, ovs_key_tp_offsetof_sizeof_arr); put_tp_key(&mask, &wc->masks); } } @@ -8438,7 +8473,7 @@ commit_set_priority_action(const struct flow *flow, struct flow *base_flow, if (commit(OVS_KEY_ATTR_PRIORITY, use_masked, &key, &base, &mask, sizeof key, ovs_key_prio_offsetof_sizeof_arr, odp_actions)) { base_flow->skb_priority = base; - wc->masks.skb_priority = mask; + wc->masks.skb_priority |= mask; } } @@ -8462,7 +8497,7 @@ commit_set_pkt_mark_action(const struct flow *flow, struct flow *base_flow, sizeof key, ovs_key_pkt_mark_offsetof_sizeof_arr, odp_actions)) { base_flow->pkt_mark = base; - wc->masks.pkt_mark = mask; + wc->masks.pkt_mark |= mask; } } diff --git a/lib/util.c b/lib/util.c index 830e14516..25635b27f 100644 --- a/lib/util.c +++ b/lib/util.c @@ -1395,6 +1395,19 @@ is_all_ones(const void *p, size_t n) return is_all_byte(p, n, 0xff); } +/* *dst |= *src for 'n' bytes. */ +void +or_bytes(void *dst_, const void *src_, size_t n) +{ + const uint8_t *src = src_; + uint8_t *dst = dst_; + size_t i; + + for (i = 0; i < n; i++) { + *dst++ |= *src++; + } +} + /* Copies 'n_bits' bits starting from bit 'src_ofs' in 'src' to the 'n_bits' * starting from bit 'dst_ofs' in 'dst'. 'src' is 'src_len' bytes long and * 'dst' is 'dst_len' bytes long. 
diff --git a/lib/util.h b/lib/util.h index 7ad8758fe..067dcad15 100644 --- a/lib/util.h +++ b/lib/util.h @@ -484,6 +484,7 @@ be64_is_superset(ovs_be64 super, ovs_be64 sub) bool is_all_zeros(const void *, size_t); bool is_all_ones(const void *, size_t); bool is_all_byte(const void *, size_t, uint8_t byte); +void or_bytes(void *dst, const void *src, size_t n); void bitwise_copy(const void *src, unsigned int src_len, unsigned int src_ofs, void *dst, unsigned int dst_len, unsigned int dst_ofs, unsigned int n_bits); diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index feabb7380..d63ef237a 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -8914,6 +8914,29 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(dst=50:54:00:00:00:0c),eth_ty OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([ofproto-dpif megaflow - set dl_dst with match on dl_src]) +OVS_VSWITCHD_START +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) +add_of_ports br0 1 2 +AT_DATA([flows.txt], [dnl +table=0 in_port=1,dl_src=50:54:00:00:00:09 actions=mod_dl_dst(50:54:00:00:00:0a),output(2) +]) +AT_CHECK([ovs-ofctl add-flows br0 flows.txt]) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.5,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)']) +sleep 1 +dnl The first packet is essentially a no-op, as the new destination MAC is the +dnl same as the original. The second entry actually updates the destination +dnl MAC. The last one must be dropped as it doesn't match with dl_src. 
+AT_CHECK([strip_ufid < ovs-vswitchd.log | filter_flow_install | strip_used], [0], [dnl +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(frag=no), actions:2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(frag=no), actions:set(eth(dst=50:54:00:00:00:0a)),2 +recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b),eth_type(0x0800),ipv4(frag=no), actions:drop +]) +OVS_VSWITCHD_STOP +AT_CLEANUP + m4_define([OFPROTO_DPIF_MEGAFLOW_DISABLED], [AT_SETUP([ofproto-dpif megaflow - disabled$1]) OVS_VSWITCHD_START([], [], [], [m4_if([$1], [], [], [--dummy-numa="0,0,0,0,1,1,1,1"])]) -- GitLab From 8594d9ae4e79c78b53937a47ab349216509c7c66 Mon Sep 17 00:00:00 2001 From: Peng He Date: Tue, 4 Aug 2020 09:54:56 +0800 Subject: [PATCH 243/432] odp-util: Clear padding in the nd_extension. Similar to the patch 67eb8110171f ("odp-util: Fix passing uninitialized bytes in OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV*.") when changing from flow into the netlink format, the tail padding of nd_extension should be cleared.
this fixes the following warning logs: |ofproto_dpif_upcall(pmd-...)|WARN|Conflicting ukey for flows: ufid:763c7d3b-4d0c-4bff-aafc-fdfb6089c2ba <...>,eth(...),eth_type(0x86dd),ipv6(...),icmpv6(type=135,code=0),\ nd(target=fdbd:dc02:ff:1:1::1,sll=fa:16:3e:75:b3:a9,tll=00:00:00:00:00:00),\ nd_ext(nd_reserved=0x0,nd_options_type=1) ufid:763c7d3b-4d0c-4bff-aafc-fdfb6089c2ba <...>,eth(...),eth_type(0x86dd),ipv6(...),icmpv6(type=135,code=0),\ nd(target=fdbd:dc02:ff:1:1::1,sll=fa:16:3e:75:b3:a9,tll=00:00:00:00:00:00),\ nd_ext(nd_reserved=0x0,nd_options_type=1) |ofproto_dpif_upcall(pmd-...)|WARN|upcall_cb failure: ukey installation fails Fixes: 9b2b84973db7 ("Support for match & set ICMPv6 reserved and options type fields") Signed-off-by: Peng He Signed-off-by: Ilya Maximets --- lib/odp-util.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index e54a78b43..5989381e9 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -6357,7 +6357,9 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms, struct ovs_key_nd_extensions *nd_ext_key; if (data->igmp_group_ip4 != 0 || data->tcp_flags != 0) { - nd_ext_key = nl_msg_put_unspec_uninit(buf, + /* 'struct ovs_key_nd_extensions' has padding, + * clear it. */ + nd_ext_key = nl_msg_put_unspec_zero(buf, OVS_KEY_ATTR_ND_EXTENSIONS, sizeof *nd_ext_key); nd_ext_key->nd_reserved = data->igmp_group_ip4; -- GitLab From 4ed57c502830c0c5834194e3bad38892b16eb439 Mon Sep 17 00:00:00 2001 From: Harry van Haaren Date: Wed, 29 Jul 2020 11:59:32 +0100 Subject: [PATCH 244/432] dpif-netdev/avx512: avoid compiling avx512 code if binutils check fails This commit avoids compiling and linking of avx512 code into the vswitch_la library if the binutils check fails. This avoids compiling code into OVS that will not be executed due to binutils issue. 
Signed-off-by: Harry van Haaren Signed-off-by: Ian Stokes --- lib/automake.mk | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/automake.mk b/lib/automake.mk index 920c958e3..218dc7313 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -22,6 +22,7 @@ lib_libopenvswitch_la_LDFLAGS = \ $(AM_LDFLAGS) if HAVE_AVX512F +if HAVE_LD_AVX512_GOOD # Build library of avx512 code with CPU ISA CFLAGS enabled. This allows the # compiler to use the ISA features required for the ISA optimized code-paths. # Use LDFLAGS to compile only static library of this code, as it should be @@ -39,6 +40,7 @@ lib_libopenvswitchavx512_la_SOURCES = \ lib_libopenvswitchavx512_la_LDFLAGS = \ -static endif +endif # Build core vswitch libraries as before lib_libopenvswitch_la_SOURCES = \ -- GitLab From ba5e3117828f6cc2a0c9eefd292b2b4f7da75e6b Mon Sep 17 00:00:00 2001 From: Harry van Haaren Date: Wed, 29 Jul 2020 11:59:33 +0100 Subject: [PATCH 245/432] dpif-netdev/avx512: add -fPIC flag to enable shared builds In certain scenarios with OVS built with --enable-shared and DPDK enabled as shared build too, Position Independent Code is required to link the avx512.a file into the relocatable .so that it must be linked into. Signed-off-by: Harry van Haaren Signed-off-by: Ian Stokes --- lib/automake.mk | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/automake.mk b/lib/automake.mk index 218dc7313..380a67228 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -34,6 +34,7 @@ lib_libopenvswitchavx512_la_CFLAGS = \ -mavx512bw \ -mavx512dq \ -mbmi2 \ + -fPIC \ $(AM_CFLAGS) lib_libopenvswitchavx512_la_SOURCES = \ lib/dpif-netdev-lookup-avx512-gather.c -- GitLab From 930f135f5ddc4372c1615bc7674dda35c229b6bd Mon Sep 17 00:00:00 2001 From: Harry van Haaren Date: Wed, 29 Jul 2020 11:59:34 +0100 Subject: [PATCH 246/432] configure: explicitly disable avx512 if binutils check fails This commit explicitly disables avx512f if the binutils assembler check fails to correctly assemble its input.
Without this fix, there is a possibility that users can see undefined behaviour when compiling with -march=native on a CPU which supports avx512 and with a buggy binutils version (v2.30 and 2.31), without a backported fix, if the compiler's vectorizing optimizations convert scalar code to avx512 instructions. Signed-off-by: Harry van Haaren Signed-off-by: Ian Stokes --- m4/openvswitch.m4 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4 index 7c9a507e5..6fe79297e 100644 --- a/m4/openvswitch.m4 +++ b/m4/openvswitch.m4 @@ -426,8 +426,12 @@ AC_DEFUN([OVS_CHECK_BINUTILS_AVX512], CFLAGS="$CFLAGS -DHAVE_LD_AVX512_GOOD" else ovs_cv_binutils_avx512_good=no + dnl Explicitly disallow avx512f to stop compiler auto-vectorizing + dnl and causing zmm usage with buggy binutils versions. + CFLAGS="$CFLAGS -mno-avx512f" fi else + dnl non x86_64 architectures don't have avx512, so not affected ovs_cv_binutils_avx512_good=no fi]) rm $OBJFILE -- GitLab From d5659751f65ebc17d9aec40b60c1cff3a2d87162 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 4 Aug 2020 09:37:21 +0300 Subject: [PATCH 247/432] tc: Use skip_hw flag when probing tc features There is no need to pass tc rules to hw when just probing for tc features. This will avoid redundant errors from hw drivers that may happen.
Signed-off-by: Roi Dayan Acked-By: Vlad Buslov Reviewed-by: Tonghao Zhang Signed-off-by: Simon Horman --- lib/netdev-offload-tc.c | 2 ++ lib/tc.c | 13 ++++++------- lib/tc.h | 10 ++++++++++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 2c9c6f4ca..18ff380f9 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1918,6 +1918,7 @@ probe_multi_mask_per_prio(int ifindex) memset(&flower, 0, sizeof flower); + flower.tc_policy = TC_POLICY_SKIP_HW; flower.key.eth_type = htons(ETH_P_IP); flower.mask.eth_type = OVS_BE16_MAX; memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac); @@ -1965,6 +1966,7 @@ probe_tc_block_support(int ifindex) memset(&flower, 0, sizeof flower); + flower.tc_policy = TC_POLICY_SKIP_HW; flower.key.eth_type = htons(ETH_P_IP); flower.mask.eth_type = OVS_BE16_MAX; memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac); diff --git a/lib/tc.c b/lib/tc.c index c96d09538..8761304c9 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -65,12 +65,6 @@ VLOG_DEFINE_THIS_MODULE(tc); static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5); -enum tc_offload_policy { - TC_POLICY_NONE, - TC_POLICY_SKIP_SW, - TC_POLICY_SKIP_HW -}; - static enum tc_offload_policy tc_policy = TC_POLICY_NONE; struct tc_pedit_key_ex { @@ -2757,6 +2751,7 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) bool is_vlan = eth_type_vlan(flower->key.eth_type); bool is_qinq = is_vlan && eth_type_vlan(flower->key.encap_eth_type[0]); bool is_mpls = eth_type_mpls(flower->key.eth_type); + enum tc_offload_policy policy = flower->tc_policy; int err; /* need to parse acts first as some acts require changing the matching @@ -2882,7 +2877,11 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) } } - nl_msg_put_u32(request, TCA_FLOWER_FLAGS, tc_get_tc_cls_policy(tc_policy)); + if (policy == TC_POLICY_NONE) { + policy = tc_policy; + } + + 
nl_msg_put_u32(request, TCA_FLOWER_FLAGS, tc_get_tc_cls_policy(policy)); if (flower->tunnel) { nl_msg_put_flower_tunnel(request, flower); diff --git a/lib/tc.h b/lib/tc.h index 028eed5d0..281231c0d 100644 --- a/lib/tc.h +++ b/lib/tc.h @@ -312,6 +312,14 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id *id2) && id1->chain == id2->chain; } +enum tc_offload_policy { + TC_POLICY_NONE = 0, + TC_POLICY_SKIP_SW, + TC_POLICY_SKIP_HW +}; + +BUILD_ASSERT_DECL(TC_POLICY_NONE == 0); + struct tc_flower { struct tc_flower_key key; struct tc_flower_key mask; @@ -337,6 +345,8 @@ struct tc_flower { bool needs_full_ip_proto_mask; enum tc_offloaded_state offloaded_state; + /* Used to force skip_hw when probing tc features. */ + enum tc_offload_policy tc_policy; }; /* assert that if we overflow with a masked write of uint32_t to the last byte -- GitLab From f2cf667730c980e3343228b0e44b6e3ca8538964 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Wed, 5 Aug 2020 21:40:51 +0200 Subject: [PATCH 248/432] ovsdb-server: Replace in-memory DB contents at raft install_snapshot. Every time a follower has to install a snapshot received from the leader, it should also replace the data in memory. Right now this only happens when snapshots are installed that also change the schema. This can lead to inconsistent DB data on follower nodes and the snapshot may fail to get applied. 
Fixes: bda1f6b60588 ("ovsdb-server: Don't disconnect clients after raft install_snapshot.") Acked-by: Han Zhou Signed-off-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- ovsdb/ovsdb-server.c | 21 +++++++++++++-------- tests/idltest.ovsschema | 9 +++++++++ tests/ovsdb-cluster.at | 32 +++++++++++++++++++++++++++++--- tests/ovsdb-idl.at | 1 + 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index ef4e996df..fd7891a72 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -543,13 +543,14 @@ parse_txn(struct server_config *config, struct db *db, const struct ovsdb_schema *schema, const struct json *txn_json, const struct uuid *txnid) { - if (schema && (!db->db->schema || strcmp(schema->version, - db->db->schema->version))) { + if (schema) { /* We're replacing the schema (and the data). Destroy the database * (first grabbing its storage), then replace it with the new schema. * The transaction must also include the replacement data. * - * Only clustered database schema changes go through this path. */ + * Only clustered database schema changes and snapshot installs + * go through this path. + */ ovs_assert(txn_json); ovs_assert(ovsdb_storage_is_clustered(db->db->storage)); @@ -559,11 +560,15 @@ parse_txn(struct server_config *config, struct db *db, return error; } - ovsdb_jsonrpc_server_reconnect( - config->jsonrpc, false, - (db->db->schema - ? xasprintf("database %s schema changed", db->db->name) - : xasprintf("database %s connected to storage", db->db->name))); + if (!db->db->schema || + strcmp(schema->version, db->db->schema->version)) { + ovsdb_jsonrpc_server_reconnect( + config->jsonrpc, false, + (db->db->schema + ? 
xasprintf("database %s schema changed", db->db->name) + : xasprintf("database %s connected to storage", + db->db->name))); + } ovsdb_replace(db->db, ovsdb_create(ovsdb_schema_clone(schema), NULL)); diff --git a/tests/idltest.ovsschema b/tests/idltest.ovsschema index e02b975bc..e04755ea0 100644 --- a/tests/idltest.ovsschema +++ b/tests/idltest.ovsschema @@ -54,6 +54,15 @@ }, "isRoot" : true }, + "indexed": { + "columns": { + "i": { + "type": "integer" + } + }, + "indexes": [["i"]], + "isRoot" : true + }, "simple": { "columns": { "b": { diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at index 971454515..e0758e954 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -332,13 +332,29 @@ for i in `seq $n`; do AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected]) done +AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", + {"op": "insert", + "table": "indexed", + "row": {"i": 0}}]]'], [0], [ignore], [ignore]) + # Kill one follower (s2) and write some data to cluster, so that the follower is falling behind printf "\ns2: stopping\n" OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s2], [s2.pid]) +# Delete "i":0 and readd it to get a different UUID for it. +AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", + {"op": "delete", + "table": "indexed", + "where": [["i", "==", 0]]}]]'], [0], [ignore], [ignore]) + AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", {"op": "insert", - "table": "simple", + "table": "indexed", + "row": {"i": 0}}]]'], [0], [ignore], [ignore]) + +AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest", + {"op": "insert", + "table": "indexed", "row": {"i": 1}}]]'], [0], [ignore], [ignore]) # Compact leader online to generate snapshot @@ -355,8 +371,18 @@ AT_CHECK([ovsdb_client_wait unix:s2.ovsdb $schema_name connected]) # succeed. 
AT_CHECK([ovsdb-client transact unix:s2.ovsdb '[["idltest", {"op": "insert", - "table": "simple", - "row": {"i": 1}}]]'], [0], [ignore], [ignore]) + "table": "indexed", + "row": {"i": 2}}]]'], [0], [ignore], [ignore]) + +# The snapshot should overwrite the in-memory contents of the DB on S2 +# without generating any constraint violations. All tree records (0, 1, 2) +# should be in the DB at this point. +AT_CHECK([ovsdb-client --no-headings dump unix:s2.ovsdb idltest indexed | uuidfilt | sort -k 2], [0], [dnl +<0> 0 +<1> 1 +<2> 2 +indexed table +]) for i in `seq $n`; do OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid]) diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index 4efed88e4..789ae23a9 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -954,6 +954,7 @@ AT_CHECK([sort stdout | uuidfilt], [0], # Check that ovsdb-idl figured out that table link2 and column l2 are missing. AT_CHECK([grep ovsdb_idl stderr | sort], [0], [dnl +test-ovsdb|ovsdb_idl|idltest database lacks indexed table (database needs upgrade?) test-ovsdb|ovsdb_idl|idltest database lacks link2 table (database needs upgrade?) test-ovsdb|ovsdb_idl|idltest database lacks simple5 table (database needs upgrade?) test-ovsdb|ovsdb_idl|idltest database lacks singleton table (database needs upgrade?) -- GitLab From 4a5bba046f9e2da6d8d6310b85edb4e71625452d Mon Sep 17 00:00:00 2001 From: Ian Stokes Date: Tue, 11 Aug 2020 18:21:44 +0100 Subject: [PATCH 249/432] releases: Add OVS 2.14 to DPDK mapping. Add an entry for OVS 2.14 to map to the validated DPDK release. Signed-off-by: Ian Stokes Acked-by: Flavio Leitner --- Documentation/faq/releases.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index ac93e6e97..9a7a6444c 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -195,6 +195,7 @@ Q: What DPDK version does each Open vSwitch release work with? 
2.11.x 18.11.9 2.12.x 18.11.9 2.13.x 19.11.2 + 2.14.x 19.11.2 ============ ======== Q: Are all the DPDK releases that OVS versions work with maintained? -- GitLab From 74e6bdad8f2a215c8cffbb2d5f305ce982fb6ebb Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Thu, 14 May 2020 10:36:12 -0700 Subject: [PATCH 250/432] faq: Mention Linux kernel versions supported by 2.13.x. This is based on acinclude.m4 in branch-2.13, which rejects anything newer than 5.0. Reported-by: Han Zhou Acked-by: Greg Rose Acked-by: Flavio Leitner Signed-off-by: Ben Pfaff --- Documentation/faq/releases.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index 9a7a6444c..9d5d2c3e1 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -70,6 +70,7 @@ Q: What Linux kernel versions does each Open vSwitch release work with? 2.10.x 3.16 to 4.17 2.11.x 3.16 to 4.18 2.12.x 3.16 to 5.0 + 2.13.x 3.16 to 5.0 2.14.x 3.16 to 5.5 ============ ============== -- GitLab From b8d42f875c8c876008e296ec485d6bbac08bc4c9 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 12 Aug 2020 10:57:07 +0200 Subject: [PATCH 251/432] acinclude: Fix build with kernels with prandom* moved to prandom.h. Recent commit c0842fbc1b18 ("random32: move the pseudo-random 32-bit definitions to prandom.h") in upstream kernel moved the definition of prandom_* functions from random.h to prandom.h. This change was also backported to stable kernels. 
Fixing our configure script to look for these functions in a new location and avoid build failures: datapath/linux/compat/include/linux/random.h:11:19: error: redefinition of 'prandom_u32_max' Acked-by: Greg Rose Signed-off-by: Ilya Maximets --- acinclude.m4 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/acinclude.m4 b/acinclude.m4 index 4bac9dbdd..84f344da0 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -817,6 +817,10 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ [prandom_u32[[\(]]], [OVS_DEFINE([HAVE_PRANDOM_U32])]) OVS_GREP_IFELSE([$KSRC/include/linux/random.h], [prandom_u32_max]) + OVS_GREP_IFELSE([$KSRC/include/linux/prandom.h], + [prandom_u32[[\(]]], + [OVS_DEFINE([HAVE_PRANDOM_U32])]) + OVS_GREP_IFELSE([$KSRC/include/linux/prandom.h], [prandom_u32_max]) OVS_GREP_IFELSE([$KSRC/include/net/rtnetlink.h], [get_link_net]) OVS_GREP_IFELSE([$KSRC/include/net/rtnetlink.h], [name_assign_type]) -- GitLab From 71417ef0119e1b522edf928b453753ab8ab513d9 Mon Sep 17 00:00:00 2001 From: Sivaprasad Tummala Date: Thu, 26 Mar 2020 12:09:20 +0000 Subject: [PATCH 252/432] netdev-dpdk: linear buffer check with zero-copy As of DPDK 19.11, in order to use dequeue-zero-copy in DPDK Vhost library, the application has to disable the linear buffer option. Hence dequeue-zero-copy is not supported for vhost application that requires linear buffers. An alternative DPDK based approach to disable the linear buffers within the vhost library itself was proposed in [1], however the consensus was that application should be responsible for disabling linear buffers. As such this patch disables linear buffers when zero-copy is enabled. 
[1] https://patches.dpdk.org/patch/67200/ Fixes: 127b6a6eea02 ("dpdk: Update to use DPDK 19.11.") Signed-off-by: Sivaprasad Tummala Acked-by: Ilya Maximets Signed-off-by: Ian Stokes --- lib/netdev-dpdk.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 44ebf96da..b940b1ac2 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -5059,6 +5059,12 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) /* Enable zero copy flag, if requested */ if (zc_enabled) { vhost_flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY; + /* DPDK vHost library doesn't allow zero-copy with linear buffers. + * Hence disable Linear buffer. + */ + vhost_flags &= ~RTE_VHOST_USER_LINEARBUF_SUPPORT; + VLOG_WARN("Zero copy enabled, disabling linear buffer" + " check for vHost port %s", dev->up.name); } /* Enable External Buffers if TCP Segmentation Offload is enabled. */ -- GitLab From e919fd4955f86de827e128808fe1a4c34e788947 Mon Sep 17 00:00:00 2001 From: Ian Stokes Date: Thu, 6 Aug 2020 16:28:35 +0100 Subject: [PATCH 253/432] dpdk: Deprecate vhost-user dequeue zero-copy. Dequeue zero-copy is no longer supported for vhost-user client mode in DPDK due to commit [1]. In addition to this, zero-copy mode has been proposed to be marked deprecated in [2] with removal in the next DPDK LTS release. This commit deprecates support for vhost-user dequeue zero-copy in OVS with its removal expected in the next OVS release. 
[1] 715070ea10e6 ("vhost: prevent zero-copy with incompatible client mode") [2] http://mails.dpdk.org/archives/dev/2020-August/177236.html Signed-off-by: Ian Stokes Acked-by: Maxime Coquelin Acked-by: Ilya Maximets --- Documentation/topics/dpdk/vhost-user.rst | 5 +++++ NEWS | 2 ++ lib/netdev-dpdk.c | 2 ++ 3 files changed, 9 insertions(+) diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst index b1eb5d9da..4af738d11 100644 --- a/Documentation/topics/dpdk/vhost-user.rst +++ b/Documentation/topics/dpdk/vhost-user.rst @@ -556,6 +556,11 @@ shown with:: vhost-user Dequeue Zero Copy (experimental) ------------------------------------------- +.. warning:: + + vhost-user Dequeue Zero Copy is deprecated in OVS and will be removed in + the next release. + Normally when dequeuing a packet from a vHost User device, a memcpy operation must be used to copy that packet from guest address space to host address space. This memcpy can be removed by enabling dequeue zero-copy like so:: diff --git a/NEWS b/NEWS index dceda95a3..5d6489f26 100644 --- a/NEWS +++ b/NEWS @@ -22,6 +22,8 @@ v2.14.0 - xx xxx xxxx CVE-2020-10726, this DPDK version is strongly recommended to be used. * New 'ovs-appctl dpdk/log-list' and 'ovs-appctl dpdk/log-set' commands to list and change log levels in DPDK components. + * Vhost-user Dequeue zero-copy support is deprecated and will be removed + in the next release. - Linux datapath: * Support for kernel versions up to 5.5.x. 
- AF_XDP: diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index b940b1ac2..18c4adcc7 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -5085,6 +5085,8 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) dev->up.name, dev->vhost_id); if (zc_enabled) { VLOG_INFO("Zero copy enabled for vHost port %s", dev->up.name); + VLOG_WARN("Zero copy support is deprecated and will be " + "removed in the next OVS release."); } } -- GitLab From 1acc884abb5d1f972f2ed66a6fed378e8455d099 Mon Sep 17 00:00:00 2001 From: Ian Stokes Date: Wed, 12 Aug 2020 18:28:39 +0100 Subject: [PATCH 254/432] AUTHORS: Add Sivaprasad Tummala. Signed-off-by: Ian Stokes --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 10ce012ba..e300ca505 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -359,6 +359,7 @@ Shih-Hao Li shihli@vmware.com Shu Shen shu.shen@radisys.com Simon Horman horms@verge.net.au Simon Horman simon.horman@netronome.com +Sivaprasad Tummala sivaprasad.tummala@intel.com Sorin Vinturis svinturis@cloudbasesolutions.com Sriharsha Basavapatna sriharsha.basavapatna@broadcom.com Steffen Gebert steffen.gebert@informatik.uni-wuerzburg.de -- GitLab From d08a602b351faed7d62edcdf988c450366931feb Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Mon, 10 Aug 2020 23:15:10 -0700 Subject: [PATCH 255/432] Revert "ovsdb-idl: Fix NULL deref reported by Coverity." This reverts commit 68bc6f88a3a36549fcd3b6248c25c5e2e6deb8f3. The commit causes a regression in OVN scale test. ovn-northd's CPU more than doubled for the test scenario: create and bind 12k ports. Below are some perf data of ovn-northd when running command: ovn-nbctl --wait=sb sync Before reverting this commit: - 92.42% 0.62% ovn-northd ovn-northd [.] main - 91.80% main + 68.93% ovn_db_run (inlined) + 22.45% ovsdb_idl_loop_commit_and_wait After reverting this commit: - 92.84% 0.60% ovn-northd ovn-northd [.] 
main - 92.24% main + 92.03% ovn_db_run (inlined) Reverting this commit avoided 22.45% of the CPU caused by ovsdb_idl_loop_commit_and_wait(). The commit changed the logic of ovsdb_idl_txn_write__() by adding the check "datum->keys && datum->values" before discarding unchanged data in a transaction. However, it is normal for OVSDB clients ( such as ovn-northd) to try to set columns with same empty data as it is before the transaction. IDL would discard these changes and avoid sending big transactions to server (which would end up as no-op on server side). In the ovn scale test scenario mentioned above, each iteration of ovn-northd would send a transaction to server that includes all rows of the huge Port_Binding table, which caused the significant CPU increase of ovn-northd (and also the OVN SB DB server), resulted in longer end to end latency of OVN configuration changes. For the original problem the commit 68bc6f88 was trying to fix, it doesn't seem to be a real problem. The NULL deref reported by Coverity may be addressed in a future patch using a different approach, if necessary. Signed-off-by: Han Zhou Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index ef3b97b23..d8f221ca6 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -4631,8 +4631,7 @@ ovsdb_idl_txn_write__(const struct ovsdb_idl_row *row_, * transaction only does writes of existing values, without making any real * changes, we will drop the whole transaction later in * ovsdb_idl_txn_commit().) 
*/ - if (datum->keys && datum->values && - write_only && ovsdb_datum_equals(ovsdb_idl_read(row, column), + if (write_only && ovsdb_datum_equals(ovsdb_idl_read(row, column), datum, &column->type)) { goto discard_datum; } -- GitLab From 023f257852f6b26da6b3362e507a1a9df2a30c44 Mon Sep 17 00:00:00 2001 From: Emma Finn Date: Fri, 14 Aug 2020 14:38:49 +0100 Subject: [PATCH 256/432] netdev-offload-dpdk: Fix for broken ethernet matching HWOL for XL710NIC. This patch introduces a temporary work around to fix partial hardware offload for XL710 devices. Currently the incorrect ethernet pattern is being set. This patch will be removed once this issue is fixed within the i40e PMD. Signed-off-by: Emma Finn Signed-off-by: Eli Britstein Co-authored-by: Eli Britstein Tested-by: Ian Stokes Signed-off-by: Ilya Maximets --- lib/netdev-offload-dpdk.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index de6101e4d..5b632bac4 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -691,9 +691,22 @@ parse_flow_match(struct flow_patterns *patterns, consumed_masks->packet_type = 0; /* Eth */ - if (match->wc.masks.dl_type || - !eth_addr_is_zero(match->wc.masks.dl_src) || - !eth_addr_is_zero(match->wc.masks.dl_dst)) { + if (match->wc.masks.dl_type == OVS_BE16_MAX && is_ip_any(&match->flow) + && eth_addr_is_zero(match->wc.masks.dl_dst) + && eth_addr_is_zero(match->wc.masks.dl_src)) { + /* + * This is a temporary work around to fix ethernet pattern for partial + * hardware offload for X710 devices. This fix will be reverted once + * the issue is fixed within the i40e PMD driver. 
+ */ + add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); + + memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst); + memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src); + consumed_masks->dl_type = 0; + } else if (match->wc.masks.dl_type || + !eth_addr_is_zero(match->wc.masks.dl_src) || + !eth_addr_is_zero(match->wc.masks.dl_dst)) { struct rte_flow_item_eth *spec, *mask; spec = xzalloc(sizeof *spec); -- GitLab From 8a09c2590ef2ea0edc250ec46e3d41bd5874b4ab Mon Sep 17 00:00:00 2001 From: lzhecheng Date: Thu, 6 Aug 2020 04:23:39 +0000 Subject: [PATCH 257/432] ovs-monitor-ipsec: Convert Python2 code to Python3. Submitted-at: https://github.com/openvswitch/ovs/pull/331 Reported-at: https://github.com/openvswitch/ovs-issues/issues/192 Fixes: 1ca0323e7c29 ("Require Python 3 and remove support for Python 2.") Signed-off-by: lzhecheng Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + ipsec/ovs-monitor-ipsec.in | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index e300ca505..4d8eaa3bd 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -429,6 +429,7 @@ Zhenyu Gao sysugaozhenyu@gmail.com ZhiPeng Lu luzhipeng@uniudc.com Zhou Yangchao 1028519445@qq.com aginwala amginwal@gmail.com +lzhecheng lzhecheng@vmware.com parameswaran krishnamurthy parkrish@gmail.com solomon liwei.solomon@gmail.com wenxu wenxu@ucloud.cn diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in index 37e370324..1c185bbd8 100755 --- a/ipsec/ovs-monitor-ipsec.in +++ b/ipsec/ovs-monitor-ipsec.in @@ -101,7 +101,7 @@ class XFRM(object): proc = subprocess.Popen([self.IP, 'xfrm', 'policy'], stdout=subprocess.PIPE) while True: - line = proc.stdout.readline().strip() + line = proc.stdout.readline().strip().decode() if line == '': break a = line.split(" ") @@ -124,7 +124,7 @@ class XFRM(object): proc = subprocess.Popen([self.IP, 'xfrm', 'state'], stdout=subprocess.PIPE) while True: - line = 
proc.stdout.readline().strip() + line = proc.stdout.readline().strip().decode() if line == '': break a = line.split(" ") @@ -246,7 +246,7 @@ conn prevent_unencrypted_vxlan proc = subprocess.Popen([self.IPSEC, 'status'], stdout=subprocess.PIPE) while True: - line = proc.stdout.readline().strip() + line = proc.stdout.readline().strip().decode() if line == '': break tunnel_name = line.split(":") @@ -340,7 +340,7 @@ conn prevent_unencrypted_vxlan # about possibility of ovs-monitor-ipsec to block for each tunnel # while strongSwan sends IKE messages over Internet. conns_dict = self.get_active_conns() - for ifname, conns in conns_dict.iteritems(): + for ifname, conns in conns_dict.items(): tunnel = monitor.tunnels.get(ifname) for conn in conns: # IPsec "connection" names that we choose in strongswan @@ -536,7 +536,7 @@ conn prevent_unencrypted_vxlan # Delete old connections conns_dict = self.get_active_conns() - for ifname, conns in conns_dict.iteritems(): + for ifname, conns in conns_dict.items(): tunnel = monitor.tunnels.get(ifname) for conn in conns: @@ -608,7 +608,7 @@ conn prevent_unencrypted_vxlan proc = subprocess.Popen([self.IPSEC, 'status'], stdout=subprocess.PIPE) while True: - line = proc.stdout.readline().strip() + line = proc.stdout.readline().strip().decode() if line == '': break @@ -989,7 +989,7 @@ class IPsecMonitor(object): skb_mark = None is_valid = False - for row in data["Open_vSwitch"].rows.itervalues(): + for row in data["Open_vSwitch"].rows.values(): pki[0] = row.other_config.get("certificate") pki[1] = row.other_config.get("private_key") pki[2] = row.other_config.get("ca_cert") @@ -1016,7 +1016,7 @@ class IPsecMonitor(object): table.""" ifaces = set() - for row in data["Interface"].rows.itervalues(): + for row in data["Interface"].rows.values(): if not self.is_tunneling_type_supported(row.type): continue if not self.is_ipsec_required(row.options): @@ -1047,7 +1047,7 @@ class IPsecMonitor(object): return s = "" conns = 
self.ike_helper.get_active_conns() - for name, tunnel in self.tunnels.iteritems(): + for name, tunnel in self.tunnels.items(): s += tunnel.show(policies, securities, conns) unix_conn.reply(s) @@ -1064,7 +1064,7 @@ class IPsecMonitor(object): if self.ike_helper.config_global(self): needs_refresh = True - for name, tunnel in self.tunnels.iteritems(): + for name, tunnel in self.tunnels.items(): if tunnel.last_refreshed_version != tunnel.version: tunnel.last_refreshed_version = tunnel.version needs_refresh = True @@ -1094,7 +1094,7 @@ class IPsecMonitor(object): proc.wait() if proc.returncode: raise Exception(proc.stderr.read()) - m = re.search(r"CN=(.+?),", proc.stdout.readline()) + m = re.search(r"CN=(.+?),", proc.stdout.readline().decode()) if not m: raise Exception("No CN in the certificate subject.") except Exception as e: -- GitLab From 9688b4c7713e50289b3cf1b9e858be1dde0c517d Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Wed, 12 Aug 2020 16:07:55 -0400 Subject: [PATCH 258/432] connmgr: Support changing openflow versions without restarting. When commit a0baa7dfa4fe ("connmgr: Make treatment of active and passive connections more uniform") was applied, it did not take into account that a reconfiguration of the allowed_versions setting would require a reload of the ofservice object (only accomplished via a restart of OvS). For now, during the reconfigure cycle, we delete the ofservice object and then recreate it immediately. A new test is added to ensure we do not break this behavior again. 
Fixes: a0baa7dfa4fe ("connmgr: Make treatment of active and passive connections more uniform") Suggested-by: Ben Pfaff Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1782834 Signed-off-by: Aaron Conole Acked-by: Flavio Leitner Acked-by: Numan Siddique Tested-by: Numan Siddique Signed-off-by: Ilya Maximets --- ofproto/connmgr.c | 25 +++++++++++++++++-------- tests/bridge.at | 17 +++++++++++++++++ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index 51d656cba..aee676d93 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -190,8 +190,8 @@ struct ofservice { static void ofservice_run(struct ofservice *); static void ofservice_wait(struct ofservice *); -static void ofservice_reconfigure(struct ofservice *, - const struct ofproto_controller *) +static int ofservice_reconfigure(struct ofservice *, + const struct ofproto_controller *) OVS_REQUIRES(ofproto_mutex); static void ofservice_create(struct connmgr *mgr, const char *target, const struct ofproto_controller *) @@ -602,7 +602,15 @@ connmgr_set_controllers(struct connmgr *mgr, struct shash *controllers) target); ofservice_destroy(ofservice); } else { - ofservice_reconfigure(ofservice, c); + if (ofservice_reconfigure(ofservice, c)) { + char *target_to_restore = xstrdup(target); + VLOG_INFO("%s: Changes to controller \"%s\" " + "expects re-initialization: Re-initializing now.", + mgr->name, target); + ofservice_destroy(ofservice); + ofservice_create(mgr, target_to_restore, c); + free(target_to_restore); + } } } @@ -2011,16 +2019,15 @@ ofservice_wait(struct ofservice *ofservice) } } -static void +static int ofservice_reconfigure(struct ofservice *ofservice, const struct ofproto_controller *settings) OVS_REQUIRES(ofproto_mutex) { - /* If the allowed OpenFlow versions change, close all of the existing - * connections to allow them to reconnect and possibly negotiate a new - * version. 
*/ + /* If the allowed OpenFlow versions change, a full cleanup is needed + * for the ofservice and connections. */ if (ofservice->s.allowed_versions != settings->allowed_versions) { - ofservice_close_all(ofservice); + return -EINVAL; } ofservice->s = *settings; @@ -2029,6 +2036,8 @@ ofservice_reconfigure(struct ofservice *ofservice, LIST_FOR_EACH (ofconn, ofservice_node, &ofservice->conns) { ofconn_reconfigure(ofconn, settings); } + + return 0; } /* Finds and returns the ofservice within 'mgr' that has the given diff --git a/tests/bridge.at b/tests/bridge.at index d48463e26..904f1381c 100644 --- a/tests/bridge.at +++ b/tests/bridge.at @@ -103,3 +103,20 @@ AT_CHECK([ovs-appctl -t ovs-vswitchd version], [0], [ignore]) OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) OVS_APP_EXIT_AND_WAIT([ovsdb-server]) AT_CLEANUP + +AT_SETUP([bridge - change ofproto versions]) +dnl Start vswitch and add a version test bridge +OVS_VSWITCHD_START( + [add-br vr_test0 -- \ + set bridge vr_test0 datapath-type=dummy \ + protocols=OpenFlow10]) + +dnl set the version to include, say, OpenFlow14 +AT_CHECK([ovs-vsctl set bridge vr_test0 protocols=OpenFlow10,OpenFlow14]) + +dnl now try to use bundle action on a flow +AT_CHECK([ovs-ofctl add-flow vr_test0 --bundle actions=normal]) + +OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) +OVS_APP_EXIT_AND_WAIT([ovsdb-server]) +AT_CLEANUP -- GitLab From 5601e86c4ec5268cf11b1b92308fd85a7b5cc0ab Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 17 Aug 2020 14:17:17 +0200 Subject: [PATCH 259/432] Set release date for 2.14.0. Acked-by: Ian Stokes Acked-by: Simon Horman Signed-off-by: Ilya Maximets --- NEWS | 2 +- debian/changelog | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 5d6489f26..2f67d5047 100644 --- a/NEWS +++ b/NEWS @@ -2,7 +2,7 @@ Post-v2.14.0 --------------------- -v2.14.0 - xx xxx xxxx +v2.14.0 - 17 Aug 2020 --------------------- - ovs-vswitchd no longer deletes datapath flows on exit by default. 
- OpenFlow: diff --git a/debian/changelog b/debian/changelog index fd88fec3b..2a57585e3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -8,7 +8,7 @@ openvswitch (2.14.0-1) unstable; urgency=low * New upstream version - -- Open vSwitch team Fri, 17 Jul 2020 03:36:19 +0200 + -- Open vSwitch team Mon, 17 Aug 2020 14:17:17 +0200 openvswitch (2.13.0-1) unstable; urgency=low [ Open vSwitch team] -- GitLab From 00d3374d8d54283428e9cbbf158cbe60a08531f3 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 21 Aug 2020 14:04:05 +0200 Subject: [PATCH 260/432] travis: Test build of debian packages. We had a lot of issues with debian packaging lately. This job will check build and installation of debian packages to avoid most of such issues in the future. Installing only minimal set of tools, most of dependencies will be installed according to package description, this way we will check if we have all required dependencies listed. Not trying to install openvswitch-ipsec package as there is an issue that python from the pyenv for some reason doesn't see ovs packages installed from python3-openvswitch, i.e. ipsec service is not able to start. Tests are skipped because they are tested in many other scenarios. No need to waste time. 
Signed-off-by: Ilya Maximets Acked-by: Aaron Conole --- .travis.yml | 12 ++++++++++++ .travis/linux-build.sh | 11 +++++++++++ .travis/linux-prepare.sh | 6 ++++++ 3 files changed, 29 insertions(+) diff --git a/.travis.yml b/.travis.yml index 527240a67..b5eaaaea2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -67,6 +67,18 @@ matrix: compiler: clang env: OPTS="--disable-ssl" +matrix: + include: + - env: DEB_PACKAGE=1 + addons: + apt: + packages: + - linux-headers-$(uname -r) + - build-essential + - fakeroot + - devscripts + - equivs + script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS notifications: diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index e0a065291..6981d1d47 100755 --- a/.travis/linux-build.sh +++ b/.travis/linux-build.sh @@ -164,6 +164,17 @@ function build_ovs() fi } +if [ "$DEB_PACKAGE" ]; then + mk-build-deps --install --root-cmd sudo --remove debian/control + dpkg-checkbuilddeps + DEB_BUILD_OPTIONS='parallel=4 nocheck' fakeroot debian/rules binary + # Not trying to install ipsec package as there are issues with system-wide + # installed python3-openvswitch package and the pyenv used by Travis. + packages=$(ls $(pwd)/../*.deb | grep -v ipsec) + sudo apt install ${packages} + exit 0 +fi + if [ "$KERNEL" ]; then install_kernel $KERNEL fi diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh index 8cbbd5623..71eb347e8 100755 --- a/.travis/linux-prepare.sh +++ b/.travis/linux-prepare.sh @@ -2,6 +2,12 @@ set -ev +if [ "$DEB_PACKAGE" ]; then + # We're not using sparse for debian packages, tests are skipped and + # all extra dependencies tracked by mk-build-deps. + exit 0 +fi + # Build and install sparse. # # Explicitly disable sparse support for llvm because some travis -- GitLab From 79a5251a501d64b04e76c68db50274a7b8ebfc88 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 18 Aug 2020 16:13:29 +0200 Subject: [PATCH 261/432] test-conntrack: Fix conntrack benchmark by clearing conntrack metadata. 
Packets in the benchmark must be treated as new packets, i.e. they should not have conntrack metadata set. Current code will set up 'pkt->md.conn' after the first run and all subsequent calls will hit the 'fast' processing that is intended for recirculated packets making a false impression that current conntrack implementation is lightning fast. Before the change: $ ./ovstest test-conntrack benchmark 4 33554432 32 1 conntrack: 1059 ms After (correct): $ ./ovstest test-conntrack benchmark 4 33554432 32 1 conntrack: 92785 ms Fixes: 594570ea1cde ("conntrack: Optimize recirculations.") Signed-off-by: Ilya Maximets Acked-by: Aaron Conole --- tests/test-conntrack.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c index e7c73220a..24c93e4a4 100644 --- a/tests/test-conntrack.c +++ b/tests/test-conntrack.c @@ -82,6 +82,7 @@ ct_thread_main(void *aux_) { struct thread_aux *aux = aux_; struct dp_packet_batch *pkt_batch; + struct dp_packet *pkt; ovs_be16 dl_type; size_t i; long long now = time_msec(); @@ -91,6 +92,9 @@ ct_thread_main(void *aux_) for (i = 0; i < n_pkts; i += batch_size) { conntrack_execute(ct, pkt_batch, dl_type, false, true, 0, NULL, NULL, 0, 0, NULL, NULL, now, 0); + DP_PACKET_BATCH_FOR_EACH (j, pkt, pkt_batch) { + pkt_metadata_init_conn(&pkt->md); + } } ovs_barrier_block(&barrier); destroy_packets(pkt_batch); -- GitLab From 90c1cb3f0fefc8316b358f1f25133335f73273c9 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Fri, 21 Aug 2020 13:30:07 -0700 Subject: [PATCH 262/432] python: Fixup python shebangs to python3. Builds on RHEL 8.2 systems are failing due to this issue. See [1] as to why this is necessary. I used the following command to identify files that need this fix: find . -type f -executable | /usr/lib/rpm/redhat/brp-mangle-shebangs I also updated the copyright notices as needed. 1. 
https://fedoraproject.org/wiki/Changes/Make_ambiguous_python_shebangs_error Fixes: 1ca0323e7c29 ("Require Python 3 and remove support for Python 2.") Signed-off-by: Greg Rose Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- ofproto/ipfix-gen-entities | 4 ++-- ovsdb/dot2pic | 4 ++-- ovsdb/ovsdb-doc | 4 ++-- python/build/soutil.py | 4 ++-- tests/ovsdb-monitor-sort.py | 15 ++++++++++++++- tests/sendpkt.py | 4 ++-- tests/test-l7.py | 4 ++-- tests/uuidfilt.py | 18 +++++++++++++++++- utilities/ovs-dev.py | 4 ++-- utilities/ovs-pipegen.py | 4 ++-- .../etc_xapi.d_plugins_openvswitch-cfg-update | 4 ++-- ...opt_xensource_libexec_interface-reconfigure | 2 +- ...usr_share_openvswitch_scripts_ovs-xapi-sync | 4 ++-- 13 files changed, 52 insertions(+), 23 deletions(-) diff --git a/ofproto/ipfix-gen-entities b/ofproto/ipfix-gen-entities index 0be719967..d5abe9c2e 100755 --- a/ofproto/ipfix-gen-entities +++ b/ofproto/ipfix-gen-entities @@ -1,6 +1,6 @@ -#! /usr/bin/env python +#!/usr/bin/env python3 # -# Copyright (C) 2012 Nicira, Inc. +# Copyright (C) 2012, 2020 Nicira, Inc. # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright diff --git a/ovsdb/dot2pic b/ovsdb/dot2pic index de67261ac..2f858e19d 100755 --- a/ovsdb/dot2pic +++ b/ovsdb/dot2pic @@ -1,6 +1,6 @@ -#! /usr/bin/env python +#!/usr/bin/env python3 -# Copyright (c) 2009, 2010, 2011, 2013, 2017 Nicira, Inc. +# Copyright (c) 2009, 2010, 2011, 2013, 2017, 2020 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/ovsdb/ovsdb-doc b/ovsdb/ovsdb-doc index 406c29311..10d0c0c13 100755 --- a/ovsdb/ovsdb-doc +++ b/ovsdb/ovsdb-doc @@ -1,6 +1,6 @@ -#! /usr/bin/python +#!/usr/bin/python3 -# Copyright (c) 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc. +# Copyright (c) 2010, 2011, 2012, 2013, 2014, 2015, 2020 Nicira, Inc. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/build/soutil.py b/python/build/soutil.py index b8027af86..a65882302 100755 --- a/python/build/soutil.py +++ b/python/build/soutil.py @@ -1,6 +1,6 @@ -#! /usr/bin/env python +#!/usr/bin/env python3 -# Copyright (c) 2008, 2017 Nicira, Inc. +# Copyright (c) 2008, 2017, 2020 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/ovsdb-monitor-sort.py b/tests/ovsdb-monitor-sort.py index 7d368a7af..8a7976bdc 100755 --- a/tests/ovsdb-monitor-sort.py +++ b/tests/ovsdb-monitor-sort.py @@ -1,4 +1,17 @@ -#! /usr/bin/env python +#!/usr/bin/env python3 +# Copyright (c) 2020 VMware, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. # Breaks lines read from stdin into groups using blank lines as # group separators, then sorts lines within the groups for diff --git a/tests/sendpkt.py b/tests/sendpkt.py index 328ae2bc9..49ac45275 100755 --- a/tests/sendpkt.py +++ b/tests/sendpkt.py @@ -1,6 +1,6 @@ -#! /usr/bin/env python +#!/usr/bin/env python3 -# Copyright (c) 2018 VMware, Inc. +# Copyright (c) 2018, 2020 VMware, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/test-l7.py b/tests/test-l7.py index d7854a1df..32a77392c 100755 --- a/tests/test-l7.py +++ b/tests/test-l7.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# Copyright (c) 2015, 2016 Nicira, Inc. +#!/usr/bin/env python3 +# Copyright (c) 2015, 2016, 2020 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/uuidfilt.py b/tests/uuidfilt.py index bc49aa480..39679dd44 100755 --- a/tests/uuidfilt.py +++ b/tests/uuidfilt.py @@ -1,4 +1,20 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 +# Copyright (c) 2020 VMware, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Breaks lines read from stdin into groups using blank lines as +# group separators, then sorts lines within the groups for +# reproducibility. import re import sys diff --git a/utilities/ovs-dev.py b/utilities/ovs-dev.py index 248d22ab9..c45788acd 100755 --- a/utilities/ovs-dev.py +++ b/utilities/ovs-dev.py @@ -1,5 +1,5 @@ -#!/usr/bin/env python -# Copyright (c) 2013, 2014, 2015, 2016 Nicira, Inc. +#!/usr/bin/env python3 +# Copyright (c) 2013, 2014, 2015, 2016, 2020 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/utilities/ovs-pipegen.py b/utilities/ovs-pipegen.py index ee5797221..a3b6a661d 100755 --- a/utilities/ovs-pipegen.py +++ b/utilities/ovs-pipegen.py @@ -1,5 +1,5 @@ -#! /usr/bin/env python -# Copyright (c) 2013, 2014, 2015 Nicira, Inc. +#! /usr/bin/env python3 +# Copyright (c) 2013, 2014, 2015, 2020 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update index e7404e3b0..b8db88194 100755 --- a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update +++ b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update @@ -1,10 +1,10 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # xapi plugin script to update the cache of configuration items in the # ovs-vswitchd configuration that are managed in the xapi database when # integrated with Citrix management tools. -# Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. +# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2020 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/xenserver/opt_xensource_libexec_interface-reconfigure b/xenserver/opt_xensource_libexec_interface-reconfigure index a82043fb5..9c20725de 100755 --- a/xenserver/opt_xensource_libexec_interface-reconfigure +++ b/xenserver/opt_xensource_libexec_interface-reconfigure @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # Copyright (c) 2008,2009 Citrix Systems, Inc. # diff --git a/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync b/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync index cf8960025..bff85464b 100755 --- a/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync +++ b/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync @@ -1,5 +1,5 @@ -#! 
/usr/bin/env python -# Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. +#!/usr/bin/env python3 +# Copyright (c) 2009, 2010, 2011, 2012, 2013, 2020 Nicira, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. -- GitLab From 046321d48317595162499348b26b0c624ce924dd Mon Sep 17 00:00:00 2001 From: Timothy Redaelli Date: Thu, 6 Aug 2020 18:33:50 +0200 Subject: [PATCH 263/432] meta-flow: fix a typo in "MPLS Bottom of Stack Field" paragraph. In the ovs-fields.7 manual page, the "MPLS Bottom of Stack Field" paragraph says: * When mpls_bos is 1, there is another MPLS label following this one, so the Ethertype passed to pop_mpls should be an MPLS Ethertype. [...] * When mpls_bos is 0, this MPLS label is the last one, so the Ethertype passed to pop_mpls should be a non-MPLS Ethertype such as IPv4. [...] The values 0 and 1 have been swapped: when BOS is 1, then no more label stack entries follows. Fixes: 96fee5e0a2a0 ("ovs-fields: New manpage to document Open vSwitch and OpenFlow fields.") Reported-at: https://bugzilla.redhat.com/1842032 Reported-by: Guillaume Nault Signed-off-by: Timothy Redaelli Acked-by: Greg Rose Signed-off-by: Ilya Maximets --- lib/meta-flow.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml index 154675874..e72ba52ec 100644 --- a/lib/meta-flow.xml +++ b/lib/meta-flow.xml @@ -3920,18 +3920,18 @@ r r c c c.

    • - When mpls_bos is 1, there is another MPLS label + When mpls_bos is 0, there is another MPLS label following this one, so the Ethertype passed to pop_mpls should be an MPLS Ethertype. For example: table=0, - dl_type=0x8847, mpls_bos=1, actions=pop_mpls:0x8847, + dl_type=0x8847, mpls_bos=0, actions=pop_mpls:0x8847, goto_table:1
    • - When mpls_bos is 0, this MPLS label is the last one, + When mpls_bos is 1, this MPLS label is the last one, so the Ethertype passed to pop_mpls should be a non-MPLS Ethertype such as IPv4. For example: table=1, dl_type=0x8847, - mpls_bos=0, actions=pop_mpls:0x0800, goto_table:2 + mpls_bos=1, actions=pop_mpls:0x0800, goto_table:2
    -- GitLab From 6a328b6e24d43073316d2bc1b29fb87c9eacc85c Mon Sep 17 00:00:00 2001 From: Timothy Redaelli Date: Fri, 19 Jun 2020 15:53:52 +0200 Subject: [PATCH 264/432] ovs-dpctl-top: Skip "eth()" element. With commit efde188622ae ("odp-util: Print eth() for Ethernet flows if packet_type is absent.") "eth()" is printed for Ethernet flows if packet_type is absent, but this broke "ovs-dpctl-top" since it expects that every element has a value. This commit skips the parsing of the empty "eth()" element. Fixes: efde188622ae ("odp-util: Print eth() for Ethernet flows if packet_type is absent.") Signed-off-by: Timothy Redaelli Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- utilities/ovs-dpctl-top.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utilities/ovs-dpctl-top.in b/utilities/ovs-dpctl-top.in index 011cc64b7..fbe6e4f56 100755 --- a/utilities/ovs-dpctl-top.in +++ b/utilities/ovs-dpctl-top.in @@ -480,6 +480,8 @@ def elements_to_dict(elements): """ Convert line to a hierarchy of dictionaries. """ result = {} for element in elements: + if (element == "eth()"): + continue match = FIELDS_CMPND.search(element) if (match): key = match.group(1) -- GitLab From 11bbae97718c7daab934ecf62fdbd49914f7ebf0 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 26 Aug 2020 18:43:44 +0200 Subject: [PATCH 265/432] travis: Merge matrix entries. It's not possible to use 'matrix' twice. This makes travis to use the latest one dropping all the osx and arm64 jobs. 
Fixes: 00d3374d8d54 ("travis: Test build of debian packages.") Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- .travis.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index b5eaaaea2..43e6a75cc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -66,9 +66,6 @@ matrix: - arch: arm64 compiler: clang env: OPTS="--disable-ssl" - -matrix: - include: - env: DEB_PACKAGE=1 addons: apt: -- GitLab From 0bddf55fba1df1f9a199baf5a85f8dcfc7b08c06 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Mon, 7 Sep 2020 09:45:02 +0100 Subject: [PATCH 266/432] AUTHORS: update email for Mark Gray Update email address for Mark Gray Signed-off-by: Mark Gray Signed-off-by: Alin Gabriel Serdean --- .mailmap | 1 + AUTHORS.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 894062d48..9175d23a4 100644 --- a/.mailmap +++ b/.mailmap @@ -54,6 +54,7 @@ Justin Pettit Kmindg Kyle Mestery Lance Richardson +Mark Gray Mauricio Vasquez Miguel Angel Ajo Neil McKee diff --git a/AUTHORS.rst b/AUTHORS.rst index 4d8eaa3bd..cb26e0197 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -255,7 +255,7 @@ Madhu Challa challa@noironetworks.com Manohar K C manukc@gmail.com Marcin Mirecki mmirecki@redhat.com Mario Cabrera mario.cabrera@hpe.com -Mark D. Gray mark.d.gray@intel.com +Mark D. Gray mark.d.gray@redhat.com Mark Hamilton Mark Kavanagh mark.b.kavanagh81@gmail.com Mark Maglana mmaglana@gmail.com -- GitLab From 5198e8a06928e3324e6fd11f6209c336611dffd2 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Mon, 7 Sep 2020 11:48:24 -0300 Subject: [PATCH 267/432] ovsdb-idl.at: Wait all servers to join the cluster. The test 'Check Python IDL reconnects to leader - Python3 (leader only)' fails sometimes when the first ovsdb-server gets killed before the others had joined the cluster. Fix the function ovsdb_cluster_start_idltest to wait them to join the cluster. 
Fixes: c39751e44539 ("python: Monitor Database table to manage lifecycle of IDL client.") Co-authored-by: Ilya Maximets Signed-off-by: Flavio Leitner Signed-off-by: Ilya Maximets --- tests/ovsdb-idl.at | 52 +++++++++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index 789ae23a9..261f4f323 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -12,25 +12,6 @@ ovsdb_start_idltest () { on_exit 'kill `cat ovsdb-server.pid`' } -# ovsdb_cluster_start_idltest [REMOTE] [SCHEMA] -# -# Creates a database using SCHEMA (default: idltest.ovsschema) and -# starts a database cluster listening on punix:socket and REMOTE (if -# specified). -ovsdb_cluster_start_idltest () { - local n=$1 - ovsdb-tool create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft || return $? - cid=`ovsdb-tool db-cid s1.db` - schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema` - for i in `seq 2 $n`; do - ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft || return $? - done - for i in `seq $n`; do - ovsdb-server -vraft -vconsole:warn --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb ${2:+--remote=$2} s$i.db || return $? - done - on_exit 'kill `cat s*.pid`' -} - # ovsdb_cluster_leader [REMOTES] [DATABASE] # # Returns the leader of the DATABASE cluster. @@ -48,6 +29,35 @@ ovsdb_cluster_leader () { done }]) +# OVSDB_CLUSTER_START_IDLTEST([N], [REMOTE]) +# +# Creates a clustered database using idltest.ovsschema and starts a database +# cluster of N servers listening on punix:socket and REMOTE (if specified).
+m4_define([OVSDB_CLUSTER_START_IDLTEST], + [n=$1 + AT_CHECK([ovsdb-tool create-cluster s1.db \ + $abs_srcdir/idltest.ovsschema unix:s1.raft]) + cid=$(ovsdb-tool db-cid s1.db) + schema_name=$(ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema) + for i in $(seq 2 $n); do + AT_CHECK([ovsdb-tool join-cluster s$i.db \ + $schema_name unix:s$i.raft unix:s1.raft]) + done + for i in $(seq $n); do + AT_CHECK([ovsdb-server -vraft -vconsole:warn --detach --no-chdir \ + --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i \ + --remote=punix:s$i.ovsdb \ + m4_if([$2], [], [], [--remote=$2]) s$i.db]) + done + on_exit 'kill $(cat s*.pid)' + + for i in $(seq $n); do + OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s$i cluster/status ${schema_name} \ + | grep -q 'Status: cluster member']) + done +]) + + # OVSDB_CHECK_IDL_C(TITLE, [PRE-IDL-TXN], TRANSACTIONS, OUTPUT, [KEYWORDS], # [FILTER]) # @@ -1813,7 +1823,7 @@ m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY], AT_SKIP_IF([test "$IS_ARM64" = "yes"]) AT_KEYWORDS([ovsdb server idl Python leader_only with tcp socket]) m4_define([LPBK],[127.0.0.1]) - AT_CHECK([ovsdb_cluster_start_idltest $2 "ptcp:0:"LPBK]) + OVSDB_CLUSTER_START_IDLTEST([$2], ["ptcp:0:"LPBK]) PARSE_LISTENING_PORT([s2.log], [TCP_PORT_1]) PARSE_LISTENING_PORT([s3.log], [TCP_PORT_2]) PARSE_LISTENING_PORT([s1.log], [TCP_PORT_3]) @@ -1836,7 +1846,7 @@ m4_define([OVSDB_CHECK_CLUSTER_IDL_C], [AT_SETUP([$1 - C - tcp]) AT_KEYWORDS([ovsdb server idl positive tcp socket $5]) m4_define([LPBK],[127.0.0.1]) - AT_CHECK([ovsdb_cluster_start_idltest $2 "ptcp:0:"LPBK]) + OVSDB_CLUSTER_START_IDLTEST([$2], ["ptcp:0:"LPBK]) PARSE_LISTENING_PORT([s1.log], [TCP_PORT_1]) PARSE_LISTENING_PORT([s2.log], [TCP_PORT_2]) PARSE_LISTENING_PORT([s3.log], [TCP_PORT_3]) -- GitLab From 74aba1ff68178c371e450dc4a73b14d3026f89ef Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Mon, 22 Jul 2019 10:35:24 -0700 Subject: [PATCH 268/432] Documentation: Correct claims about Debian packaging. 
The documentation reported the union of all possible Debian- and Debian-derived packaging. This isn't realistic: there are differences between OVS upstream, Debian downstream, and Ubuntu downstream. This commit distinguishes them. Reported-by: Ravi Kerur Signed-off-by: Ben Pfaff Signed-off-by: Ilya Maximets --- Documentation/intro/install/distributions.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/intro/install/distributions.rst b/Documentation/intro/install/distributions.rst index 54362c0a4..b68a764d1 100644 --- a/Documentation/intro/install/distributions.rst +++ b/Documentation/intro/install/distributions.rst @@ -44,10 +44,13 @@ that includes the core userspace components of the switch. 2. For kernel datapath, ``openvswitch-datapath-dkms`` can be installed to automatically build and install Open vSwitch kernel module for your running -kernel. +kernel. This package is only available when the .deb packages are built from +the Open vSwitch repository; it is not downstream in Debian or Ubuntu releases. 3. For fast userspace switching, Open vSwitch with DPDK support is -bundled in the package ``openvswitch-switch-dpdk``. +bundled in the package ``openvswitch-switch-dpdk``. This package is only +available in the Ubuntu distribution; it is not upstream in the Open vSwitch +repository or downstream in Debian. Fedora ------ -- GitLab From d3daf73db3e48d71a32839243e6ec323e02c0dd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 30 Apr 2019 19:10:19 +0200 Subject: [PATCH 269/432] rhel: Fix logrotate group when dpdk is enabled. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise logrotate will fail to generate the rotated log files. 
Signed-off-by: Jaime Caamaño Ruiz Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- rhel/openvswitch-fedora.spec.in | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in index e3e0d8acf..2c0c4fa18 100644 --- a/rhel/openvswitch-fedora.spec.in +++ b/rhel/openvswitch-fedora.spec.in @@ -318,18 +318,19 @@ exit 0 %post %if %{with libcapng} if [ $1 -eq 1 ]; then - sed -i 's:^#OVS_USER_ID=:OVS_USER_ID=:' /etc/sysconfig/openvswitch - sed -i 's:\(.*su\).*:\1 openvswitch openvswitch:' %{_sysconfdir}/logrotate.d/openvswitch - %if %{with dpdk} - sed -i \ - 's@OVS_USER_ID="openvswitch:openvswitch"@OVS_USER_ID="openvswitch:hugetlbfs"@'\ - /etc/sysconfig/openvswitch + %define gname hugetlbfs +%else + %define gname openvswitch %endif + sed -i \ + 's@^#OVS_USER_ID="openvswitch:openvswitch"@OVS_USER_ID="openvswitch:%{gname}"@'\ + %{_sysconfdir}/sysconfig/openvswitch + sed -i 's:\(.*su\).*:\1 openvswitch %{gname}:' %{_sysconfdir}/logrotate.d/openvswitch - # In the case of upgrade, this is not needed. - chown -R openvswitch:openvswitch /etc/openvswitch - chown -R openvswitch:openvswitch /var/log/openvswitch + # In the case of upgrade, this is not needed + chown -R openvswitch:openvswitch %{_sysconfdir}/openvswitch + chown -R openvswitch:%{gname} %{_localstatedir}/log/openvswitch fi %endif -- GitLab From b0008d6233f9fe40f97ce9b37cdd797b30fbc069 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 10 Mar 2020 11:39:36 -0300 Subject: [PATCH 270/432] userspace-tso: Document the minimum kernel version. The kernel needs to be at least 4.19-rc7 to include the commit 9d2f67e43b73 ("net/packet: fix packet drop as of virtio gso") otherwise the TSO packets are dropped when using raw sockets. 
Fixes: 29cf9c1b3b9c ("userspace: Add TCP Segmentation Offload support") Reported-by: Yi Yang Signed-off-by: Flavio Leitner Signed-off-by: Ilya Maximets --- Documentation/topics/userspace-tso.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst index aafa4a1bf..14a7c6fb3 100644 --- a/Documentation/topics/userspace-tso.rst +++ b/Documentation/topics/userspace-tso.rst @@ -104,6 +104,12 @@ on ports without TSO support. That also means guests using vhost-user in client mode will receive TSO packet regardless of TSO being enabled or disabled within the guest. +All kernel devices that use the raw socket interface (veth, for example) +require the kernel commit 9d2f67e43b73 ("net/packet: fix packet drop as of +virtio gso") in order to work properly. This commit was merged in upstream +kernel 4.19-rc7, so make sure your kernel is either newer or contains the +backport. + ~~~~~~~~~~~~~~~~~~ Performance Tuning ~~~~~~~~~~~~~~~~~~ -- GitLab From 0287f840e8b7ab0947ba6f7397c310a652a831d8 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 17 Mar 2020 14:39:40 -0700 Subject: [PATCH 271/432] classifier: Fix use of uninitialized value. Coverity reports use of uninitialized value of cursor. This happens in cls_cursor_start(), when rule is false, cursor.subtable is uninitialized. CID 279324. Signed-off-by: William Tu Reviewed-by: Greg Rose Signed-off-by: Ilya Maximets --- lib/classifier.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/classifier.c b/lib/classifier.c index f2c3497c2..2a1d155da 100644 --- a/lib/classifier.c +++ b/lib/classifier.c @@ -1370,6 +1370,7 @@ cls_cursor_start(const struct classifier *cls, const struct cls_rule *target, struct cls_cursor cursor; struct cls_subtable *subtable; + memset(&cursor, 0x0, sizeof cursor); cursor.cls = cls; cursor.target = target && !cls_rule_is_catchall(target) ? 
target : NULL; cursor.version = version; -- GitLab From 9d15c02a8bf9cc732cce33012bd1577758260521 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 12 May 2020 18:38:20 +0200 Subject: [PATCH 272/432] rhel: Fix reload of OVS_USER_ID on startup. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OVS_USER_ID was being picked up from a previously existing openvswitch.useropts rendering ineffective any configuration change through sysconfig. There is no ordering between Exec* and Environment* stanzas of systemd, full Environment* is always loaded before each Exec*. We make sure that openvswitch.useropts is removed in a first Exec so that a fresh OVS_USER_ID can be picked up from config in successive Exec*. Fixes: 94e1e8b ("rhel: run ovn with the same user as ovs") Signed-off-by: Jaime Caamaño Ruiz Acked-by: Greg Rose Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- rhel/usr_lib_systemd_system_ovsdb-server.service | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/rhel/usr_lib_systemd_system_ovsdb-server.service b/rhel/usr_lib_systemd_system_ovsdb-server.service index 4c170c09b..98338b9df 100644 --- a/rhel/usr_lib_systemd_system_ovsdb-server.service +++ b/rhel/usr_lib_systemd_system_ovsdb-server.service @@ -11,10 +11,16 @@ PIDFile=/var/run/openvswitch/ovsdb-server.pid Restart=on-failure EnvironmentFile=/etc/openvswitch/default.conf EnvironmentFile=-/etc/sysconfig/openvswitch +EnvironmentFile=-/run/openvswitch.useropts + +# Environment is reloaded for each Exec*, make sure to +# remove openvswitch.useropts first to reload a fresh +# OVS_USER_ID from default.conf or sysconfig.
+ExecStartPre=/usr/bin/rm -f /run/openvswitch.useropts + ExecStartPre=-/usr/bin/chown ${OVS_USER_ID} /var/run/openvswitch /var/log/openvswitch -ExecStartPre=/bin/sh -c 'rm -f /run/openvswitch.useropts; /usr/bin/echo "OVS_USER_ID=${OVS_USER_ID}" > /run/openvswitch.useropts' +ExecStartPre=/bin/sh -c '/usr/bin/echo "OVS_USER_ID=${OVS_USER_ID}" > /run/openvswitch.useropts' ExecStartPre=/bin/sh -c 'if [ "$${OVS_USER_ID/:*/}" != "root" ]; then /usr/bin/echo "OVS_USER_OPT=--ovs-user=${OVS_USER_ID}" >> /run/openvswitch.useropts; fi' -EnvironmentFile=-/run/openvswitch.useropts ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \ --no-ovs-vswitchd --no-monitor --system-id=random \ ${OVS_USER_OPT} \ -- GitLab From db7041716bfe068ddd0cef05e830b65690ad5c8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 19 May 2020 13:37:01 +0200 Subject: [PATCH 273/432] netdev-dpdk: Don't set rx mq mode for net_virtio. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since DPDK 19.11 [1], it is not allowed to set any RX mq mode for virtio driver. [1] https://github.com/DPDK/dpdk/commit/13b3137f3b7c8f866947a9b34e06a8aec0d084f7 Signed-off-by: Jaime Caamaño Ruiz Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- lib/netdev-dpdk.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 18c4adcc7..c2ec93c91 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -163,7 +163,6 @@ typedef uint16_t dpdk_port_t; static const struct rte_eth_conf port_conf = { .rxmode = { - .mq_mode = ETH_MQ_RX_RSS, .split_hdr_size = 0, .offloads = 0, }, @@ -965,6 +964,14 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq) rte_eth_dev_info_get(dev->port_id, &info); + /* As of DPDK 19.11, it is not allowed to set a mq_mode for + * virtio PMD driver. 
*/ + if (!strcmp(info.driver_name, "net_virtio")) { + conf.rxmode.mq_mode = ETH_MQ_RX_NONE; + } else { + conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + } + /* As of DPDK 17.11.1 a few PMDs require to explicitly enable * scatter to support jumbo RX. * Setting scatter for the device is done after checking for -- GitLab From 27dc7adf66b1af88f238da46c6430e3d2eaa4da3 Mon Sep 17 00:00:00 2001 From: Federico Paolinelli Date: Thu, 30 Jul 2020 12:41:47 +0200 Subject: [PATCH 274/432] ovsdb-tool: Add a db consistency check to the ovsdb-tool check-cluster command. There are some occurrences where the database ends up in an inconsistent state. This happened in ovn-k8s and is described in [0]. Here we are adding a supported way to check that a given db is consistent, which is less error prone than checking the logs. Tested against both a valid db and a corrupted db attached to the above bug [1]. Also, tested with a fresh db that did not do a snapshot. [0]: https://bugzilla.redhat.com/show_bug.cgi?id=1837953#c23 [1]: https://bugzilla.redhat.com/attachment.cgi?id=1697595 Signed-off-by: Federico Paolinelli Suggested-by: Dumitru Ceara Acked-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- ovsdb/ovsdb-tool.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index 91662cab8..30d0472b2 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -1497,6 +1497,44 @@ do_check_cluster(struct ovs_cmdl_context *ctx) } } + /* Check for db consistency: + * The serverid must be in the servers list. 
+ */ + + for (struct server *s = c.servers; s < &c.servers[c.n_servers]; s++) { + struct shash *servers_obj = json_object(s->snap->servers); + char *server_id = xasprintf(SID_FMT, SID_ARGS(&s->header.sid)); + bool found = false; + const struct shash_node *node; + + SHASH_FOR_EACH (node, servers_obj) { + if (!strncmp(server_id, node->name, SID_LEN)) { + found = true; + } + } + + if (!found) { + for (struct raft_entry *e = s->entries; + e < &s->entries[s->log_end - s->log_start]; e++) { + if (e->servers == NULL) { + continue; + } + struct shash *log_servers_obj = json_object(e->servers); + SHASH_FOR_EACH (node, log_servers_obj) { + if (!strncmp(server_id, node->name, SID_LEN)) { + found = true; + } + } + } + } + + if (!found) { + ovs_fatal(0, "%s: server %s not found in server list", + s->filename, server_id); + } + free(server_id); + } + /* Clean up. */ for (size_t i = 0; i < c.n_servers; i++) { -- GitLab From 7024ddf3202646095adfbb1998904abd7b21f6e3 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Mon, 3 Aug 2020 17:05:28 +0200 Subject: [PATCH 275/432] ovsdb: Add unixctl command to show storage status. If a database enters an error state, e.g., in case of RAFT when reading the DB file contents if applying the RAFT records triggers constraint violations, there's no way to determine this unless a client generates a write transaction. Such write transactions would fail with "ovsdb-error: inconsistent data". This commit adds a new command to show the status of the storage that's backing a database. 
Example, on an inconsistent database: $ ovs-appctl -t /tmp/test.ctl ovsdb-server/get-db-storage-status DB status: ovsdb error: inconsistent data Example, on a consistent database: $ ovs-appctl -t /tmp/test.ctl ovsdb-server/get-db-storage-status DB status: ok Signed-off-by: Dumitru Ceara Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- NEWS | 3 +++ ovsdb/ovsdb-server.c | 39 +++++++++++++++++++++++++++++++++++++++ ovsdb/storage.c | 10 ++++++++++ ovsdb/storage.h | 1 + 4 files changed, 53 insertions(+) diff --git a/NEWS b/NEWS index 2f67d5047..a9c50add2 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,8 @@ Post-v2.14.0 --------------------- + - OVSDB: + * New unixctl command 'ovsdb-server/get-db-storage-status' to show the + status of the storage that's backing a database. v2.14.0 - 17 Aug 2020 diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index fd7891a72..d772edbe0 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -90,6 +90,7 @@ static unixctl_cb_func ovsdb_server_set_active_ovsdb_server_probe_interval; static unixctl_cb_func ovsdb_server_set_sync_exclude_tables; static unixctl_cb_func ovsdb_server_get_sync_exclude_tables; static unixctl_cb_func ovsdb_server_get_sync_status; +static unixctl_cb_func ovsdb_server_get_db_storage_status; struct server_config { struct sset *remotes; @@ -453,6 +454,9 @@ main(int argc, char *argv[]) unixctl_command_register("ovsdb-server/sync-status", "", 0, 0, ovsdb_server_get_sync_status, &server_config); + unixctl_command_register("ovsdb-server/get-db-storage-status", "DB", 1, 1, + ovsdb_server_get_db_storage_status, + &server_config); /* Simulate the behavior of OVS release prior to version 2.5 that * does not support the monitor_cond method. 
*/ @@ -1701,6 +1705,41 @@ ovsdb_server_get_sync_status(struct unixctl_conn *conn, int argc OVS_UNUSED, ds_destroy(&ds); } +static void +ovsdb_server_get_db_storage_status(struct unixctl_conn *conn, + int argc OVS_UNUSED, + const char *argv[], + void *config_) +{ + struct server_config *config = config_; + struct shash_node *node; + + node = shash_find(config->all_dbs, argv[1]); + if (!node) { + unixctl_command_reply_error(conn, "Failed to find the database."); + return; + } + + struct db *db = node->data; + + if (!db->db) { + unixctl_command_reply_error(conn, "Failed to find the database."); + return; + } + + struct ds ds = DS_EMPTY_INITIALIZER; + char *error = ovsdb_storage_get_error(db->db->storage); + + if (!error) { + ds_put_cstr(&ds, "status: ok"); + } else { + ds_put_format(&ds, "status: %s", error); + free(error); + } + unixctl_command_reply(conn, ds_cstr(&ds)); + ds_destroy(&ds); +} + static void parse_options(int argc, char *argv[], struct sset *db_filenames, struct sset *remotes, diff --git a/ovsdb/storage.c b/ovsdb/storage.c index 7b4ad16f6..f662e9056 100644 --- a/ovsdb/storage.c +++ b/ovsdb/storage.c @@ -198,6 +198,16 @@ ovsdb_storage_get_memory_usage(const struct ovsdb_storage *storage, } } +char * +ovsdb_storage_get_error(const struct ovsdb_storage *storage) +{ + if (storage->error) { + return ovsdb_error_to_string(storage->error); + } + + return NULL; +} + void ovsdb_storage_run(struct ovsdb_storage *storage) { diff --git a/ovsdb/storage.h b/ovsdb/storage.h index a22396891..02b6e7e6c 100644 --- a/ovsdb/storage.h +++ b/ovsdb/storage.h @@ -42,6 +42,7 @@ const struct uuid *ovsdb_storage_get_sid(const struct ovsdb_storage *); uint64_t ovsdb_storage_get_applied_index(const struct ovsdb_storage *); void ovsdb_storage_get_memory_usage(const struct ovsdb_storage *, struct simap *usage); +char *ovsdb_storage_get_error(const struct ovsdb_storage *); void ovsdb_storage_run(struct ovsdb_storage *); void ovsdb_storage_wait(struct ovsdb_storage *); -- GitLab From 
32ae689274032eecc38d569d47576f191d746c5d Mon Sep 17 00:00:00 2001 From: Timothy Redaelli Date: Thu, 3 Sep 2020 16:55:43 +0200 Subject: [PATCH 276/432] Remove manpages.mk from git. manpages.mk is generated at build-time using sodepends.py and so there is no need to keep it in git. Signed-off-by: Timothy Redaelli Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- .gitignore | 1 + Makefile.am | 5 +- manpages.mk | 266 ---------------------------------------------------- 3 files changed, 4 insertions(+), 268 deletions(-) delete mode 100644 manpages.mk diff --git a/.gitignore b/.gitignore index 2ac9cdac7..f1cdcf124 100644 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,7 @@ /docs-check /install-sh /libtool +/manpages.mk /manpage-check /missing /missing-distfiles diff --git a/Makefile.am b/Makefile.am index 27ef9e4b4..6981b943e 100644 --- a/Makefile.am +++ b/Makefile.am @@ -412,8 +412,8 @@ flake8-check: $(FLAKE8_PYFILES) endif CLEANFILES += flake8-check -include $(srcdir)/manpages.mk -$(srcdir)/manpages.mk: $(MAN_ROOTS) build-aux/sodepends.py python/build/soutil.py +-include manpages.mk +manpages.mk: $(MAN_ROOTS) build-aux/sodepends.py python/build/soutil.py @PYTHONPATH=$$PYTHONPATH$(psep)$(srcdir)/python $(PYTHON3) $(srcdir)/build-aux/sodepends.py -I. -I$(srcdir) $(MAN_ROOTS) >$(@F).tmp @if cmp -s $(@F).tmp $@; then \ touch $@; \ @@ -421,6 +421,7 @@ $(srcdir)/manpages.mk: $(MAN_ROOTS) build-aux/sodepends.py python/build/soutil.p else \ mv $(@F).tmp $@; \ fi +CLEANFILES += manpages.mk CLEANFILES += manpage-dep-check if VSTUDIO_DDK diff --git a/manpages.mk b/manpages.mk deleted file mode 100644 index dc201484c..000000000 --- a/manpages.mk +++ /dev/null @@ -1,266 +0,0 @@ -# Generated automatically -- do not modify! 
-*- buffer-read-only: t -*- - -ovsdb/ovsdb-client.1: \ - ovsdb/ovsdb-client.1.in \ - lib/common-syn.man \ - lib/common.man \ - lib/daemon-syn.man \ - lib/daemon.man \ - lib/ovs.tmac \ - lib/ssl-bootstrap-syn.man \ - lib/ssl-bootstrap.man \ - lib/ssl-connect-syn.man \ - lib/ssl-connect.man \ - lib/ssl-syn.man \ - lib/ssl.man \ - lib/table.man \ - lib/vlog-syn.man \ - lib/vlog.man \ - ovsdb/ovsdb-schemas.man -ovsdb/ovsdb-client.1.in: -lib/common-syn.man: -lib/common.man: -lib/daemon-syn.man: -lib/daemon.man: -lib/ovs.tmac: -lib/ssl-bootstrap-syn.man: -lib/ssl-bootstrap.man: -lib/ssl-connect-syn.man: -lib/ssl-connect.man: -lib/ssl-syn.man: -lib/ssl.man: -lib/table.man: -lib/vlog-syn.man: -lib/vlog.man: -ovsdb/ovsdb-schemas.man: - -ovsdb/ovsdb-server.1: \ - ovsdb/ovsdb-server.1.in \ - lib/common-syn.man \ - lib/common.man \ - lib/coverage-unixctl.man \ - lib/daemon-syn.man \ - lib/daemon.man \ - lib/memory-unixctl.man \ - lib/ovs.tmac \ - lib/service-syn.man \ - lib/service.man \ - lib/ssl-bootstrap-syn.man \ - lib/ssl-bootstrap.man \ - lib/ssl-connect-syn.man \ - lib/ssl-connect.man \ - lib/ssl-peer-ca-cert-syn.man \ - lib/ssl-peer-ca-cert.man \ - lib/ssl-syn.man \ - lib/ssl.man \ - lib/unixctl-syn.man \ - lib/unixctl.man \ - lib/vlog-syn.man \ - lib/vlog-unixctl.man \ - lib/vlog.man -ovsdb/ovsdb-server.1.in: -lib/common-syn.man: -lib/common.man: -lib/coverage-unixctl.man: -lib/daemon-syn.man: -lib/daemon.man: -lib/memory-unixctl.man: -lib/ovs.tmac: -lib/service-syn.man: -lib/service.man: -lib/ssl-bootstrap-syn.man: -lib/ssl-bootstrap.man: -lib/ssl-connect-syn.man: -lib/ssl-connect.man: -lib/ssl-peer-ca-cert-syn.man: -lib/ssl-peer-ca-cert.man: -lib/ssl-syn.man: -lib/ssl.man: -lib/unixctl-syn.man: -lib/unixctl.man: -lib/vlog-syn.man: -lib/vlog-unixctl.man: -lib/vlog.man: - -ovsdb/ovsdb-tool.1: \ - ovsdb/ovsdb-tool.1.in \ - lib/common-syn.man \ - lib/common.man \ - lib/ovs.tmac \ - lib/vlog-syn.man \ - lib/vlog.man \ - ovsdb/ovsdb-schemas.man -ovsdb/ovsdb-tool.1.in: 
-lib/common-syn.man: -lib/common.man: -lib/ovs.tmac: -lib/vlog-syn.man: -lib/vlog.man: -ovsdb/ovsdb-schemas.man: - -utilities/bugtool/ovs-bugtool.8: \ - utilities/bugtool/ovs-bugtool.8.in \ - lib/ovs.tmac -utilities/bugtool/ovs-bugtool.8.in: -lib/ovs.tmac: - - -utilities/ovs-dpctl-top.8: \ - utilities/ovs-dpctl-top.8.in \ - lib/ovs.tmac -utilities/ovs-dpctl-top.8.in: -lib/ovs.tmac: - -utilities/ovs-dpctl.8: \ - utilities/ovs-dpctl.8.in \ - lib/common.man \ - lib/dpctl.man \ - lib/ovs.tmac \ - lib/vlog.man -utilities/ovs-dpctl.8.in: -lib/common.man: -lib/dpctl.man: -lib/ovs.tmac: -lib/vlog.man: - -utilities/ovs-ofctl.8: \ - utilities/ovs-ofctl.8.in \ - lib/colors.man \ - lib/common.man \ - lib/daemon.man \ - lib/ofp-version.man \ - lib/ovs.tmac \ - lib/ssl.man \ - lib/unixctl.man \ - lib/vconn-active.man \ - lib/vlog.man -utilities/ovs-ofctl.8.in: -lib/colors.man: -lib/common.man: -lib/daemon.man: -lib/ofp-version.man: -lib/ovs.tmac: -lib/ssl.man: -lib/unixctl.man: -lib/vconn-active.man: -lib/vlog.man: - -utilities/ovs-pcap.1: \ - utilities/ovs-pcap.1.in \ - lib/common-syn.man \ - lib/common.man \ - lib/ovs.tmac -utilities/ovs-pcap.1.in: -lib/common-syn.man: -lib/common.man: -lib/ovs.tmac: - -lib/ovs.tmac: - -utilities/ovs-testcontroller.8: \ - utilities/ovs-testcontroller.8.in \ - lib/common.man \ - lib/daemon.man \ - lib/ofp-version.man \ - lib/ovs.tmac \ - lib/ssl-peer-ca-cert.man \ - lib/ssl.man \ - lib/unixctl.man \ - lib/vconn-active.man \ - lib/vconn-passive.man \ - lib/vlog.man -utilities/ovs-testcontroller.8.in: -lib/common.man: -lib/daemon.man: -lib/ofp-version.man: -lib/ovs.tmac: -lib/ssl-peer-ca-cert.man: -lib/ssl.man: -lib/unixctl.man: -lib/vconn-active.man: -lib/vconn-passive.man: -lib/vlog.man: - -utilities/ovs-vsctl.8: \ - utilities/ovs-vsctl.8.in \ - lib/common.man \ - lib/db-ctl-base.man \ - lib/ovs.tmac \ - lib/ssl-bootstrap.man \ - lib/ssl-peer-ca-cert.man \ - lib/ssl.man \ - lib/table.man \ - lib/vconn-active.man \ - lib/vconn-passive.man \ - 
lib/vlog.man -utilities/ovs-vsctl.8.in: -lib/common.man: -lib/db-ctl-base.man: -lib/ovs.tmac: -lib/ssl-bootstrap.man: -lib/ssl-peer-ca-cert.man: -lib/ssl.man: -lib/table.man: -lib/vconn-active.man: -lib/vconn-passive.man: -lib/vlog.man: - -vswitchd/ovs-vswitchd.8: \ - vswitchd/ovs-vswitchd.8.in \ - lib/common.man \ - lib/coverage-unixctl.man \ - lib/daemon.man \ - lib/dpctl.man \ - lib/dpif-netdev-unixctl.man \ - lib/memory-unixctl.man \ - lib/netdev-dpdk-unixctl.man \ - lib/ovs.tmac \ - lib/service.man \ - lib/ssl-bootstrap.man \ - lib/ssl-peer-ca-cert.man \ - lib/ssl.man \ - lib/unixctl.man \ - lib/vlog-unixctl.man \ - lib/vlog.man \ - ofproto/ofproto-dpif-unixctl.man \ - ofproto/ofproto-tnl-unixctl.man \ - ofproto/ofproto-unixctl.man -vswitchd/ovs-vswitchd.8.in: -lib/common.man: -lib/coverage-unixctl.man: -lib/daemon.man: -lib/dpctl.man: -lib/dpif-netdev-unixctl.man: -lib/memory-unixctl.man: -lib/netdev-dpdk-unixctl.man: -lib/ovs.tmac: -lib/service.man: -lib/ssl-bootstrap.man: -lib/ssl-peer-ca-cert.man: -lib/ssl.man: -lib/unixctl.man: -lib/vlog-unixctl.man: -lib/vlog.man: -ofproto/ofproto-dpif-unixctl.man: -ofproto/ofproto-tnl-unixctl.man: -ofproto/ofproto-unixctl.man: - -vtep/vtep-ctl.8: \ - vtep/vtep-ctl.8.in \ - lib/common.man \ - lib/db-ctl-base.man \ - lib/ovs.tmac \ - lib/ssl-bootstrap.man \ - lib/ssl-peer-ca-cert.man \ - lib/ssl.man \ - lib/table.man \ - lib/vlog.man -vtep/vtep-ctl.8.in: -lib/common.man: -lib/db-ctl-base.man: -lib/ovs.tmac: -lib/ssl-bootstrap.man: -lib/ssl-peer-ca-cert.man: -lib/ssl.man: -lib/table.man: -lib/vlog.man: -- GitLab From 8d56db08831af84a5a37e16f3df24e6e22901dcd Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Thu, 3 Sep 2020 10:02:46 -0700 Subject: [PATCH 277/432] selinux: Add missing permissions for ovs-kmod-ctl. On RHEL 8, a SELinux policy is missing when ovs-kmod-ctl use modprobe to load kernel modules. 
This patch adds the missing permissions based on /var/log/audit/audit.log Example log of the AVC violations: type=AVC msg=audit(1599075387.136:65): avc: denied { read } for pid=1472 comm="modprobe" name="modules.alias.bin" dev="dm-0" ino=586629 scontext=system_u:system_r:openvswitch_load_module_t:s0 tcontext=system_u:object_r:modules_dep_t:s0 tclass=file permissive=0 type=AVC msg=audit(1599085253.148:45): avc: denied { open } for pid=1355 comm="modprobe" path="/usr/lib/modules/4.18.0-193.el8.x86_64/modules.dep.bin" dev="dm-0" ino=624258 scontext=system_u:system_r:openvswitch_load_module_t:s0 tcontext=unconfined_u:object_r:modules_dep_t:s0 tclass=file permissive=0 VMWare-BZ: #2633569 Signed-off-by: Yi-Hung Wei Acked-by: Greg Rose Acked-by: Ansis Atteka Signed-off-by: Ilya Maximets --- selinux/openvswitch-custom.te.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/selinux/openvswitch-custom.te.in b/selinux/openvswitch-custom.te.in index 2adaf231f..beb0ab0d6 100644 --- a/selinux/openvswitch-custom.te.in +++ b/selinux/openvswitch-custom.te.in @@ -19,6 +19,7 @@ require { type kernel_t; type hostname_exec_t; type modules_conf_t; + type modules_dep_t; type modules_object_t; type passwd_file_t; type plymouth_exec_t; @@ -121,6 +122,7 @@ allow openvswitch_load_module_t insmod_exec_t:file { execute execute_no_trans ge allow openvswitch_load_module_t kernel_t:system module_request; allow openvswitch_load_module_t modules_conf_t:dir { getattr open read search }; allow openvswitch_load_module_t modules_conf_t:file { getattr open read }; +allow openvswitch_load_module_t modules_dep_t:file { getattr map open read }; allow openvswitch_load_module_t modules_object_t:file { map getattr open read }; allow openvswitch_load_module_t modules_object_t:dir { getattr open read search }; allow openvswitch_load_module_t openvswitch_load_module_exec_t:file { entrypoint }; -- GitLab From 8dc3911bb190f59276aac1e710b69b6411e00884 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 15 Sep 
2020 21:09:57 +0200 Subject: [PATCH 278/432] cirrus: Use FreeBSD 11.4. Support cycle of 11.3 ends in the end of September 2020, so we need to upgrade. Signed-off-by: Ilya Maximets Acked-by: Aaron Conole --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 9428164ee..263c2cd7e 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -3,7 +3,7 @@ freebsd_build_task: freebsd_instance: matrix: image_family: freebsd-12-1-snap - image_family: freebsd-11-3-snap + image_family: freebsd-11-4-snap cpu: 4 memory: 8G -- GitLab From b424becaac58d8cb08fb19ea839be6807d3ed57f Mon Sep 17 00:00:00 2001 From: Boleslaw Tokarski Date: Wed, 8 Apr 2020 11:47:20 +0100 Subject: [PATCH 279/432] ipsec: Fix Strongswan configuration syntax. Strongswan seems to have .opt files in the source tree with the dotted option syntax. It seems that up until version 5.6, the syntax was also accepted by Strongswan. However, the .opt files are converted to .conf files during Strongswan build, and the dotted syntax is no longer accepted by Strongswan (tested on 5.8.2). The effect was that the ovs ipsec monitor fails to start Strongswan, since that complains with: /etc/strongswan.d/ovs.conf:4: syntax error, unexpected ., expecting : or '{' or '=' [.] This commit fixes the configuration file provided to Strongswan to .conf syntax. 
Signed-off-by: Boleslaw Tokarski Signed-off-by: Ilya Maximets --- ipsec/ovs-monitor-ipsec.in | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in index 1c185bbd8..b84608a55 100755 --- a/ipsec/ovs-monitor-ipsec.in +++ b/ipsec/ovs-monitor-ipsec.in @@ -145,10 +145,18 @@ class StrongSwanHelper(object): """This class does StrongSwan specific configurations.""" STRONGSWAN_CONF = """%s -charon.plugins.kernel-netlink.set_proto_port_transport_sa = yes -charon.plugins.kernel-netlink.xfrm_ack_expires = 10 -charon.load_modular = yes -charon.plugins.gcm.load = yes +charon { + plugins { + kernel-netlink { + set_proto_port_transport_sa = yes + xfrm_ack_expires = 10 + } + gcm { + load = yes + } + } + load_modular = yes +} """ % (FILE_HEADER) CONF_HEADER = """%s -- GitLab From 0026d9dcb0865e8ba48b57429da25ace0df43d41 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 16 Sep 2020 14:29:56 +0200 Subject: [PATCH 280/432] AUTHORS: Add Boleslaw Tokarski. Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index cb26e0197..ba47c9c2c 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -80,6 +80,7 @@ Bert Vermeulen bert@biot.com Bhanuprakash Bodireddy bhanuprakash.bodireddy@intel.com Billy O'Mahony billy.o.mahony@intel.com Binbin Xu xu.binbin1@zte.com.cn +Boleslaw Tokarski boleslaw.tokarski@jollamobile.com Brian Haley haleyb.dev@gmail.com Brian Kruger bkruger+ovsdev@gmail.com Bruce Davie bdavie@vmware.com -- GitLab From 4d6e7ddacbf6c14ec3ddd37eff4953e1080d45df Mon Sep 17 00:00:00 2001 From: Gurucharan Shetty Date: Fri, 18 Sep 2020 14:43:32 -0700 Subject: [PATCH 281/432] ovs-lib: Handle daemon segfaults during exit. Currently, we terminate a daemon by trying "ovs-appctl exit", "SIGTERM" and finally "SIGKILL". But the logic fails if during "ovs-appctl exit", the daemon crashes (segfaults). 
The monitor will automatically restart the daemon with a new pid. The current logic of checking the non-existence of the old pid succeeds and we proceed with the assumption that the daemon is dead. This is a problem during OVS upgrades as we will continue to run the older version of OVS. With this commit, we take care of this situation. If there is a segfault, the pidfile is not deleted. So, we wait a little to give time for the monitor to restart the daemon (which is usually instantaneous) and then re-read the pidfile. VMware-BZ: #2633995 Signed-off-by: Gurucharan Shetty Acked-by: Yi-Hung Wei --- utilities/ovs-lib.in | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/utilities/ovs-lib.in b/utilities/ovs-lib.in index d646b444a..f7e975674 100644 --- a/utilities/ovs-lib.in +++ b/utilities/ovs-lib.in @@ -255,20 +255,36 @@ stop_daemon () { if version_geq "$version" "2.5.90"; then actions="$graceful $actions" fi + actiontype="" for action in $actions; do if pid_exists "$pid" >/dev/null 2>&1; then :; else - return 0 + # pid does not exist. + if [ -n "$actiontype" ]; then + return 0 + fi + # But, does the file exist? We may have had a daemon + # segfault with `ovs-appctl exit`. Check one more time + # before deciding that the daemon is dead. + [ -e "$rundir/$1.pid" ] && sleep 2 && pid=`cat "$rundir/$1.pid"` 2>/dev/null + if pid_exists "$pid" >/dev/null 2>&1; then :; else + return 0 + fi fi case $action in EXIT) action "Exiting $1 ($pid)" \ ${bindir}/ovs-appctl -T 1 -t $rundir/$1.$pid.ctl exit $2 + # The above command could have resulted in delayed + # daemon segfault. And if a monitor is running, it + # would restart the daemon giving it a new pid.
;; TERM) action "Killing $1 ($pid)" kill $pid + actiontype="force" ;; KILL) action "Killing $1 ($pid) with SIGKILL" kill -9 $pid + actiontype="force" ;; FAIL) log_failure_msg "Killing $1 ($pid) failed" -- GitLab From 1f185ae633ecbf87645c3c09d4e81a3c6a24d13b Mon Sep 17 00:00:00 2001 From: Yi Li Date: Tue, 22 Sep 2020 10:25:30 +0800 Subject: [PATCH 282/432] Remove duplicate include file Found by checkincludes.pl Signed-off-by: Yi Li Signed-off-by: Alin Gabriel Serdean --- AUTHORS.rst | 1 + datapath-windows/ovsext/Vxlan.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index ba47c9c2c..b47806bf7 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -411,6 +411,7 @@ xu rong xu.rong@zte.com.cn YAMAMOTO Takashi yamamoto@midokura.com Yanqin Wei Yanqin.Wei@arm.com Yasuhito Takamiya yasuhito@gmail.com +Yi Li yili@winhong.com Yi Yang yangyi01@inspur.com Yi-Hung Wei yihung.wei@gmail.com Yifeng Sun pkusunyifeng@gmail.com diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c index 09809d397..04df9f6c9 100644 --- a/datapath-windows/ovsext/Vxlan.c +++ b/datapath-windows/ovsext/Vxlan.c @@ -19,7 +19,6 @@ #include "Atomic.h" #include "Debug.h" #include "Flow.h" -#include "Flow.h" #include "IpHelper.h" #include "NetProto.h" #include "Offload.h" -- GitLab From 66b6791c324ab6133d456b565e466724acec219b Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Tue, 22 Sep 2020 13:01:45 +0300 Subject: [PATCH 283/432] windows: Document how to generate the Windows installer This patch adds information on how to generate the Windows installer which can be used to easily deploy the userspace binaries, kernel module and create services on new environments. 
Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- Documentation/intro/install/windows.rst | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/Documentation/intro/install/windows.rst b/Documentation/intro/install/windows.rst index 394572f00..31cef13b8 100644 --- a/Documentation/intro/install/windows.rst +++ b/Documentation/intro/install/windows.rst @@ -71,7 +71,10 @@ The following explains the steps in some detail. You will need at least Visual Studio 2013 (update 4) to compile userspace binaries. In addition to that, if you want to compile the kernel module you - will also need to install Windows Driver Kit (WDK) 8.1 Update. + will also need to install Windows Driver Kit (WDK) 8.1 Update or later. + To generate the Windows installer you need + `WiX Toolset `__ and also be able to build the + kernel module. It is important to get the Visual Studio related environment variables and to have the $PATH inside the bash to point to the proper compiler and linker. @@ -319,6 +322,22 @@ An alternative way to do the same is to run the following command: seconds has been observed for the change to be reflected in the UI. This is not a bug in Open vSwitch. +Generate the Windows installer +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +To generate the Windows installler run the following command from the top +source directory: + +:: + + $ make windows_installer + +.. note:: + + This will generate the Windows installer in the following location (relative + to the top source directory): + windows/ovs-windows-installer/bin/Release/OpenvSwitch.msi + Starting -------- @@ -797,5 +816,4 @@ TODO * Investigate and add the feature to provide QoS. -* Sign the driver & create an MSI for installing the different Open vSwitch - components on Windows. +* Sign the driver. 
-- GitLab From d4bd63f477e3f15fbf55650139b5881fd4003b72 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Tue, 22 Sep 2020 13:03:06 +0300 Subject: [PATCH 284/432] documentation, windows: Fix line endings at 79 characters Found by inspection. Signed-off-by: Alin Gabriel Serdean Acked-by: Greg Rose Acked-by: Ilya Maximets --- Documentation/intro/install/windows.rst | 8 ++++---- Documentation/topics/windows.rst | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Documentation/intro/install/windows.rst b/Documentation/intro/install/windows.rst index 31cef13b8..61582f791 100644 --- a/Documentation/intro/install/windows.rst +++ b/Documentation/intro/install/windows.rst @@ -804,10 +804,10 @@ Windows CI Service ------------------ `AppVeyor `__ provides a free Windows autobuild service for -open source projects. Open vSwitch has integration with AppVeyor for continuous -build. A developer can build test his changes for Windows by logging into -appveyor.com using a github account, creating a new project by linking it to -his development repository in github and triggering a new build. +open source projects. Open vSwitch has integration with AppVeyor for +continuous build. A developer can build test his changes for Windows by +logging into appveyor.com using a github account, creating a new project by +linking it to his development repository in github and triggering a new build. TODO ---- diff --git a/Documentation/topics/windows.rst b/Documentation/topics/windows.rst index 3a103b4e8..be6e2861e 100644 --- a/Documentation/topics/windows.rst +++ b/Documentation/topics/windows.rst @@ -253,9 +253,9 @@ Netlink Message Parser ~~~~~~~~~~~~~~~~~~~~~~ The communication between OVS userspace and OVS kernel datapath is in the form -of Netlink messages [1]_, [8]_. More details about this are provided below. In the -kernel, a full fledged netlink message parser has been implemented along the -lines of the netlink message parser in OVS userspace. 
In fact, a lot of the +of Netlink messages [1]_, [8]_. More details about this are provided below. In +the kernel, a full fledged netlink message parser has been implemented along +the lines of the netlink message parser in OVS userspace. In fact, a lot of the code is ported code. On the lines of ``struct ofpbuf`` in OVS userspace, a managed buffer has been -- GitLab From c893685f532ca24d568b73f66914ed18bcb73bd8 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Wed, 23 Sep 2020 13:39:55 +0300 Subject: [PATCH 285/432] windows, tests: Strip EOL characters when passing them to tasklist When running OVSDB cluster tests on Windows not all the ovsdb processes are terminated. Strip carriage return and newline of the arguments passed to the kill command because they will cause problems when passing them to tasklist and taskkill. Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- tests/ovs-macros.at | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ovs-macros.at b/tests/ovs-macros.at index b1f666f4e..66545da57 100644 --- a/tests/ovs-macros.at +++ b/tests/ovs-macros.at @@ -106,6 +106,7 @@ if test "$IS_WIN32" = "yes"; then signal= retval=0 for arg; do + arg=$(echo $arg | tr -d '\n\r') case $arg in -*) signal=$arg ;; [1-9][0-9]*) -- GitLab From a52c073b2fdf35e852cfbe8edc36b4d6937aafb1 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Thu, 24 Sep 2020 09:16:13 +0300 Subject: [PATCH 286/432] windows: Remove unused variable Found by inspection. Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- windows/automake.mk | 1 - 1 file changed, 1 deletion(-) diff --git a/windows/automake.mk b/windows/automake.mk index 80dca1467..49c8985fd 100644 --- a/windows/automake.mk +++ b/windows/automake.mk @@ -12,7 +12,6 @@ # License for the specific language governing permissions and limitations # under the License. 
-PTHREAD_TEMP_DIR=`echo "$(PTHREAD_LDFLAGS)" | sed 's|^.\(.*\).$:\1||'` windows_installer: all #Userspace files needed for the installer cp -f $(top_srcdir)/datapath-windows/misc/OVS.psm1 windows/ovs-windows-installer/Services/OVS.psm1 -- GitLab From 809e13ed21a8e4692734bf63c99ddaf53ff87b1f Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Thu, 24 Sep 2020 09:17:23 +0300 Subject: [PATCH 287/432] windows: Add default value for VSTUDIO_CONFIG VSTUDIO_CONFIG is used when generating the windows installer. If `--with-vstudiotarget` is not passed to configure, we default it to `Debug`. Fixes bug: vstudiotarget/vstudiotargetver should be available only on Windows. Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- m4/openvswitch.m4 | 90 +++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4 index 6fe79297e..907e6b060 100644 --- a/m4/openvswitch.m4 +++ b/m4/openvswitch.m4 @@ -146,51 +146,51 @@ dnl OVS_CHECK_WINDOWS dnl dnl Configure Visual Studio solution build AC_DEFUN([OVS_CHECK_VISUAL_STUDIO_DDK], [ -AC_ARG_WITH([vstudiotarget], - [AS_HELP_STRING([--with-vstudiotarget=target_type], - [Target type: Debug/Release])], - [ - case "$withval" in - "Release") ;; - "Debug") ;; - *) AC_MSG_ERROR([No valid Visual Studio configuration found]) ;; - esac - - VSTUDIO_CONFIG=$withval - ], [ - VSTUDIO_CONFIG= - ] - ) - - AC_SUBST([VSTUDIO_CONFIG]) - -AC_ARG_WITH([vstudiotargetver], - [AS_HELP_STRING([--with-vstudiotargetver=target_ver1,target_ver2], - [Target versions: Win8,Win8.1,Win10])], - [ - targetver=`echo "$withval" | tr -s , ' ' ` - for ver in $targetver; do - case "$ver" in - "Win8") VSTUDIO_WIN8=true ;; - "Win8.1") VSTUDIO_WIN8_1=true ;; - "Win10") VSTUDIO_WIN10=true ;; - *) AC_MSG_ERROR([No valid Visual Studio target version found]) ;; - esac - done - - ], [ - VSTUDIO_WIN8=true - VSTUDIO_WIN8_1=true - VSTUDIO_WIN10=true - ] 
- ) - - AM_CONDITIONAL([VSTUDIO_WIN8], [test -n "$VSTUDIO_WIN8"]) - AM_CONDITIONAL([VSTUDIO_WIN8_1], [test -n "$VSTUDIO_WIN8_1"]) - AM_CONDITIONAL([VSTUDIO_WIN10], [test -n "$VSTUDIO_WIN10"]) - - AC_DEFINE([VSTUDIO_DDK], [1], [System uses the Visual Studio build target.]) - AM_CONDITIONAL([VSTUDIO_DDK], [test -n "$VSTUDIO_CONFIG"]) +if test "$WIN32" = yes; then + AC_ARG_WITH([vstudiotarget], + [AS_HELP_STRING([--with-vstudiotarget=target_type], + [Target type: Debug/Release])], + [ + case "$withval" in + "Release") ;; + "Debug") ;; + *) AC_MSG_ERROR([No valid Visual Studio configuration found]) ;; + esac + + VSTUDIO_CONFIG=$withval + ], [ + VSTUDIO_CONFIG="Debug" + ] + ) + + AC_SUBST([VSTUDIO_CONFIG]) + + AC_ARG_WITH([vstudiotargetver], + [AS_HELP_STRING([--with-vstudiotargetver=target_ver1,target_ver2], + [Target versions: Win8,Win8.1,Win10])], + [ + targetver=`echo "$withval" | tr -s , ' ' ` + for ver in $targetver; do + case "$ver" in + "Win8") VSTUDIO_WIN8=true ;; + "Win8.1") VSTUDIO_WIN8_1=true ;; + "Win10") VSTUDIO_WIN10=true ;; + *) AC_MSG_ERROR([No valid Visual Studio target version found]) ;; + esac + done + + ], [ + VSTUDIO_WIN8=true + VSTUDIO_WIN8_1=true + VSTUDIO_WIN10=true + ] + ) + AC_DEFINE([VSTUDIO_DDK], [1], [System uses the Visual Studio build target.]) +fi +AM_CONDITIONAL([VSTUDIO_WIN8], [test -n "$VSTUDIO_WIN8"]) +AM_CONDITIONAL([VSTUDIO_WIN8_1], [test -n "$VSTUDIO_WIN8_1"]) +AM_CONDITIONAL([VSTUDIO_WIN10], [test -n "$VSTUDIO_WIN10"]) +AM_CONDITIONAL([VSTUDIO_DDK], [test -n "$VSTUDIO_CONFIG"]) ]) dnl Checks for Netlink support. -- GitLab From 3ea2cfd95a279a294a0fcfac9add069fa0e31cc8 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Thu, 24 Sep 2020 09:18:39 +0300 Subject: [PATCH 288/432] datapath_windows: Add datapath_windows target It is useful to build the latest supported version of the driver using the `make` command. 
Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- datapath-windows/automake.mk | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index b8cf5dd95..60b3d6033 100644 --- a/datapath-windows/automake.mk +++ b/datapath-windows/automake.mk @@ -90,3 +90,7 @@ datapath_windows_analyze: all MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win10Analyze" MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8.1Analyze" MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8Analyze" + +datapath_windows: all + MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win10Debug" + MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win10Release" -- GitLab From aaa1df71fd2f068f16cdfdee9433c450fe4491b0 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Thu, 24 Sep 2020 09:19:29 +0300 Subject: [PATCH 289/432] windows, documentation: Recommend latest VS and WDK version Found by inspection. Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- Documentation/intro/install/windows.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/intro/install/windows.rst b/Documentation/intro/install/windows.rst index 61582f791..e91c2dbf7 100644 --- a/Documentation/intro/install/windows.rst +++ b/Documentation/intro/install/windows.rst @@ -76,6 +76,9 @@ The following explains the steps in some detail. `WiX Toolset `__ and also be able to build the kernel module. + We recommend using the latest Visual Studio version together with the latest + WDK installed. + It is important to get the Visual Studio related environment variables and to have the $PATH inside the bash to point to the proper compiler and linker. 
One easy way to achieve this for VS2013 is to get into the "VS2013 x86 Native -- GitLab From fd8e707f38d06014f7e17319a4a10179e6dc66e1 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Thu, 24 Sep 2020 09:20:23 +0300 Subject: [PATCH 290/432] windows, installer: Bundle latest runtime version Until now we were bundling MSVC120 x86 runtime. This patch changes it to the latest version and also adds the 64-bit version of it. Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- windows/automake.mk | 3 ++- windows/ovs-windows-installer/Product.wxs | 11 ++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/windows/automake.mk b/windows/automake.mk index 49c8985fd..ce1fac66c 100644 --- a/windows/automake.mk +++ b/windows/automake.mk @@ -26,7 +26,8 @@ windows_installer: all cp -f $(top_srcdir)/ovsdb/ovsdb-tool.pdb windows/ovs-windows-installer/Symbols/ #Third party files needed by the installer cp -f $(PTHREAD_WIN32_DIR_DLL_WIN_FORM)/*.dll windows/ovs-windows-installer/Binaries/ - cp -f "/c/Program Files (x86)/Common Files/Merge Modules/Microsoft_VC120_CRT_x86.msm" windows/ovs-windows-installer/Redist/Microsoft_VC120_CRT_x86.msm + cp -f "/c/Program Files (x86)/Common Files/Merge Modules/Microsoft_VC140_CRT_x86.msm" windows/ovs-windows-installer/Redist/Microsoft_VC140_CRT_x86.msm + cp -f "/c/Program Files (x86)/Common Files/Merge Modules/Microsoft_VC140_CRT_x64.msm" windows/ovs-windows-installer/Redist/Microsoft_VC140_CRT_x64.msm #Forwarding extension files needed for the installer cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/ovsext.cat windows/ovs-windows-installer/Driver/Win8/ovsext.cat cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/ovsext.inf windows/ovs-windows-installer/Driver/Win8/ovsext.inf diff --git a/windows/ovs-windows-installer/Product.wxs b/windows/ovs-windows-installer/Product.wxs index ea1bc6896..d722fe927 100644 --- a/windows/ovs-windows-installer/Product.wxs +++ 
b/windows/ovs-windows-installer/Product.wxs @@ -51,9 +51,13 @@ - - + + + + @@ -260,7 +264,8 @@ - + + -- GitLab From 8596b131c34b41bfe30c74047087b68ae5c071a1 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Thu, 24 Sep 2020 09:22:38 +0300 Subject: [PATCH 291/432] windows: Update build with latest pthread project pthreads-win32 has moved to PThreads4W. This patch updates the build steps, CI (appveyor) and documentation. Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- Documentation/intro/install/windows.rst | 10 +++++----- appveyor.yml | 18 +++++++----------- m4/openvswitch.m4 | 19 ++++--------------- 3 files changed, 16 insertions(+), 31 deletions(-) diff --git a/Documentation/intro/install/windows.rst b/Documentation/intro/install/windows.rst index e91c2dbf7..79d4c6261 100644 --- a/Documentation/intro/install/windows.rst +++ b/Documentation/intro/install/windows.rst @@ -98,13 +98,13 @@ The following explains the steps in some detail. Visual studio's linker is used. You should also see a 'which sort' report ``/bin/sort.exe``. -- pthreads-win32 +- PThreads4W - For pthread support, install the library, dll and includes of pthreads-win32 + For pthread support, install the library, dll and includes of PThreads4W project from `sourceware - `__ to a - directory (e.g.: ``C:/pthread``). You should add the pthread-win32's dll path - (e.g.: ``C:\pthread\dll\x86``) to the Windows' PATH environment variable. + `__ to a directory + (e.g.: ``C:/pthread``). You should add the PThreads4W's dll path + (e.g.: ``C:\pthread\bin``) to the Windows' PATH environment variable. 
- OpenSSL diff --git a/appveyor.yml b/appveyor.yml index fa6754ce2..6e2b2e9e2 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,5 @@ version: 1.0.{build} +image: Visual Studio 2019 branches: only: - master @@ -7,16 +8,8 @@ init: - ps: $env:PATH ="C:\Python37;"+$env:PATH - ps: New-Item -Type HardLink -Path "C:\Python37\python3.exe" -Value "C:\Python37\python.exe" - ps: >- - mkdir C:\pthreads-win32 - mkdir C:\ovs-build-downloads - $source = "ftp://sourceware.org/pub/pthreads-win32/pthreads-w32-2-9-1-release.zip" - - $destination = "C:\pthreads-win32\pthreads-win32.zip" - - Invoke-WebRequest $source -OutFile $destination - $source = "https://slproweb.com/download/Win32OpenSSL-1_0_2t.exe" $destination = "C:\ovs-build-downloads\Win32OpenSSL-1_0_2t.exe" @@ -35,14 +28,17 @@ init: cd C:\openvswitch + git clone https://git.code.sf.net/p/pthreads4w/code c:\pthreads4w-code + python3 -m pip install pypiwin32 --disable-pip-version-check build_script: -- '"C:\Program Files (x86)\Microsoft Visual Studio 12.0\Common7\Tools\VsDevCmd"' +- '"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"' - C:\MinGW\msys\1.0\bin\bash -lc "echo \"C:/MinGW /mingw\" > /etc/fstab" -- C:\MinGW\msys\1.0\bin\bash -lc "cp /c/pthreads-win32/Pre-built.2/dll/x86/*.dll /c/openvswitch/." 
- C:\MinGW\msys\1.0\bin\bash -lc "mv /bin/link.exe /bin/link_copy.exe" +# Build pthreads +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/pthreads4w-code && nmake all install" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./boot.sh" -- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -lShlwapi -liphlpapi -lwbemuuid -lole32 -loleaut32\" --with-pthread=C:/pthreads-win32/Pre-built.2 --with-openssl=C:/OpenSSL-Win32 --with-vstudiotarget=\"Debug\" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -lShlwapi -liphlpapi -lwbemuuid -lole32 -loleaut32\" --with-pthread=c:/PTHREADS-BUILT/ --with-openssl=C:/OpenSSL-Win32 --with-vstudiotarget=\"Debug\" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make" - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make datapath_windows_analyze" diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4 index 907e6b060..244ea0fba 100644 --- a/m4/openvswitch.m4 +++ b/m4/openvswitch.m4 @@ -95,23 +95,12 @@ AC_DEFUN([OVS_CHECK_WIN32], AC_MSG_ERROR([Invalid --with-pthread value]) ;; *) - if (cl) 2>&1 | grep 'x64' >/dev/null 2>&1; then - cl_cv_x64=yes - else - cl_cv_x64=no - fi - if test "$cl_cv_x64" = yes; then - PTHREAD_WIN32_DIR=$withval/lib/x64 - PTHREAD_WIN32_DIR_DLL=/$(echo ${withval} | ${SED} -e 's/://')/dll/x64 - PTHREAD_WIN32_DIR_DLL_WIN_FORM=$withval/dll/x64 - else - PTHREAD_WIN32_DIR=$withval/lib/x86 - PTHREAD_WIN32_DIR_DLL=/$(echo ${withval} | ${SED} -e 's/://')/dll/x86 - PTHREAD_WIN32_DIR_DLL_WIN_FORM=$withval/dll/x86 - fi + PTHREAD_WIN32_DIR=$withval/lib + PTHREAD_WIN32_DIR_DLL=/$(echo ${withval} | ${SED} -e 's/://')/bin + PTHREAD_WIN32_DIR_DLL_WIN_FORM=$withval/bin PTHREAD_INCLUDES=-I$withval/include PTHREAD_LDFLAGS=-L$PTHREAD_WIN32_DIR - PTHREAD_LIBS="-lpthreadVC2" + PTHREAD_LIBS="-lpthreadVC3" AC_SUBST([PTHREAD_WIN32_DIR_DLL_WIN_FORM]) AC_SUBST([PTHREAD_WIN32_DIR_DLL]) 
AC_SUBST([PTHREAD_INCLUDES]) -- GitLab From bc357f0dd10cf35b4318930c2a06b363c852f101 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Thu, 24 Sep 2020 09:23:05 +0300 Subject: [PATCH 292/432] windows, installer: Bundle Windows 10 driver This patch bundles the Windows 10 driver family in the installer and also adds detection for the family. Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- windows/automake.mk | 5 ++++- windows/ovs-windows-installer/Driver/.gitignore | 1 + .../ovs-windows-installer/Driver/Win10/.gitignore | 3 +++ windows/ovs-windows-installer/Product.wxs | 12 +++++++++++- 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 windows/ovs-windows-installer/Driver/Win10/.gitignore diff --git a/windows/automake.mk b/windows/automake.mk index ce1fac66c..489343028 100644 --- a/windows/automake.mk +++ b/windows/automake.mk @@ -31,10 +31,13 @@ windows_installer: all #Forwarding extension files needed for the installer cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/ovsext.cat windows/ovs-windows-installer/Driver/Win8/ovsext.cat cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/ovsext.inf windows/ovs-windows-installer/Driver/Win8/ovsext.inf - cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/OVSExt.sys windows/ovs-windows-installer/Driver/Win8/OVSExt.sys + cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/OVSExt.sys windows/ovs-windows-installer/Driver/Win8/ovsext.sys cp -f $(top_srcdir)/datapath-windows/x64/Win8.1$(VSTUDIO_CONFIG)/package/ovsext.cat windows/ovs-windows-installer/Driver/Win8.1/ovsext.cat cp -f $(top_srcdir)/datapath-windows/x64/Win8.1$(VSTUDIO_CONFIG)/package/ovsext.inf windows/ovs-windows-installer/Driver/Win8.1/ovsext.inf cp -f $(top_srcdir)/datapath-windows/x64/Win8.1$(VSTUDIO_CONFIG)/package/ovsext.sys windows/ovs-windows-installer/Driver/Win8.1/ovsext.sys + cp -f 
$(top_srcdir)/datapath-windows/x64/Win10$(VSTUDIO_CONFIG)/package/ovsext.cat windows/ovs-windows-installer/Driver/Win10/ovsext.cat + cp -f $(top_srcdir)/datapath-windows/x64/Win10$(VSTUDIO_CONFIG)/package/ovsext.inf windows/ovs-windows-installer/Driver/Win10/ovsext.inf + cp -f $(top_srcdir)/datapath-windows/x64/Win10$(VSTUDIO_CONFIG)/package/ovsext.sys windows/ovs-windows-installer/Driver/Win10/ovsext.sys MSBuild.exe windows/ovs-windows-installer.sln //nologo //target:Build //p:Configuration="Release" //p:Version="$(PACKAGE_VERSION)" //p:Platform=$(PLATFORM) EXTRA_DIST += \ diff --git a/windows/ovs-windows-installer/Driver/.gitignore b/windows/ovs-windows-installer/Driver/.gitignore index e9994b37d..3de517016 100644 --- a/windows/ovs-windows-installer/Driver/.gitignore +++ b/windows/ovs-windows-installer/Driver/.gitignore @@ -3,3 +3,4 @@ !.gitignore !Win8 !Win8.1 +!Win10 diff --git a/windows/ovs-windows-installer/Driver/Win10/.gitignore b/windows/ovs-windows-installer/Driver/Win10/.gitignore new file mode 100644 index 000000000..cec9082b6 --- /dev/null +++ b/windows/ovs-windows-installer/Driver/Win10/.gitignore @@ -0,0 +1,3 @@ +* + +!.gitignore diff --git a/windows/ovs-windows-installer/Product.wxs b/windows/ovs-windows-installer/Product.wxs index d722fe927..61289da6b 100644 --- a/windows/ovs-windows-installer/Product.wxs +++ b/windows/ovs-windows-installer/Product.wxs @@ -36,6 +36,9 @@ + + + = 602)]]> @@ -65,6 +68,7 @@ Description="Installs the Open vSwitch Hyper-V switch extension driver." Display="expand"> + @@ -254,11 +258,17 @@ - + + + 10000]]> + + + + -- GitLab From 80e74da4fd8bfdaba92105560ce144b4b2d00e36 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Thu, 24 Sep 2020 09:24:03 +0300 Subject: [PATCH 293/432] appveyor: Bump outdated links and add artifacts Bump OpenSSL. Add release and debug configuration. Build and add Windows installer to generated artifacts. Build and zip prebuilt version. 
Co-authored-by: Yonggang Luo Signed-off-by: Yonggang Luo Co-authored-by: Jinjun Gao Signed-off-by: Jinjun Gao Signed-off-by: Alin Gabriel Serdean Acked-by: Ilya Maximets --- appveyor.yml | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 6e2b2e9e2..25c3f69fb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -3,26 +3,27 @@ image: Visual Studio 2019 branches: only: - master -clone_folder: C:\openvswitch +configuration: + - Debug + - Release +clone_folder: C:\openvswitch_compile init: - ps: $env:PATH ="C:\Python37;"+$env:PATH - ps: New-Item -Type HardLink -Path "C:\Python37\python3.exe" -Value "C:\Python37\python.exe" - ps: >- mkdir C:\ovs-build-downloads - $source = "https://slproweb.com/download/Win32OpenSSL-1_0_2t.exe" + mkdir C:\openvswitch\driver - $destination = "C:\ovs-build-downloads\Win32OpenSSL-1_0_2t.exe" + $source = "https://slproweb.com/download/Win64OpenSSL-1_0_2u.exe" - Invoke-WebRequest $source -OutFile $destination - - cd C:\pthreads-win32 + $destination = "C:\ovs-build-downloads\Win64OpenSSL-1_0_2u.exe" - 7z x C:\pthreads-win32\pthreads-win32.zip + Invoke-WebRequest $source -OutFile $destination cd C:\ovs-build-downloads - .\Win32OpenSSL-1_0_2t.exe /silent /verysilent /sp- /suppressmsgboxes + .\Win64OpenSSL-1_0_2u.exe /silent /verysilent /sp- /suppressmsgboxes Start-Sleep -s 30 @@ -32,13 +33,28 @@ init: python3 -m pip install pypiwin32 --disable-pip-version-check + cd C:\openvswitch_compile + build_script: - '"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"' - C:\MinGW\msys\1.0\bin\bash -lc "echo \"C:/MinGW /mingw\" > /etc/fstab" - C:\MinGW\msys\1.0\bin\bash -lc "mv /bin/link.exe /bin/link_copy.exe" # Build pthreads - C:\MinGW\msys\1.0\bin\bash -lc "cd /c/pthreads4w-code && nmake all install" -- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./boot.sh" -- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && 
./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -lShlwapi -liphlpapi -lwbemuuid -lole32 -loleaut32\" --with-pthread=c:/PTHREADS-BUILT/ --with-openssl=C:/OpenSSL-Win32 --with-vstudiotarget=\"Debug\" -- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make" -- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make datapath_windows_analyze" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && ./boot.sh" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -lShlwapi -liphlpapi -lwbemuuid -lole32 -loleaut32\" --prefix=C:/openvswitch/usr --localstatedir=C:/openvswitch/var --sysconfdir=C:/openvswitch/etc --with-pthread=c:/PTHREADS-BUILT/ --enable-ssl --with-openssl=C:/OpenSSL-Win64 --with-vstudiotarget=\"%CONFIGURATION%\"" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && make -j 4" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && make datapath_windows_analyze" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && make install" +- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && make windows_installer" +- cp C:\PTHREADS-BUILT\bin\pthreadVC3.dll C:\openvswitch\usr\bin +- cp C:\PTHREADS-BUILT\bin\pthreadVC3.dll C:\openvswitch\usr\sbin +- ps: cp C:\openvswitch_compile\datapath-windows\x64\Win10$env:CONFIGURATION\package\* C:\openvswitch\driver +- ps: cp C:\openvswitch_compile\datapath-windows\x64\Win10$env:CONFIGURATION\package.cer C:\openvswitch\driver +- ps: cp C:\openvswitch_compile\datapath-windows\misc\* C:\openvswitch\driver +- cp c:\openvswitch_compile\windows\ovs-windows-installer\bin\x64\Release\OpenvSwitch.msi c:\OpenvSwitch-%CONFIGURATION%.msi + +after_build: + - ps: 7z a C:\ovs-master-$env:CONFIGURATION.zip C:\openvswitch + - ps: Push-AppveyorArtifact C:\ovs-master-$env:CONFIGURATION.zip + - ps: Push-AppveyorArtifact C:\OpenvSwitch-$env:CONFIGURATION.msi -- GitLab From 
86f624e48655b9427b872c174cdfc003ff0fa408 Mon Sep 17 00:00:00 2001 From: Ian Stokes Date: Wed, 2 Sep 2020 18:36:10 +0100 Subject: [PATCH 294/432] DPDK: Remove support for vhost-user zero-copy. Support for vhost-user dequeue zero-copy was deprecated in OVS 2.14 with the aim of removing it for OVS 2.15. OVS only supports zero copy for vhost client mode, as such it will cease to function due to DPDK commit [1] Also DPDK is set to remove zero-copy functionality in DPDK 20.11 as referenced by commit [2] As such remove support from OVS. [1] 715070ea10e6 ("vhost: prevent zero-copy with incompatible client mode") [2] d21003c9dafa ("doc: announce removal of vhost zero-copy dequeue") Signed-off-by: Ian Stokes Acked-by: Maxime Coquelin Acked-by: Ilya Maximets Acked-by: Kevin Traynor --- Documentation/topics/dpdk/vhost-user.rst | 76 +----------------------- NEWS | 2 + lib/netdev-dpdk.c | 25 -------- vswitchd/vswitch.xml | 11 ---- 4 files changed, 4 insertions(+), 110 deletions(-) diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst index 4af738d11..75d3fc958 100644 --- a/Documentation/topics/dpdk/vhost-user.rst +++ b/Documentation/topics/dpdk/vhost-user.rst @@ -340,8 +340,8 @@ The default value is ``false``. fixes (like userfaulfd leak) was released in 3.0.1. DPDK Post-copy feature requires avoiding to populate the guest memory - (application must not call mlock* syscall). So enabling mlockall and - dequeue zero-copy features is mis-compatible with post-copy feature. + (application must not call mlock* syscall). So enabling mlockall is + incompatible with post-copy feature. Note that during migration of vhost-user device, PMD threads hang for the time of faulted pages download from source host. Transferring 1GB hugepage @@ -553,78 +553,6 @@ shown with:: $ ovs-vsctl get Interface dpdkvhostclient0 statistics:ovs_tx_retries -vhost-user Dequeue Zero Copy (experimental) -------------------------------------------- - -.. 
warning:: - - vhost-user Dequeue Zero Copy is deprecated in OVS and will be removed in - the next release. - -Normally when dequeuing a packet from a vHost User device, a memcpy operation -must be used to copy that packet from guest address space to host address -space. This memcpy can be removed by enabling dequeue zero-copy like so:: - - $ ovs-vsctl add-port br0 dpdkvhostuserclient0 -- set Interface \ - dpdkvhostuserclient0 type=dpdkvhostuserclient \ - options:vhost-server-path=/tmp/dpdkvhostclient0 \ - options:dq-zero-copy=true - -With this feature enabled, a reference (pointer) to the packet is passed to -the host, instead of a copy of the packet. Removing this memcpy can give a -performance improvement for some use cases, for example switching large packets -between different VMs. However additional packet loss may be observed. - -Note that the feature is disabled by default and must be explicitly enabled -by setting the ``dq-zero-copy`` option to ``true`` while specifying the -``vhost-server-path`` option as above. If you wish to split out the command -into multiple commands as below, ensure ``dq-zero-copy`` is set before -``vhost-server-path``:: - - $ ovs-vsctl set Interface dpdkvhostuserclient0 options:dq-zero-copy=true - $ ovs-vsctl set Interface dpdkvhostuserclient0 \ - options:vhost-server-path=/tmp/dpdkvhostclient0 - -The feature is only available to ``dpdkvhostuserclient`` port types. - -A limitation exists whereby if packets from a vHost port with -``dq-zero-copy=true`` are destined for a ``dpdk`` type port, the number of tx -descriptors (``n_txq_desc``) for that port must be reduced to a smaller number, -128 being the recommended value. This can be achieved by issuing the following -command:: - - $ ovs-vsctl set Interface dpdkport options:n_txq_desc=128 - -Note: The sum of the tx descriptors of all ``dpdk`` ports the VM will send to -should not exceed 128. 
For example, in case of a bond over two physical ports -in balance-tcp mode, one must divide 128 by the number of links in the bond. - -Refer to :ref:`dpdk-queues-sizes` for more information. - -The reason for this limitation is due to how the zero copy functionality is -implemented. The vHost device's 'tx used vring', a virtio structure used for -tracking used ie. sent descriptors, will only be updated when the NIC frees -the corresponding mbuf. If we don't free the mbufs frequently enough, that -vring will be starved and packets will no longer be processed. One way to -ensure we don't encounter this scenario, is to configure ``n_txq_desc`` to a -small enough number such that the 'mbuf free threshold' for the NIC will be hit -more often and thus free mbufs more frequently. The value of 128 is suggested, -but values of 64 and 256 have been tested and verified to work too, with -differing performance characteristics. A value of 512 can be used too, if the -virtio queue size in the guest is increased to 1024 (available to configure in -QEMU versions v2.10 and greater). This value can be set like so:: - - $ qemu-system-x86_64 ... -chardev socket,id=char1,path=,server - -netdev type=vhost-user,id=mynet1,chardev=char1,vhostforce - -device virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1, - tx_queue_size=1024 - -Because of this limitation, this feature is considered 'experimental'. - -.. note:: - - Post-copy Live Migration is not compatible with dequeue zero copy. - Further information can be found in the `DPDK documentation `__ diff --git a/NEWS b/NEWS index a9c50add2..a858a3b35 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,8 @@ Post-v2.14.0 - OVSDB: * New unixctl command 'ovsdb-server/get-db-storage-status' to show the status of the storage that's backing a database. + - DPDK: + * Removed support for vhost-user dequeue zero-copy. 
v2.14.0 - 17 Aug 2020 diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index c2ec93c91..0b830be78 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -2039,12 +2039,6 @@ netdev_dpdk_vhost_client_set_config(struct netdev *netdev, if (!nullable_string_is_equal(path, dev->vhost_id)) { free(dev->vhost_id); dev->vhost_id = nullable_xstrdup(path); - /* check zero copy configuration */ - if (smap_get_bool(args, "dq-zero-copy", false)) { - dev->vhost_driver_flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY; - } else { - dev->vhost_driver_flags &= ~RTE_VHOST_USER_DEQUEUE_ZERO_COPY; - } netdev_request_reconfigure(netdev); } } @@ -5035,7 +5029,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) int err; uint64_t vhost_flags = 0; uint64_t vhost_unsup_flags; - bool zc_enabled; ovs_mutex_lock(&dev->mutex); @@ -5061,19 +5054,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) vhost_flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT; } - zc_enabled = dev->vhost_driver_flags - & RTE_VHOST_USER_DEQUEUE_ZERO_COPY; - /* Enable zero copy flag, if requested */ - if (zc_enabled) { - vhost_flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY; - /* DPDK vHost library doesn't allow zero-copy with linear buffers. - * Hence disable Linear buffer. - */ - vhost_flags &= ~RTE_VHOST_USER_LINEARBUF_SUPPORT; - VLOG_WARN("Zero copy enabled, disabling linear buffer" - " check for vHost port %s", dev->up.name); - } - /* Enable External Buffers if TCP Segmentation Offload is enabled. 
*/ if (userspace_tso_enabled()) { vhost_flags |= RTE_VHOST_USER_EXTBUF_SUPPORT; @@ -5090,11 +5070,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev) VLOG_INFO("vHost User device '%s' created in 'client' mode, " "using client socket '%s'", dev->up.name, dev->vhost_id); - if (zc_enabled) { - VLOG_INFO("Zero copy enabled for vHost port %s", dev->up.name); - VLOG_WARN("Zero copy support is deprecated and will be " - "removed in the next OVS release."); - } } err = rte_vhost_driver_callback_register(dev->vhost_id, diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 81c84927f..07da2ee8c 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -3236,17 +3236,6 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \

    - -

    - The value specifies whether or not to enable dequeue zero copy on - the given interface. - Must be set before vhost-server-path is specified. - Only supported by dpdkvhostuserclient interfaces. - The feature is considered experimental. -

    -
    -

    -- GitLab From 39fbd2c3f0392811689ec780f09baf90faceb877 Mon Sep 17 00:00:00 2001 From: Tomasz Konieczny Date: Mon, 17 Feb 2020 12:37:36 +0100 Subject: [PATCH 295/432] docs: Add flow control on i40e issue There is an issue with flow control configuration on i40e devices and it has a work around. We add this to documentation as known issue until a permanent solution is developed. Signed-off-by: Tomasz Konieczny Acked-by: Flavio Leitner Signed-off-by: Ian Stokes --- Documentation/intro/install/dpdk.rst | 9 +++++++++ Documentation/topics/dpdk/phy.rst | 2 ++ 2 files changed, 11 insertions(+) diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst index 39544f835..fe11571d2 100644 --- a/Documentation/intro/install/dpdk.rst +++ b/Documentation/intro/install/dpdk.rst @@ -717,6 +717,15 @@ Limitations around is temporary and is expected to be removed once a method is provided by DPDK to query the upper bound MTU value for a given device. +- Flow Control: When using i40e devices (Intel(R) 700 Series) it is recommended + to set Link State Change detection to interrupt mode. Otherwise it has been + observed that using the default polling mode, flow control changes may not be + applied, and flow control states will not be reflected correctly. + The issue is under investigation, this is a temporary work around. + + For information about setting Link State Change detection, refer to + :ref:`lsc-detection`. + Reporting Bugs -------------- diff --git a/Documentation/topics/dpdk/phy.rst b/Documentation/topics/dpdk/phy.rst index 38e52c8de..55a98e2b0 100644 --- a/Documentation/topics/dpdk/phy.rst +++ b/Documentation/topics/dpdk/phy.rst @@ -385,6 +385,8 @@ Jumbo Frames DPDK physical ports can be configured to use Jumbo Frames. For more information, refer to :doc:`jumbo-frames`. +.. 
_lsc-detection: + Link State Change (LSC) detection configuration ----------------------------------------------- -- GitLab From 0f6a89d07f3f2766c1f54b2c6bf9c1d2057cf4de Mon Sep 17 00:00:00 2001 From: Ian Stokes Date: Tue, 6 Oct 2020 18:09:20 +0100 Subject: [PATCH 296/432] dpif-netdev: Fix typo in copyright header. Reported-by: David Marchand Fixes: 352b6c7116cd ("dpif-lookup: add avx512 gather implementation.") Fixes: f5ace7cd8a85 ("dpif-netdev: Move dpcls lookup structures to .h") Cc: Harry Van Haaren Signed-off-by: Ian Stokes Acked-by: Harry van Haaren Acked-by: Flavio Leitner --- lib/dpif-netdev-lookup-avx512-gather.c | 2 +- lib/dpif-netdev-private.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/dpif-netdev-lookup-avx512-gather.c b/lib/dpif-netdev-lookup-avx512-gather.c index 12a01a34a..5e3634249 100644 --- a/lib/dpif-netdev-lookup-avx512-gather.c +++ b/lib/dpif-netdev-lookup-avx512-gather.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Intel Corperation. + * Copyright (c) 2020, Intel Corporation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h index bdc150d45..4fda1220b 100644 --- a/lib/dpif-netdev-private.h +++ b/lib/dpif-netdev-private.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc. - * Copyright (c) 2019 Intel Corperation. + * Copyright (c) 2019 Intel Corporation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. -- GitLab From 61069e7be6df5784089e8a4e65f85b55a6166bb8 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 16 Sep 2020 20:02:46 +0200 Subject: [PATCH 297/432] system-userspace-packet-type-aware.at: Wait for ip address updates. ovs-router module checks for the source ip address of the interface while adding a new route. 
netdev module doesn't request ip addresses from the system every time, but instead it caches currently assigned ip addresses and updates the cache on netlink notifications if needed. So, there is a slight delay between setting ip address on interface in a system and a moment OVS updates list of ip addresses of this interface. If route addition happens within this time frame, it fails with the following error: # ovs-appctl ovs/route/add 10.0.0.0/24 br-p1 Error while inserting route. ovs-appctl: ovs-vswitchd: server returned an error This makes system tests fail frequently. Let's wait until the local route is successfully added. This will mean that OVS finished processing of a netlink event and will use up to date list of ip addresses on desired interface. Fixes: 526cf4e1d6a8 ("tests: Added unit tests in packet-type-aware.at") Signed-off-by: Ilya Maximets Acked-by: Aaron Conole Acked-by: Flavio Leitner --- tests/system-userspace-packet-type-aware.at | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/system-userspace-packet-type-aware.at b/tests/system-userspace-packet-type-aware.at index c2246316d..974304758 100644 --- a/tests/system-userspace-packet-type-aware.at +++ b/tests/system-userspace-packet-type-aware.at @@ -129,6 +129,7 @@ AT_CHECK([ ip addr add 10.0.0.1/24 dev br-p1 ip link set br-p1 up ], [0], [stdout]) +OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep -q br-p1]) AT_CHECK([ ovs-appctl ovs/route/add 10.0.0.0/24 br-p1 @@ -141,6 +142,7 @@ AT_CHECK([ ip addr add 20.0.0.2/24 dev br-p2 ip link set br-p2 up ], [0], [stdout]) +OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep -q br-p2]) AT_CHECK([ ovs-appctl ovs/route/add 20.0.0.0/24 br-p2 @@ -153,6 +155,7 @@ AT_CHECK([ ip addr add 30.0.0.3/24 dev br-p3 ip link set br-p3 up ], [0], [stdout]) +OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep -q br-p3]) AT_CHECK([ ovs-appctl ovs/route/add 30.0.0.0/24 br-p3 -- GitLab From 7b2e999fd7594a9252d38a52cd40f131bb13d950 Mon Sep 17 00:00:00 2001 From: Numan Siddique Date: 
Fri, 18 Sep 2020 20:45:36 +0530 Subject: [PATCH 298/432] smap: Add smap_get_uint() helper function. This helper function is required by OVN. Suggested-by: Dumitru Ceara Signed-off-by: Numan Siddique Acked-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- lib/smap.c | 16 ++++++++++++++++ lib/smap.h | 2 ++ 2 files changed, 18 insertions(+) diff --git a/lib/smap.c b/lib/smap.c index 149b8b243..e82261497 100644 --- a/lib/smap.c +++ b/lib/smap.c @@ -247,6 +247,22 @@ smap_get_int(const struct smap *smap, const char *key, int def) return i_value; } +/* Gets the value associated with 'key' in 'smap' and converts it to an + * unsigned int. If 'key' is not in 'smap' or a valid unsigned integer + * can't be parsed from it's value, returns 'def'. */ +unsigned int +smap_get_uint(const struct smap *smap, const char *key, unsigned int def) +{ + const char *value = smap_get(smap, key); + unsigned int u_value; + + if (!value || !str_to_uint(value, 10, &u_value)) { + return def; + } + + return u_value; +} + /* Gets the value associated with 'key' in 'smap' and converts it to an * unsigned long long. If 'key' is not in 'smap' or a valid number can't be * parsed from it's value, returns 'def'. 
*/ diff --git a/lib/smap.h b/lib/smap.h index 766c65f7f..a92115966 100644 --- a/lib/smap.h +++ b/lib/smap.h @@ -104,6 +104,8 @@ const char *smap_get_def(const struct smap *, const char *key, struct smap_node *smap_get_node(const struct smap *, const char *); bool smap_get_bool(const struct smap *smap, const char *key, bool def); int smap_get_int(const struct smap *smap, const char *key, int def); +unsigned int smap_get_uint(const struct smap *smap, const char *key, + unsigned int def); unsigned long long int smap_get_ullong(const struct smap *, const char *key, unsigned long long def); bool smap_get_uuid(const struct smap *, const char *key, struct uuid *); -- GitLab From 6edb0dd34082adb02c6f862093343ce6bc7ed5e7 Mon Sep 17 00:00:00 2001 From: Alin Gabriel Serdean Date: Wed, 23 Sep 2020 14:22:47 +0300 Subject: [PATCH 299/432] ovsdb-idl.at: Queue for termination all OVSDB IDL pids. When running OVSDB cluster tests on Windows not all the ovsdb processes are terminated. Queue up the pids of the started processes for termination when the test stops. 
Signed-off-by: Alin Gabriel Serdean Signed-off-by: Ilya Maximets --- tests/ovsdb-idl.at | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index 261f4f323..b46258591 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -43,13 +43,13 @@ m4_define([OVSDB_CLUSTER_START_IDLTEST], AT_CHECK([ovsdb-tool join-cluster s$i.db \ $schema_name unix:s$i.raft unix:s1.raft]) done + on_exit 'kill $(cat s*.pid)' for i in $(seq $n); do AT_CHECK([ovsdb-server -vraft -vconsole:warn --detach --no-chdir \ --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i \ --remote=punix:s$i.ovsdb \ m4_if([$2], [], [], [--remote=$2]) s$i.db]) done - on_exit 'kill $(cat s*.pid)' for i in $(seq $n); do OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s$i cluster/status ${schema_name} \ -- GitLab From cade1c4642064f472b9c9796fc4194fbe13a3789 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 29 Sep 2020 17:07:31 -0300 Subject: [PATCH 300/432] ofproto-dpif-upcall: Log the value of flow limit. The datapath flow limit is calculated by revalidators so log the value as well. 
Signed-off-by: Flavio Leitner Acked-by: Eelco Chaudron Signed-off-by: Ilya Maximets --- ofproto/ofproto-dpif-upcall.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 72a5b4d73..195b01c13 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -1283,7 +1283,9 @@ should_install_flow(struct udpif *udpif, struct upcall *upcall) atomic_read_relaxed(&udpif->flow_limit, &flow_limit); if (udpif_get_n_flows(udpif) >= flow_limit) { COVERAGE_INC(upcall_flow_limit_hit); - VLOG_WARN_RL(&rl, "upcall: datapath flow limit reached"); + VLOG_WARN_RL(&rl, + "upcall: datapath reached the dynamic limit of %u flows.", + flow_limit); return false; } -- GitLab From b0672f4ba23a0201553d78ada964aec5e9b5b13b Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Wed, 30 Sep 2020 16:23:59 -0300 Subject: [PATCH 301/432] ofproto-dpif-upcall: Log the emergency flow flush. When the number of flows in the datapath reaches twice the maximum, revalidators will delete all flows as an emergency action to recover. In that case, log a message with values and increase a coverage counter. Signed-off-by: Flavio Leitner Acked-by: Eelco Chaudron Signed-off-by: Ilya Maximets --- ofproto/ofproto-dpif-upcall.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 195b01c13..e022fde27 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -57,6 +57,7 @@ COVERAGE_DEFINE(upcall_ukey_contention); COVERAGE_DEFINE(upcall_ukey_replace); COVERAGE_DEFINE(revalidate_missed_dp_flow); COVERAGE_DEFINE(upcall_flow_limit_hit); +COVERAGE_DEFINE(upcall_flow_limit_kill); /* A thread that reads upcalls from dpif, forwards each upcall's packet, * and possibly sets up a kernel flow as a cache. 
*/ @@ -2607,6 +2608,7 @@ revalidate(struct revalidator *revalidator) struct udpif *udpif = revalidator->udpif; struct dpif_flow_dump_thread *dump_thread; uint64_t dump_seq, reval_seq; + bool kill_warn_print = true; unsigned int flow_limit; dump_seq = seq_read(udpif->dump_seq); @@ -2623,6 +2625,7 @@ revalidate(struct revalidator *revalidator) long long int max_idle; long long int now; + size_t kill_all_limit; size_t n_dp_flows; bool kill_them_all; @@ -2650,7 +2653,23 @@ revalidate(struct revalidator *revalidator) COVERAGE_INC(upcall_flow_limit_hit); } - kill_them_all = n_dp_flows > flow_limit * 2; + kill_them_all = false; + kill_all_limit = flow_limit * 2; + if (OVS_UNLIKELY(n_dp_flows > kill_all_limit)) { + static struct vlog_rate_limit rlem = VLOG_RATE_LIMIT_INIT(1, 1); + + kill_them_all = true; + COVERAGE_INC(upcall_flow_limit_kill); + if (kill_warn_print) { + kill_warn_print = false; + VLOG_WARN_RL(&rlem, + "Number of datapath flows (%"PRIuSIZE") twice as high as " + "current dynamic flow limit (%"PRIuSIZE"). " + "Starting to delete flows unconditionally " + "as an emergency measure.", n_dp_flows, kill_all_limit); + } + } + max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle; udpif->dpif->current_ms = time_msec(); -- GitLab From f00c47b8f33d28fbff5347c1c4e318ddcbdaf10a Mon Sep 17 00:00:00 2001 From: Ted Elhourani Date: Tue, 6 Oct 2020 20:25:56 +0000 Subject: [PATCH 302/432] dns-resolve: Allow unbound's config file to be set through an env var. When an unbound context is created, check whether OVS_UNBOUND_CONF has been set. If a valid config file is supplied then use it to configure the context. The procedure returns if the config file is invalid. If no config file is found then the default unbound config is used. 
Reviewed-by: Yifeng Sun Signed-off-by: Ted Elhourani Signed-off-by: Ilya Maximets --- Documentation/intro/install/general.rst | 4 +++- NEWS | 2 ++ lib/dns-resolve.c | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst index 09f2c13f1..c4300cd53 100644 --- a/Documentation/intro/install/general.rst +++ b/Documentation/intro/install/general.rst @@ -97,7 +97,9 @@ need the following software: specifying OpenFlow and OVSDB remotes. If unbound library is already installed, then Open vSwitch will automatically build with support for it. The environment variable OVS_RESOLV_CONF can be used to specify DNS server - configuration file (the default file on Linux is /etc/resolv.conf). + configuration file (the default file on Linux is /etc/resolv.conf), and + environment variable OVS_UNBOUND_CONF can be used to specify the + configuration file for unbound. On Linux, you may choose to compile the kernel module that comes with the Open vSwitch distribution or to use the kernel module built into the Linux kernel diff --git a/NEWS b/NEWS index a858a3b35..4619e73bf 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,8 @@ Post-v2.14.0 status of the storage that's backing a database. - DPDK: * Removed support for vhost-user dequeue zero-copy. + - The environment variable OVS_UNBOUND_CONF, if set, is now used + as the DNS resolver's (unbound) configuration file. 
v2.14.0 - 17 Aug 2020 diff --git a/lib/dns-resolve.c b/lib/dns-resolve.c index 1ff58960f..d34451434 100644 --- a/lib/dns-resolve.c +++ b/lib/dns-resolve.c @@ -82,6 +82,18 @@ dns_resolve_init(bool is_daemon) return; } + const char *ub_conf_filename = getenv("OVS_UNBOUND_CONF"); + if (ub_conf_filename != NULL) { + int retval = ub_ctx_config(ub_ctx__, ub_conf_filename); + if (retval != 0) { + VLOG_WARN_RL(&rl, "Failed to set libunbound context config: %s", + ub_strerror(retval)); + ub_ctx_delete(ub_ctx__); + ub_ctx__ = NULL; + return; + } + } + const char *filename = getenv("OVS_RESOLV_CONF"); if (!filename) { #ifdef _WIN32 -- GitLab From 61e157ca69ae226a3b73d32356afa0f2b74c4e4c Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 8 Oct 2020 15:53:43 +0200 Subject: [PATCH 303/432] datapath: Fix exposing OVS_TUNNEL_KEY_ATTR_GTPU_OPTS to kernel module. Kernel module doesn't know about GTPU and it should return correct out-of-range error in case this tunnel attribute passed there for any reason. Current out-of-tree module will pass the range check and will try to access ovs_tunnel_key_lens[] array by index OVS_TUNNEL_KEY_ATTR_GTPU_OPTS. Even though it might not produce issues in current code, this is not a good thing to do since ovs_tunnel_key_lens[] array is not explicitly initialized for OVS_TUNNEL_KEY_ATTR_GTPU_OPTS and we will likely have misleading error about incorrect attribute length in the end. Fixes: 3c6d05a02e0f ("userspace: Add GTP-U support.") Acked-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/linux/compat/include/linux/openvswitch.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index cc41bbea4..2d884312f 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -405,7 +405,10 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. 
*/ OVS_TUNNEL_KEY_ATTR_PAD, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */ +#ifndef __KERNEL__ + /* Only used within userspace data path. */ OVS_TUNNEL_KEY_ATTR_GTPU_OPTS, /* struct gtpu_metadata */ +#endif __OVS_TUNNEL_KEY_ATTR_MAX }; -- GitLab From 3c6b3a519ae6eae3da4cf7c59894b02b95cdade7 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 7 Oct 2020 15:23:49 +0200 Subject: [PATCH 304/432] travis: Disable check for array of flexible structures in sparse. Sparse introduced new checks for flexible arrays and there is a false-positive in netdev-linux implementation right now that can not be easily fixed. Patch sent to sparse to fix it, but we need to disable the check for now to unblock our CI. lib/netdev-linux.c:1238:19: error: array of flexible structures The issue is with the following code: union { struct cmsghdr cmsg; char buffer[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; } cmsg_buffers[NETDEV_MAX_BURST]; 'struct cmsghdr' contains a flexible array. But this union is a way to ensure correct alignment of 'buffer', suggested by CMSG manpage. Signed-off-by: Ilya Maximets --- .travis/linux-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index 6981d1d47..6b6935794 100755 --- a/.travis/linux-build.sh +++ b/.travis/linux-build.sh @@ -4,7 +4,7 @@ set -o errexit set -x CFLAGS_FOR_OVS="-g -O2" -SPARSE_FLAGS="" +SPARSE_FLAGS="-Wno-flexible-array-array" EXTRA_OPTS="--enable-Werror" function install_kernel() -- GitLab From bbe2e39287476e7ba3e71d064adb8c0735cf0e95 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 5 Oct 2020 12:09:55 +0200 Subject: [PATCH 305/432] dpctl: Fix broken flow deletion via ovs-dpctl due to missing ufid. Current code generates UFID for flows installed by ovs-dpctl. This leads to inability to remove such flows by the same command. 
Ex: ovs-dpctl add-dp test ovs-dpctl add-if test vport0 ovs-dpctl add-flow test "in_port(0),eth(),eth_type(0x800),ipv4(src=100.1.0.1)" 0 ovs-dpctl del-flow test "in_port(0),eth(),eth_type(0x800),ipv4(src=100.1.0.1)" dpif|WARN|system@test: failed to flow_del (No such file or directory) ufid:e4457189-3990-4a01-bdcf-1e5f8b208711 in_port(0), eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00),eth_type(0x0800), ipv4(src=100.1.0.1,dst=0.0.0.0,proto=0,tos=0,ttl=0,frag=no) ovs-dpctl: deleting flow (No such file or directory) Perhaps you need to specify a UFID? During the del-flow operation a UFID is generated too; however, the resulting value is different from the one generated during add-flow. This happens because the odp_flow_key_hash() function uses a random base value for flow hashes which is different on every invocation. That is not an issue while running 'ovs-appctl dpctl/{add,del}-flow' because execution of these requests happens in the context of the OVS main process, i.e. the same random seed is used. Commit e61984e781e6 was intended to allow offloading for flows added by the dpctl/add-flow unixctl command, so it's better to generate UFIDs conditionally inside the dpctl command handler only for appctl invocations. Offloading is not possible from the ovs-dpctl utility anyway. There are still a couple of corner cases: It will not be possible to remove a flow by 'ovs-appctl dpctl/del-flow' without specifying a UFID if the main OVS process was restarted since flow addition, and it will not be possible to remove a flow by ovs-dpctl without specifying a UFID if it was added by 'ovs-appctl dpctl/add-flow'. But these scenarios seem minor since these commands are intended for testing only. 
Reported-by: Eelco Chaudron Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2020-September/374863.html Fixes: e61984e781e6 ("dpif-netlink: Generate ufids for installing TC flowers") Signed-off-by: Ilya Maximets Acked-by: Eelco Chaudron Tested-by: Eelco Chaudron --- lib/dpctl.c | 21 ++++++++++++++++++++- lib/dpif-netlink.c | 45 --------------------------------------------- 2 files changed, 20 insertions(+), 46 deletions(-) diff --git a/lib/dpctl.c b/lib/dpctl.c index 09ae97f25..2f859a753 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -1157,6 +1157,16 @@ dpctl_put_flow(int argc, const char *argv[], enum dpif_flow_put_flags flags, goto out_freeactions; } + if (!ufid_present && dpctl_p->is_appctl) { + /* Generating UFID for this flow so it could be offloaded to HW. We're + * not doing that if invoked from ovs-dpctl utility because + * odp_flow_key_hash() uses randomly generated base for flow hashes + * that will be different for each invocation. And, anyway, offloading + * is only available via appctl. */ + odp_flow_key_hash(key.data, key.size, &ufid); + ufid_present = true; + } + /* The flow will be added on all pmds currently in the datapath. */ error = dpif_flow_put(dpif, flags, key.data, key.size, @@ -1268,6 +1278,7 @@ dpctl_del_flow(int argc, const char *argv[], struct dpctl_params *dpctl_p) struct ofpbuf mask; /* To be ignored. */ struct dpif *dpif; ovs_u128 ufid; + bool ufid_generated; bool ufid_present; struct simap port_names; int n, error; @@ -1303,6 +1314,14 @@ dpctl_del_flow(int argc, const char *argv[], struct dpctl_params *dpctl_p) goto out; } + if (!ufid_present && dpctl_p->is_appctl) { + /* While adding flow via appctl we're generating UFID to make HW + * offloading possible. Generating UFID here to be sure that such + * flows could be removed the same way they were added. */ + odp_flow_key_hash(key.data, key.size, &ufid); + ufid_present = ufid_generated = true; + } + /* The flow will be deleted from all pmds currently in the datapath. 
*/ error = dpif_flow_del(dpif, key.data, key.size, ufid_present ? &ufid : NULL, PMD_ID_NULL, @@ -1310,7 +1329,7 @@ dpctl_del_flow(int argc, const char *argv[], struct dpctl_params *dpctl_p) if (error) { dpctl_error(dpctl_p, error, "deleting flow"); - if (error == ENOENT && !ufid_present) { + if (error == ENOENT && (!ufid_present || ufid_generated)) { struct ds s; ds_init(&s); diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 7da4fb54d..2f881e4fa 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2237,55 +2237,12 @@ dpif_netlink_operate_chunks(struct dpif_netlink *dpif, struct dpif_op **ops, } } -static void -dpif_netlink_try_update_ufid__(struct dpif_op *op, ovs_u128 *ufid) -{ - switch (op->type) { - case DPIF_OP_FLOW_PUT: - if (!op->flow_put.ufid) { - odp_flow_key_hash(op->flow_put.key, op->flow_put.key_len, - ufid); - op->flow_put.ufid = ufid; - } - break; - case DPIF_OP_FLOW_DEL: - if (!op->flow_del.ufid) { - odp_flow_key_hash(op->flow_del.key, op->flow_del.key_len, - ufid); - op->flow_del.ufid = ufid; - } - break; - case DPIF_OP_FLOW_GET: - if (!op->flow_get.ufid) { - odp_flow_key_hash(op->flow_get.key, op->flow_get.key_len, - ufid); - op->flow_get.ufid = ufid; - } - break; - case DPIF_OP_EXECUTE: - default: - break; - } -} - -static void -dpif_netlink_try_update_ufid(struct dpif_op **ops, ovs_u128 *ufid, - size_t n_ops) -{ - int i; - - for (i = 0; i < n_ops; i++) { - dpif_netlink_try_update_ufid__(ops[i], &ufid[i]); - } -} - static void dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops, enum dpif_offload_type offload_type) { struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); struct dpif_op *new_ops[OPERATE_MAX_OPS]; - ovs_u128 ufids[OPERATE_MAX_OPS]; int count = 0; int i = 0; int err = 0; @@ -2295,8 +2252,6 @@ dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops, return; } - dpif_netlink_try_update_ufid(ops, ufids, n_ops); - if (offload_type != DPIF_OFFLOAD_NEVER && 
netdev_is_flow_api_enabled()) { while (n_ops > 0) { count = 0; -- GitLab From 807152a4ddfb89b65ef75c6b12937ecd68ea8cb3 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 17 Jun 2020 14:55:45 -0700 Subject: [PATCH 306/432] Use primary/secondary, not master/slave, as names for OpenFlow roles. Signed-off-by: Ben Pfaff Acked-by: Alin Gabriel Serdean --- Documentation/topics/design.rst | 110 +++++++++++----------- NEWS | 3 + include/openflow/nicira-ext.h | 22 ++--- include/openflow/openflow-1.2.h | 6 +- include/openflow/openflow-1.4.h | 8 +- include/openvswitch/ofp-connection.h | 6 +- include/openvswitch/ofp-errors.h | 9 +- lib/ofp-connection.c | 94 ++++++++++--------- ofproto/connmgr.c | 72 +++++++------- ofproto/connmgr.h | 4 +- ofproto/ofproto.c | 36 +++---- tests/ofp-print.at | 49 +++++----- tests/ofproto-dpif.at | 12 +-- tests/ofproto.at | 134 +++++++++++++-------------- utilities/ovs-ofctl.8.in | 8 +- vswitchd/bridge.c | 8 +- vswitchd/vswitch.xml | 23 +++-- 17 files changed, 308 insertions(+), 296 deletions(-) diff --git a/Documentation/topics/design.rst b/Documentation/topics/design.rst index 22e966687..656d60673 100644 --- a/Documentation/topics/design.rst +++ b/Documentation/topics/design.rst @@ -70,79 +70,79 @@ that the message is suppressed. .. 
table:: ``OFPT_PACKET_IN`` / ``NXT_PACKET_IN`` - =========================================== ======= ===== - master/ - message and reason code other slave - =========================================== ======= ===== - ``OFPR_NO_MATCH`` yes --- - ``OFPR_ACTION`` yes --- - ``OFPR_INVALID_TTL`` --- --- - ``OFPR_ACTION_SET`` (OF1.4+) yes --- - ``OFPR_GROUP`` (OF1.4+) yes --- - ``OFPR_PACKET_OUT`` (OF1.4+) yes --- - =========================================== ======= ===== + =========================================== ======== ========= + primary/ + message and reason code other secondary + =========================================== ======== ========= + ``OFPR_NO_MATCH`` yes --- + ``OFPR_ACTION`` yes --- + ``OFPR_INVALID_TTL`` --- --- + ``OFPR_ACTION_SET`` (OF1.4+) yes --- + ``OFPR_GROUP`` (OF1.4+) yes --- + ``OFPR_PACKET_OUT`` (OF1.4+) yes --- + =========================================== ======== ========= .. table:: ``OFPT_FLOW_REMOVED`` / ``NXT_FLOW_REMOVED`` - =========================================== ======= ===== - master/ - message and reason code other slave - =========================================== ======= ===== - ``OFPRR_IDLE_TIMEOUT`` yes --- - ``OFPRR_HARD_TIMEOUT`` yes --- - ``OFPRR_DELETE`` yes --- - ``OFPRR_GROUP_DELETE`` (OF1.3+) yes --- - ``OFPRR_METER_DELETE`` (OF1.4+) yes --- - ``OFPRR_EVICTION`` (OF1.4+) yes --- - =========================================== ======= ===== + =========================================== ======== ========= + primary/ + message and reason code other secondary + =========================================== ======== ========= + ``OFPRR_IDLE_TIMEOUT`` yes --- + ``OFPRR_HARD_TIMEOUT`` yes --- + ``OFPRR_DELETE`` yes --- + ``OFPRR_GROUP_DELETE`` (OF1.3+) yes --- + ``OFPRR_METER_DELETE`` (OF1.4+) yes --- + ``OFPRR_EVICTION`` (OF1.4+) yes --- + =========================================== ======== ========= .. 
table:: ``OFPT_PORT_STATUS`` - =========================================== ======= ===== - master/ - message and reason code other slave - =========================================== ======= ===== - ``OFPPR_ADD`` yes yes - ``OFPPR_DELETE`` yes yes - ``OFPPR_MODIFY`` yes yes - =========================================== ======= ===== - + =========================================== ======== ========= + primary/ + message and reason code other secondary + =========================================== ======== ========= + ``OFPPR_ADD`` yes yes + ``OFPPR_DELETE`` yes yes + ``OFPPR_MODIFY`` yes yes + =========================================== ======== ========= + .. table:: ``OFPT_ROLE_REQUEST`` / ``OFPT_ROLE_REPLY`` (OF1.4+) - - =========================================== ======= ===== - master/ - message and reason code other slave - =========================================== ======= ===== - ``OFPCRR_MASTER_REQUEST`` --- --- - ``OFPCRR_CONFIG`` --- --- - ``OFPCRR_EXPERIMENTER`` --- --- - =========================================== ======= ===== + + =========================================== ======== ========= + primary/ + message and reason code other secondary + =========================================== ======== ========= + ``OFPCRR_PRIMARY_REQUEST`` --- --- + ``OFPCRR_CONFIG`` --- --- + ``OFPCRR_EXPERIMENTER`` --- --- + =========================================== ======== ========= .. 
table:: ``OFPT_TABLE_STATUS`` (OF1.4+) - =========================================== ======= ===== - master/ - message and reason code other slave - =========================================== ======= ===== - ``OFPTR_VACANCY_DOWN`` --- --- - ``OFPTR_VACANCY_UP`` --- --- - =========================================== ======= ===== + =========================================== ======== ========= + primary/ + message and reason code other secondary + =========================================== ======== ========= + ``OFPTR_VACANCY_DOWN`` --- --- + ``OFPTR_VACANCY_UP`` --- --- + =========================================== ======== ========= .. table:: ``OFPT_REQUESTFORWARD`` (OF1.4+) - =========================================== ======= ===== - master/ - message and reason code other slave - =========================================== ======= ===== - ``OFPRFR_GROUP_MOD`` --- --- - ``OFPRFR_METER_MOD`` --- --- - =========================================== ======= ===== + =========================================== ======== ========= + primary/ + message and reason code other secondary + =========================================== ======== ========= + ``OFPRFR_GROUP_MOD`` --- --- + ``OFPRFR_METER_MOD`` --- --- + =========================================== ======== ========= The ``NXT_SET_ASYNC_CONFIG`` message directly sets all of the values in this table for the current connection. The ``OFPC_INVALID_TTL_TO_CONTROLLER`` bit in the ``OFPT_SET_CONFIG`` message controls the setting for -``OFPR_INVALID_TTL`` for the "master" role. +``OFPR_INVALID_TTL`` for the "primary" role. ``OFPAT_ENQUEUE`` ----------------- diff --git a/NEWS b/NEWS index 4619e73bf..d9a7078a1 100644 --- a/NEWS +++ b/NEWS @@ -52,6 +52,9 @@ v2.14.0 - 17 Aug 2020 - Tunnels: TC Flower offload * Tunnel Local endpoint address masked match are supported. * Tunnel Romte endpoint address masked match are supported. 
+ - Terminology: + * The terms "master" and "slave" have been replaced by "primary" and + "secondary", respectively, for OpenFlow connection roles. v2.13.0 - 14 Feb 2020 diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h index dc12101f2..b68804991 100644 --- a/include/openflow/nicira-ext.h +++ b/include/openflow/nicira-ext.h @@ -296,16 +296,16 @@ enum nx_packet_in2_prop_type { * * The other possible roles are a related pair: * - * - Master (NX_ROLE_MASTER) is equivalent to Other, except that there may - * be at most one Master controller at a time: when a controller - * configures itself as Master, any existing Master is demoted to the - * Slave role. + * - Primary (NX_ROLE_PRIMARY) is equivalent to Other, except that there may + * be at most one Primary controller at a time: when a controller + * configures itself as Primary, any existing Primary is demoted to the + * Secondary role. * - * - Slave (NX_ROLE_SLAVE) allows the controller read-only access to + * - Secondary (NX_ROLE_SECONDARY) allows the controller read-only access to * OpenFlow features. In particular attempts to modify the flow table * will be rejected with an OFPBRC_EPERM error. * - * Slave controllers do not receive OFPT_PACKET_IN or OFPT_FLOW_REMOVED + * Secondary controllers do not receive OFPT_PACKET_IN or OFPT_FLOW_REMOVED * messages, but they do receive OFPT_PORT_STATUS messages. */ struct nx_role_request { @@ -315,23 +315,23 @@ OFP_ASSERT(sizeof(struct nx_role_request) == 4); enum nx_role { NX_ROLE_OTHER, /* Default role, full access. */ - NX_ROLE_MASTER, /* Full access, at most one. */ - NX_ROLE_SLAVE /* Read-only access. */ + NX_ROLE_PRIMARY, /* Full access, at most one. */ + NX_ROLE_SECONDARY /* Read-only access. */ }; /* NXT_SET_ASYNC_CONFIG. * * Sent by a controller, this message configures the asynchronous messages that * the controller wants to receive. 
Element 0 in each array specifies messages - * of interest when the controller has an "other" or "master" role; element 1, - * when the controller has a "slave" role. + * of interest when the controller has an "other" or "primary" role; element 1, + * when the controller has a "secondary" role. * * Each array element is a bitmask in which a 0-bit disables receiving a * particular message and a 1-bit enables receiving it. Each bit controls the * message whose 'reason' corresponds to the bit index. For example, the bit * with value 1<<2 == 4 in port_status_mask[1] determines whether the * controller will receive OFPT_PORT_STATUS messages with reason OFPPR_MODIFY - * (value 2) when the controller has a "slave" role. + * (value 2) when the controller has a "secondary" role. * * As a side effect, for service controllers, this message changes the * miss_send_len from default of zero to OFP_DEFAULT_MISS_SEND_LEN (128). diff --git a/include/openflow/openflow-1.2.h b/include/openflow/openflow-1.2.h index 30e220cfb..2952aec14 100644 --- a/include/openflow/openflow-1.2.h +++ b/include/openflow/openflow-1.2.h @@ -176,7 +176,7 @@ enum ofp12_group_capabilities { struct ofp12_role_request { ovs_be32 role; /* One of OFPCR12_ROLE_*. */ uint8_t pad[4]; /* Align to 64 bits. */ - ovs_be64 generation_id; /* Master Election Generation Id */ + ovs_be64 generation_id; /* Primary Election Generation Id */ }; OFP_ASSERT(sizeof(struct ofp12_role_request) == 16); @@ -184,8 +184,8 @@ OFP_ASSERT(sizeof(struct ofp12_role_request) == 16); enum ofp12_controller_role { OFPCR12_ROLE_NOCHANGE, /* Don't change current role. */ OFPCR12_ROLE_EQUAL, /* Default role, full access. */ - OFPCR12_ROLE_MASTER, /* Full access, at most one master. */ - OFPCR12_ROLE_SLAVE, /* Read-only access. */ + OFPCR12_ROLE_PRIMARY, /* Full access, at most one primary. */ + OFPCR12_ROLE_SECONDARY, /* Read-only access. */ }; /* Packet received on port (datapath -> controller). 
*/ diff --git a/include/openflow/openflow-1.4.h b/include/openflow/openflow-1.4.h index 2bfa16b63..be191180b 100644 --- a/include/openflow/openflow-1.4.h +++ b/include/openflow/openflow-1.4.h @@ -274,7 +274,7 @@ struct ofp14_role_status { ovs_be32 role; /* One of OFPCR_ROLE_*. */ uint8_t reason; /* One of OFPCRR_*. */ uint8_t pad[3]; /* Align to 64 bits. */ - ovs_be64 generation_id; /* Master Election Generation Id */ + ovs_be64 generation_id; /* Primary Election Generation Id */ /* Followed by a list of struct ofp14_role_prop_header */ }; @@ -282,9 +282,9 @@ OFP_ASSERT(sizeof(struct ofp14_role_status) == 16); /* What changed about the controller role */ enum ofp14_controller_role_reason { - OFPCRR_MASTER_REQUEST = 0, /* Another controller asked to be master. */ - OFPCRR_CONFIG = 1, /* Configuration changed on the switch. */ - OFPCRR_EXPERIMENTER = 2, /* Experimenter data changed. */ + OFPCRR_PRIMARY_REQUEST = 0, /* Another controller asked to be primary. */ + OFPCRR_CONFIG = 1, /* Configuration changed on the switch. */ + OFPCRR_EXPERIMENTER = 2, /* Experimenter data changed. */ OFPCRR_N_REASONS /* Denotes number of reasons. 
*/ }; diff --git a/include/openvswitch/ofp-connection.h b/include/openvswitch/ofp-connection.h index 5fb143157..1e844e07f 100644 --- a/include/openvswitch/ofp-connection.h +++ b/include/openvswitch/ofp-connection.h @@ -69,10 +69,10 @@ enum ofputil_async_msg_type { const char *ofputil_async_msg_type_to_string(enum ofputil_async_msg_type); struct ofputil_async_cfg { - uint32_t master[OAM_N_TYPES]; - uint32_t slave[OAM_N_TYPES]; + uint32_t primary[OAM_N_TYPES]; + uint32_t secondary[OAM_N_TYPES]; }; -#define OFPUTIL_ASYNC_CFG_INIT (struct ofputil_async_cfg) { .master[0] = 0 } +#define OFPUTIL_ASYNC_CFG_INIT (struct ofputil_async_cfg) { .primary[0] = 0 } enum ofperr ofputil_decode_set_async_config(const struct ofp_header *, bool loose, diff --git a/include/openvswitch/ofp-errors.h b/include/openvswitch/ofp-errors.h index a3f8142df..8c8511d65 100644 --- a/include/openvswitch/ofp-errors.h +++ b/include/openvswitch/ofp-errors.h @@ -115,10 +115,10 @@ enum ofperr { * OFPBIC_BAD_EXP_TYPE. */ /* Expected: 0x0,1,5 in OF1.0 means both OFPBRC_EPERM and - * OFPBRC_IS_SLAVE. */ + * OFPBRC_IS_SECONDARY. */ /* Expected: 0x0,1,5 in OF1.1 means both OFPBRC_EPERM and - * OFPBRC_IS_SLAVE. */ + * OFPBRC_IS_SECONDARY. */ /* ## ------------------ ## */ /* ## OFPET_HELLO_FAILED ## */ @@ -168,8 +168,9 @@ enum ofperr { * code defined the specification. ] */ OFPERR_OFPBRC_BAD_TABLE_ID, - /* OF1.0-1.1(1,5), OF1.2+(1,10). Denied because controller is slave. */ - OFPERR_OFPBRC_IS_SLAVE, + /* OF1.0-1.1(1,5), OF1.2+(1,10). Denied because controller has secondary + * role. (Secondary controllers have only read-only access.) */ + OFPERR_OFPBRC_IS_SECONDARY, /* NX1.0-1.1(1,514), OF1.2+(1,11). Invalid or missing port. 
[ A * non-standard error (1,514), formerly OFPERR_NXBRC_BAD_IN_PORT is used diff --git a/lib/ofp-connection.c b/lib/ofp-connection.c index 23b80ff39..3a7611b00 100644 --- a/lib/ofp-connection.c +++ b/lib/ofp-connection.c @@ -48,8 +48,8 @@ ofputil_decode_role_message(const struct ofp_header *oh, if (orr->role != htonl(OFPCR12_ROLE_NOCHANGE) && orr->role != htonl(OFPCR12_ROLE_EQUAL) && - orr->role != htonl(OFPCR12_ROLE_MASTER) && - orr->role != htonl(OFPCR12_ROLE_SLAVE)) { + orr->role != htonl(OFPCR12_ROLE_PRIMARY) && + orr->role != htonl(OFPCR12_ROLE_SECONDARY)) { return OFPERR_OFPRRFC_BAD_ROLE; } @@ -68,12 +68,12 @@ ofputil_decode_role_message(const struct ofp_header *oh, const struct nx_role_request *nrr = b.msg; BUILD_ASSERT(NX_ROLE_OTHER + 1 == OFPCR12_ROLE_EQUAL); - BUILD_ASSERT(NX_ROLE_MASTER + 1 == OFPCR12_ROLE_MASTER); - BUILD_ASSERT(NX_ROLE_SLAVE + 1 == OFPCR12_ROLE_SLAVE); + BUILD_ASSERT(NX_ROLE_PRIMARY + 1 == OFPCR12_ROLE_PRIMARY); + BUILD_ASSERT(NX_ROLE_SECONDARY + 1 == OFPCR12_ROLE_SECONDARY); if (nrr->role != htonl(NX_ROLE_OTHER) && - nrr->role != htonl(NX_ROLE_MASTER) && - nrr->role != htonl(NX_ROLE_SLAVE)) { + nrr->role != htonl(NX_ROLE_PRIMARY) && + nrr->role != htonl(NX_ROLE_SECONDARY)) { return OFPERR_OFPRRFC_BAD_ROLE; } @@ -100,11 +100,11 @@ format_role_generic(struct ds *string, enum ofp12_controller_role role, case OFPCR12_ROLE_EQUAL: ds_put_cstr(string, "equal"); /* OF 1.2 wording */ break; - case OFPCR12_ROLE_MASTER: - ds_put_cstr(string, "master"); + case OFPCR12_ROLE_PRIMARY: + ds_put_cstr(string, "primary"); break; - case OFPCR12_ROLE_SLAVE: - ds_put_cstr(string, "slave"); + case OFPCR12_ROLE_SECONDARY: + ds_put_cstr(string, "secondary"); break; default: OVS_NOT_REACHED(); @@ -148,8 +148,8 @@ ofputil_encode_role_reply(const struct ofp_header *request, struct nx_role_request *nrr; BUILD_ASSERT(NX_ROLE_OTHER == OFPCR12_ROLE_EQUAL - 1); - BUILD_ASSERT(NX_ROLE_MASTER == OFPCR12_ROLE_MASTER - 1); - BUILD_ASSERT(NX_ROLE_SLAVE == 
OFPCR12_ROLE_SLAVE - 1); + BUILD_ASSERT(NX_ROLE_PRIMARY == OFPCR12_ROLE_PRIMARY - 1); + BUILD_ASSERT(NX_ROLE_SECONDARY == OFPCR12_ROLE_SECONDARY - 1); buf = ofpraw_alloc_reply(OFPRAW_NXT_ROLE_REPLY, request, 0); nrr = ofpbuf_put_zeros(buf, sizeof *nrr); @@ -197,8 +197,8 @@ ofputil_decode_role_status(const struct ofp_header *oh, const struct ofp14_role_status *r = b.msg; if (r->role != htonl(OFPCR12_ROLE_NOCHANGE) && r->role != htonl(OFPCR12_ROLE_EQUAL) && - r->role != htonl(OFPCR12_ROLE_MASTER) && - r->role != htonl(OFPCR12_ROLE_SLAVE)) { + r->role != htonl(OFPCR12_ROLE_PRIMARY) && + r->role != htonl(OFPCR12_ROLE_SECONDARY)) { return OFPERR_OFPRRFC_BAD_ROLE; } @@ -218,8 +218,8 @@ ofputil_format_role_status(struct ds *string, ds_put_cstr(string, " reason="); switch (rs->reason) { - case OFPCRR_MASTER_REQUEST: - ds_put_cstr(string, "master_request"); + case OFPCRR_PRIMARY_REQUEST: + ds_put_cstr(string, "primary_request"); break; case OFPCRR_CONFIG: ds_put_cstr(string, "configuration_changed"); @@ -254,13 +254,13 @@ ofputil_async_msg_type_to_string(enum ofputil_async_msg_type type) struct ofp14_async_prop { uint64_t prop_type; enum ofputil_async_msg_type oam; - bool master; + bool primary; uint32_t allowed10, allowed14; }; -#define AP_PAIR(SLAVE_PROP_TYPE, OAM, A10, A14) \ - { SLAVE_PROP_TYPE, OAM, false, A10, (A14) ? (A14) : (A10) }, \ - { (SLAVE_PROP_TYPE + 1), OAM, true, A10, (A14) ? (A14) : (A10) } +#define AP_PAIR(SECONDARY_PROP_TYPE, OAM, A10, A14) \ + { SECONDARY_PROP_TYPE, OAM, false, A10, (A14) ? (A14) : (A10) }, \ + { (SECONDARY_PROP_TYPE + 1), OAM, true, A10, (A14) ? 
(A14) : (A10) } static const struct ofp14_async_prop async_props[] = { AP_PAIR( 0, OAM_PACKET_IN, OFPR10_BITS, OFPR14_BITS), @@ -288,10 +288,10 @@ get_ofp14_async_config_prop_by_prop_type(uint64_t prop_type) static const struct ofp14_async_prop * get_ofp14_async_config_prop_by_oam(enum ofputil_async_msg_type oam, - bool master) + bool primary) { FOR_EACH_ASYNC_PROP (ap) { - if (ap->oam == oam && ap->master == master) { + if (ap->oam == oam && ap->primary == primary) { return ap; } } @@ -310,7 +310,9 @@ encode_async_mask(const struct ofputil_async_cfg *src, const struct ofp14_async_prop *ap, enum ofp_version version) { - uint32_t mask = ap->master ? src->master[ap->oam] : src->slave[ap->oam]; + uint32_t mask = (ap->primary + ? src->primary[ap->oam] + : src->secondary[ap->oam]); return htonl(mask & ofp14_async_prop_allowed(ap, version)); } @@ -342,7 +344,7 @@ decode_async_mask(ovs_be32 src, } } - uint32_t *array = ap->master ? dst->master : dst->slave; + uint32_t *array = ap->primary ? dst->primary : dst->secondary; array[ap->oam] = mask; return 0; } @@ -362,20 +364,20 @@ parse_async_tlv(const struct ofpbuf *property, } if (ofpprop_is_experimenter(ap->prop_type)) { - /* For experimenter properties, whether a property is for the master or - * slave role is indicated by both 'type' and 'exp_type' in struct + /* For experimenter properties, whether a property is for the primary or + * secondary role is indicated by both 'type' and 'exp_type' in struct * ofp_prop_experimenter. Check that these are consistent. */ const struct ofp_prop_experimenter *ope = property->data; - bool should_be_master = ope->type == htons(0xffff); - if (should_be_master != ap->master) { + bool should_be_primary = ope->type == htons(0xffff); + if (should_be_primary != ap->primary) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); VLOG_WARN_RL(&rl, "async property type %#"PRIx16" " "indicates %s role but exp_type %"PRIu32" indicates " "%s role", ntohs(ope->type), - should_be_master ? 
"master" : "slave", + should_be_primary ? "primary" : "secondary", ntohl(ope->exp_type), - ap->master ? "master" : "slave"); + ap->primary ? "primary" : "secondary"); return OFPERR_OFPBPC_BAD_EXP_TYPE; } } @@ -390,9 +392,9 @@ decode_legacy_async_masks(const ovs_be32 masks[2], struct ofputil_async_cfg *dst) { for (int i = 0; i < 2; i++) { - bool master = i == 0; + bool primary = i == 0; const struct ofp14_async_prop *ap - = get_ofp14_async_config_prop_by_oam(oam, master); + = get_ofp14_async_config_prop_by_oam(oam, primary); decode_async_mask(masks[i], ap, version, true, dst); } } @@ -479,9 +481,9 @@ encode_legacy_async_masks(const struct ofputil_async_cfg *ac, ovs_be32 masks[2]) { for (int i = 0; i < 2; i++) { - bool master = i == 0; + bool primary = i == 0; const struct ofp14_async_prop *ap - = get_ofp14_async_config_prop_by_oam(oam, master); + = get_ofp14_async_config_prop_by_oam(oam, primary); masks[i] = encode_async_mask(ac, ap, version); } } @@ -507,11 +509,11 @@ ofputil_put_async_config__(const struct ofputil_async_cfg *ac, encode_async_mask(ac, ap, version)); /* For experimenter properties, we need to use type 0xfffe for - * master and 0xffff for slaves. */ + * primary and 0xffff for secondaries. */ if (ofpprop_is_experimenter(ap->prop_type)) { struct ofp_prop_experimenter *ope = ofpbuf_at_assert(buf, ofs, sizeof *ope); - ope->type = ap->master ? htons(0xffff) : htons(0xfffe); + ope->type = ap->primary ? htons(0xffff) : htons(0xfffe); } } } @@ -592,8 +594,8 @@ ofp_role_reason_to_string(enum ofp14_controller_role_reason reason, char *reasonbuf, size_t bufsize) { switch (reason) { - case OFPCRR_MASTER_REQUEST: - return "master_request"; + case OFPCRR_PRIMARY_REQUEST: + return "primary_request"; case OFPCRR_CONFIG: return "configuration_changed"; @@ -664,12 +666,12 @@ ofputil_format_set_async_config(struct ds *string, const struct ofputil_async_cfg *ac) { for (int i = 0; i < 2; i++) { - ds_put_format(string, "\n %s:\n", i == 0 ? 
"master" : "slave"); + ds_put_format(string, "\n %s:\n", i == 0 ? "primary" : "secondary"); for (uint32_t type = 0; type < OAM_N_TYPES; type++) { ds_put_format(string, "%16s:", ofputil_async_msg_type_to_string(type)); - uint32_t role = i == 0 ? ac->master[type] : ac->slave[type]; + uint32_t role = i == 0 ? ac->primary[type] : ac->secondary[type]; for (int j = 0; j < 32; j++) { if (role & (1u << j)) { char reasonbuf[INT_STRLEN(int) + 1]; @@ -705,17 +707,17 @@ ofputil_async_cfg_default(enum ofp_version version) } struct ofputil_async_cfg oac = { - .master[OAM_PACKET_IN] = pin, - .master[OAM_PORT_STATUS] = OFPPR_BITS, - .slave[OAM_PORT_STATUS] = OFPPR_BITS + .primary[OAM_PACKET_IN] = pin, + .primary[OAM_PORT_STATUS] = OFPPR_BITS, + .secondary[OAM_PORT_STATUS] = OFPPR_BITS }; if (version >= OFP14_VERSION) { - oac.master[OAM_FLOW_REMOVED] = OFPRR14_BITS; + oac.primary[OAM_FLOW_REMOVED] = OFPRR14_BITS; } else if (version == OFP13_VERSION) { - oac.master[OAM_FLOW_REMOVED] = OFPRR13_BITS; + oac.primary[OAM_FLOW_REMOVED] = OFPRR13_BITS; } else { - oac.master[OAM_FLOW_REMOVED] = OFPRR10_BITS; + oac.primary[OAM_FLOW_REMOVED] = OFPRR10_BITS; } return oac; diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c index aee676d93..9c5c633b4 100644 --- a/ofproto/connmgr.c +++ b/ofproto/connmgr.c @@ -212,9 +212,9 @@ struct connmgr { * traversals from other threads can be made safe by holding the * ofproto_mutex.*/ struct ovs_list conns; /* All ofconns. */ - uint64_t master_election_id; /* monotonically increasing sequence number - * for master election */ - bool master_election_id_defined; + uint64_t primary_election_id; /* monotonically increasing sequence number + * for primary election */ + bool primary_election_id_defined; /* OpenFlow connection establishment. */ struct hmap services; /* Contains "struct ofservice"s. 
*/ @@ -253,8 +253,8 @@ connmgr_create(struct ofproto *ofproto, mgr->local_port_name = xstrdup(local_port_name); ovs_list_init(&mgr->conns); - mgr->master_election_id = 0; - mgr->master_election_id_defined = false; + mgr->primary_election_id = 0; + mgr->primary_election_id_defined = false; hmap_init(&mgr->services); mgr->snoops = NULL; @@ -773,11 +773,11 @@ snoop_preference(const struct ofservice *ofservice) } switch (ofconn->role) { - case OFPCR12_ROLE_MASTER: + case OFPCR12_ROLE_PRIMARY: return 3; case OFPCR12_ROLE_EQUAL: return 2; - case OFPCR12_ROLE_SLAVE: + case OFPCR12_ROLE_SECONDARY: return 1; case OFPCR12_ROLE_NOCHANGE: default: @@ -818,33 +818,33 @@ ofconn_get_type(const struct ofconn *ofconn) return ofconn->type; } -/* If a master election id is defined, stores it into '*idp' and returns +/* If a primary election id is defined, stores it into '*idp' and returns * true. Otherwise, stores UINT64_MAX into '*idp' and returns false. */ bool -ofconn_get_master_election_id(const struct ofconn *ofconn, uint64_t *idp) +ofconn_get_primary_election_id(const struct ofconn *ofconn, uint64_t *idp) { - *idp = (ofconn->connmgr->master_election_id_defined - ? ofconn->connmgr->master_election_id + *idp = (ofconn->connmgr->primary_election_id_defined + ? ofconn->connmgr->primary_election_id : UINT64_MAX); - return ofconn->connmgr->master_election_id_defined; + return ofconn->connmgr->primary_election_id_defined; } -/* Sets the master election id. +/* Sets the primary election id. 
* * Returns true if successful, false if the id is stale */ bool -ofconn_set_master_election_id(struct ofconn *ofconn, uint64_t id) +ofconn_set_primary_election_id(struct ofconn *ofconn, uint64_t id) { - if (ofconn->connmgr->master_election_id_defined + if (ofconn->connmgr->primary_election_id_defined && /* Unsigned difference interpreted as a two's complement signed * value */ - (int64_t)(id - ofconn->connmgr->master_election_id) < 0) { + (int64_t)(id - ofconn->connmgr->primary_election_id) < 0) { return false; } - ofconn->connmgr->master_election_id = id; - ofconn->connmgr->master_election_id_defined = true; + ofconn->connmgr->primary_election_id = id; + ofconn->connmgr->primary_election_id_defined = true; return true; } @@ -864,7 +864,7 @@ ofconn_send_role_status(struct ofconn *ofconn, uint32_t role, uint8_t reason) struct ofputil_role_status status; status.reason = reason; status.role = role; - ofconn_get_master_election_id(ofconn, &status.generation_id); + ofconn_get_primary_election_id(ofconn, &status.generation_id); struct ofpbuf *buf = ofputil_encode_role_status(&status, ofconn_get_protocol(ofconn)); @@ -873,19 +873,19 @@ ofconn_send_role_status(struct ofconn *ofconn, uint32_t role, uint8_t reason) } } -/* Changes 'ofconn''s role to 'role'. If 'role' is OFPCR12_ROLE_MASTER then - * any existing master is demoted to a slave. */ +/* Changes 'ofconn''s role to 'role'. If 'role' is OFPCR12_ROLE_PRIMARY then + * any existing primary is demoted to a secondary. 
*/ void ofconn_set_role(struct ofconn *ofconn, enum ofp12_controller_role role) { - if (role != ofconn->role && role == OFPCR12_ROLE_MASTER) { + if (role != ofconn->role && role == OFPCR12_ROLE_PRIMARY) { struct ofconn *other; LIST_FOR_EACH (other, connmgr_node, &ofconn->connmgr->conns) { - if (other->role == OFPCR12_ROLE_MASTER) { - other->role = OFPCR12_ROLE_SLAVE; - ofconn_send_role_status(other, OFPCR12_ROLE_SLAVE, - OFPCRR_MASTER_REQUEST); + if (other->role == OFPCR12_ROLE_PRIMARY) { + other->role = OFPCR12_ROLE_SECONDARY; + ofconn_send_role_status(other, OFPCR12_ROLE_SECONDARY, + OFPCRR_PRIMARY_REQUEST); } } } @@ -898,9 +898,9 @@ ofconn_set_invalid_ttl_to_controller(struct ofconn *ofconn, bool enable) struct ofputil_async_cfg ac = ofconn_get_async_config(ofconn); uint32_t bit = 1u << OFPR_INVALID_TTL; if (enable) { - ac.master[OAM_PACKET_IN] |= bit; + ac.primary[OAM_PACKET_IN] |= bit; } else { - ac.master[OAM_PACKET_IN] &= ~bit; + ac.primary[OAM_PACKET_IN] &= ~bit; } ofconn_set_async_config(ofconn, &ac); } @@ -910,7 +910,7 @@ ofconn_get_invalid_ttl_to_controller(struct ofconn *ofconn) { struct ofputil_async_cfg ac = ofconn_get_async_config(ofconn); uint32_t bit = 1u << OFPR_INVALID_TTL; - return (ac.master[OAM_PACKET_IN] & bit) != 0; + return (ac.primary[OAM_PACKET_IN] & bit) != 0; } /* Returns the currently configured protocol for 'ofconn', one of OFPUTIL_P_*. 
@@ -1002,11 +1002,11 @@ ofconn_set_async_config(struct ofconn *ofconn, if (ofputil_protocol_to_ofp_version(ofconn_get_protocol(ofconn)) < OFP14_VERSION) { - if (ofconn->async_cfg->master[OAM_PACKET_IN] & (1u << OFPR_ACTION)) { - ofconn->async_cfg->master[OAM_PACKET_IN] |= OFPR14_ACTION_BITS; + if (ofconn->async_cfg->primary[OAM_PACKET_IN] & (1u << OFPR_ACTION)) { + ofconn->async_cfg->primary[OAM_PACKET_IN] |= OFPR14_ACTION_BITS; } - if (ofconn->async_cfg->slave[OAM_PACKET_IN] & (1u << OFPR_ACTION)) { - ofconn->async_cfg->slave[OAM_PACKET_IN] |= OFPR14_ACTION_BITS; + if (ofconn->async_cfg->secondary[OAM_PACKET_IN] & (1u << OFPR_ACTION)) { + ofconn->async_cfg->secondary[OAM_PACKET_IN] |= OFPR14_ACTION_BITS; } } } @@ -1441,9 +1441,9 @@ ofconn_receives_async_msg(const struct ofconn *ofconn, } struct ofputil_async_cfg ac = ofconn_get_async_config(ofconn); - uint32_t *masks = (ofconn->role == OFPCR12_ROLE_SLAVE - ? ac.slave - : ac.master); + uint32_t *masks = (ofconn->role == OFPCR12_ROLE_SECONDARY + ? ac.secondary + : ac.primary); return (masks[type] & (1u << reason)) != 0; } diff --git a/ofproto/connmgr.h b/ofproto/connmgr.h index 079c8437c..e299386c7 100644 --- a/ofproto/connmgr.h +++ b/ofproto/connmgr.h @@ -84,8 +84,8 @@ void connmgr_get_snoops(const struct connmgr *, struct sset *snoops); /* Individual connections to OpenFlow controllers. 
*/ enum ofconn_type ofconn_get_type(const struct ofconn *); -bool ofconn_get_master_election_id(const struct ofconn *, uint64_t *idp); -bool ofconn_set_master_election_id(struct ofconn *, uint64_t); +bool ofconn_get_primary_election_id(const struct ofconn *, uint64_t *idp); +bool ofconn_set_primary_election_id(struct ofconn *, uint64_t); enum ofp12_controller_role ofconn_get_role(const struct ofconn *); void ofconn_set_role(struct ofconn *, enum ofp12_controller_role); diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 59f06aa94..4a78fb575 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -3471,7 +3471,7 @@ handle_set_config(struct ofconn *ofconn, const struct ofp_header *oh) } if (ofconn_get_type(ofconn) != OFCONN_PRIMARY - || ofconn_get_role(ofconn) != OFPCR12_ROLE_SLAVE) { + || ofconn_get_role(ofconn) != OFPCR12_ROLE_SECONDARY) { enum ofputil_frag_handling cur = ofproto->frag_handling; enum ofputil_frag_handling next = config.frag; @@ -3496,16 +3496,16 @@ handle_set_config(struct ofconn *ofconn, const struct ofp_header *oh) return 0; } -/* Checks whether 'ofconn' is a slave controller. If so, returns an OpenFlow - * error message code for the caller to propagate upward. Otherwise, returns - * 0. +/* Checks whether 'ofconn' is a secondary controller. If so, returns an + * OpenFlow error message code for the caller to propagate upward. Otherwise, + * returns 0. * * The log message mentions 'msg_type'. 
*/ static enum ofperr -reject_slave_controller(struct ofconn *ofconn) +reject_secondary_controller(struct ofconn *ofconn) { - if (ofconn_get_role(ofconn) == OFPCR12_ROLE_SLAVE) { - return OFPERR_OFPBRC_IS_SLAVE; + if (ofconn_get_role(ofconn) == OFPCR12_ROLE_SECONDARY) { + return OFPERR_OFPBRC_IS_SECONDARY; } else { return 0; } @@ -3686,7 +3686,7 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh) COVERAGE_INC(ofproto_packet_out); - error = reject_slave_controller(ofconn); + error = reject_secondary_controller(ofconn); if (error) { return error; } @@ -3808,7 +3808,7 @@ handle_port_mod(struct ofconn *ofconn, const struct ofp_header *oh) struct ofport *port; enum ofperr error; - error = reject_slave_controller(ofconn); + error = reject_secondary_controller(ofconn); if (error) { return error; } @@ -6174,7 +6174,7 @@ handle_flow_mod(struct ofconn *ofconn, const struct ofp_header *oh) struct ofpbuf ofpacts; enum ofperr error; - error = reject_slave_controller(ofconn); + error = reject_secondary_controller(ofconn); if (error) { return error; } @@ -6237,7 +6237,7 @@ handle_role_request(struct ofconn *ofconn, const struct ofp_header *oh) if (request.role != OFPCR12_ROLE_NOCHANGE) { if (request.role != OFPCR12_ROLE_EQUAL && request.have_generation_id - && !ofconn_set_master_election_id(ofconn, request.generation_id)) { + && !ofconn_set_primary_election_id(ofconn, request.generation_id)) { return OFPERR_OFPRRFC_STALE; } @@ -6245,7 +6245,7 @@ handle_role_request(struct ofconn *ofconn, const struct ofp_header *oh) } reply.role = ofconn_get_role(ofconn); - reply.have_generation_id = ofconn_get_master_election_id( + reply.have_generation_id = ofconn_get_primary_election_id( ofconn, &reply.generation_id); buf = ofputil_encode_role_reply(oh, &reply); ofconn_send_reply(ofconn, buf); @@ -6865,7 +6865,7 @@ handle_meter_mod(struct ofconn *ofconn, const struct ofp_header *oh) uint32_t meter_id; enum ofperr error; - error = reject_slave_controller(ofconn); + error = 
reject_secondary_controller(ofconn); if (error) { return error; } @@ -7801,7 +7801,7 @@ handle_group_mod(struct ofconn *ofconn, const struct ofp_header *oh) struct ofproto_group_mod ogm; enum ofperr error; - error = reject_slave_controller(ofconn); + error = reject_secondary_controller(ofconn); if (error) { return error; } @@ -7922,7 +7922,7 @@ handle_table_mod(struct ofconn *ofconn, const struct ofp_header *oh) struct ofputil_table_mod tm; enum ofperr error; - error = reject_slave_controller(ofconn); + error = reject_secondary_controller(ofconn); if (error) { return error; } @@ -8295,7 +8295,7 @@ handle_bundle_control(struct ofconn *ofconn, const struct ofp_header *oh) struct ofpbuf *buf; enum ofperr error; - error = reject_slave_controller(ofconn); + error = reject_secondary_controller(ofconn); if (error) { return error; } @@ -8349,7 +8349,7 @@ handle_bundle_add(struct ofconn *ofconn, const struct ofp_header *oh) struct ofputil_bundle_add_msg badd; enum ofptype type; - error = reject_slave_controller(ofconn); + error = reject_secondary_controller(ofconn); if (error) { return error; } @@ -8427,7 +8427,7 @@ handle_tlv_table_mod(struct ofconn *ofconn, const struct ofp_header *oh) struct ofputil_tlv_table_mod ttm; enum ofperr error; - error = reject_slave_controller(ofconn); + error = reject_secondary_controller(ofconn); if (error) { return error; } diff --git a/tests/ofp-print.at b/tests/ofp-print.at index dd6410b11..2c7e163bd 100644 --- a/tests/ofp-print.at +++ b/tests/ofp-print.at @@ -2816,7 +2816,8 @@ AT_CLEANUP AT_SETUP([OFPT_SET_ASYNC - OF1.3]) AT_KEYWORDS([ofp-print]) -dnl This message has bit 12 set for the PACKET_IN messages (master and slave). +dnl This message has bit 12 set for the PACKET_IN messages (primary and +dnl secondary). dnl Those aren't supported bits so they get silently ignored on decoding. dnl That seems reasonable because OF1.3 doesn't define any error codes for dnl OFPT_SET_ASYNC. 
@@ -2825,7 +2826,7 @@ AT_CHECK([ovs-ofctl ofp-print "\ 00 00 00 03 00 00 00 07 00 00 00 00 00 00 00 03 \ "], [0], [dnl OFPT_SET_ASYNC (OF1.3) (xid=0x0): - master: + primary: PACKET_IN: no_match invalid_ttl PORT_STATUS: add delete FLOW_REMOVED: (off) @@ -2833,7 +2834,7 @@ OFPT_SET_ASYNC (OF1.3) (xid=0x0): TABLE_STATUS: (off) REQUESTFORWARD: (off) - slave: + secondary: PACKET_IN: no_match action invalid_ttl PORT_STATUS: add delete modify FLOW_REMOVED: idle hard @@ -2849,7 +2850,7 @@ AT_CHECK([ovs-ofctl ofp-print "\ 03 18 00 18 00 00 00 02 00 00 00 02 00 00 00 00 \ 00 00 00 00 00 00 00 03 \ "], [0], [dnl -OFPT_ROLE_REQUEST (OF1.2) (xid=0x2): role=master generation_id=3 +OFPT_ROLE_REQUEST (OF1.2) (xid=0x2): role=primary generation_id=3 ]) AT_CLEANUP @@ -2869,7 +2870,7 @@ AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 14 00 00 00 02 00 00 23 20 00 00 00 0a \ 00 00 00 01 \ "], [0], [dnl -NXT_ROLE_REQUEST (xid=0x2): role=master +NXT_ROLE_REQUEST (xid=0x2): role=primary ]) AT_CLEANUP @@ -2879,7 +2880,7 @@ AT_CHECK([ovs-ofctl ofp-print "\ 03 19 00 18 00 00 00 02 00 00 00 03 00 00 00 00 \ 12 34 56 78 ab cd ef 90 \ "], [0], [dnl -OFPT_ROLE_REPLY (OF1.2) (xid=0x2): role=slave generation_id=1311768467750121360 +OFPT_ROLE_REPLY (OF1.2) (xid=0x2): role=secondary generation_id=1311768467750121360 ]) AT_CLEANUP @@ -2889,67 +2890,67 @@ AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 14 00 00 00 02 00 00 23 20 00 00 00 0b \ 00 00 00 02 \ "], [0], [dnl -NXT_ROLE_REPLY (xid=0x2): role=slave +NXT_ROLE_REPLY (xid=0x2): role=secondary ]) AT_CLEANUP -AT_SETUP([OFP_ROLE_STATUS - master, experimenter - OF1.3]) +AT_SETUP([OFP_ROLE_STATUS - primary, experimenter - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 04 00 20 00 00 00 0a 4f 4e 46 00 00 00 07 77 \ 00 00 00 02 02 00 00 00 ff ff ff ff ff ff ff ff \ "], [0], [dnl -ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=master reason=experimenter_data_changed +ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=primary reason=experimenter_data_changed 
]) AT_CLEANUP -AT_SETUP([OFP_ROLE_STATUS - master, config - OF1.3]) +AT_SETUP([OFP_ROLE_STATUS - primary, config - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 04 00 20 00 00 00 0a 4f 4e 46 00 00 00 07 77 \ 00 00 00 02 01 00 00 00 ff ff ff ff ff ff ff ff \ "], [0], [dnl -ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=master reason=configuration_changed +ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=primary reason=configuration_changed ]) AT_CLEANUP -AT_SETUP([OFP_ROLE_STATUS - master, config,generation - OF1.3]) +AT_SETUP([OFP_ROLE_STATUS - primary, config,generation - OF1.3]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 04 04 00 20 00 00 00 0a 4f 4e 46 00 00 00 07 77 \ 00 00 00 02 01 00 00 00 00 00 00 00 00 00 00 10 \ "], [0], [dnl -ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=master generation_id=16 reason=configuration_changed +ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=primary generation_id=16 reason=configuration_changed ]) AT_CLEANUP -AT_SETUP([OFP_ROLE_STATUS - master, experimenter - OF1.4]) +AT_SETUP([OFP_ROLE_STATUS - primary, experimenter - OF1.4]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 05 1e 00 18 00 00 00 0a \ 00 00 00 02 02 00 00 00 ff ff ff ff ff ff ff ff \ "], [0], [dnl -OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=master reason=experimenter_data_changed +OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=primary reason=experimenter_data_changed ]) AT_CLEANUP -AT_SETUP([OFP_ROLE_STATUS - master, config - OF1.4]) +AT_SETUP([OFP_ROLE_STATUS - primary, config - OF1.4]) AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 05 1e 00 18 00 00 00 0a \ 00 00 00 02 01 00 00 00 ff ff ff ff ff ff ff ff \ "], [0], [dnl -OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=master reason=configuration_changed +OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=primary reason=configuration_changed ]) AT_CLEANUP -AT_SETUP([OFP_ROLE_STATUS - master, config,generation - OF1.4]) +AT_SETUP([OFP_ROLE_STATUS - primary, config,generation - OF1.4]) 
AT_KEYWORDS([ofp-print]) AT_CHECK([ovs-ofctl ofp-print "\ 05 1e 00 18 00 00 00 0a \ 00 00 00 02 01 00 00 00 00 00 00 00 00 00 00 10 \ "], [0], [dnl -OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=master generation_id=16 reason=configuration_changed +OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=primary generation_id=16 reason=configuration_changed ]) AT_CLEANUP @@ -3156,7 +3157,7 @@ AT_CLEANUP AT_SETUP([NXT_SET_ASYNC_CONFIG]) AT_KEYWORDS([ofp-print]) -dnl This message has bit 12 set for the PACKET_IN messages (master and slave). +dnl This message has bit 12 set for the PACKET_IN messages (primary and secondary). dnl Those aren't supported bits so they get silently ignored on decoding. AT_CHECK([ovs-ofctl ofp-print "\ 01 04 00 28 00 00 00 00 00 00 23 20 00 00 00 13 \ @@ -3164,7 +3165,7 @@ AT_CHECK([ovs-ofctl ofp-print "\ 00 00 00 00 00 00 00 03 \ "], [0], [dnl NXT_SET_ASYNC_CONFIG (xid=0x0): - master: + primary: PACKET_IN: no_match invalid_ttl PORT_STATUS: add delete FLOW_REMOVED: (off) @@ -3172,7 +3173,7 @@ NXT_SET_ASYNC_CONFIG (xid=0x0): TABLE_STATUS: (off) REQUESTFORWARD: (off) - slave: + secondary: PACKET_IN: no_match action invalid_ttl PORT_STATUS: add delete modify FLOW_REMOVED: idle hard @@ -3191,7 +3192,7 @@ AT_CHECK([ovs-ofctl ofp-print "\ 00 05 00 08 00 00 00 05 \ "], [0], [dnl OFPT_SET_ASYNC (OF1.4) (xid=0x2): - master: + primary: PACKET_IN: action PORT_STATUS: add modify FLOW_REMOVED: idle delete @@ -3199,7 +3200,7 @@ OFPT_SET_ASYNC (OF1.4) (xid=0x2): TABLE_STATUS: (off) REQUESTFORWARD: (off) - slave: + secondary: PACKET_IN: no_match invalid_ttl PORT_STATUS: delete FLOW_REMOVED: delete group_delete meter_delete diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index d63ef237a..d129e60d3 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -3401,13 +3401,13 @@ AT_CHECK([ovs-ofctl --protocols=OpenFlow13 add-flow br0 'priority=0 actions=outp dnl Singleton controller action. 
AT_CHECK([ovs-ofctl monitor -P standard --protocols=OpenFlow13 br0 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log]) -# Become slave (OF 1.3), which should disable everything except port status. +# Become secondary (OF 1.3), which should disable everything except port status. ovs-appctl -t ovs-ofctl ofctl/send 041800180000000200000003000000000000000000000001 # Ensure that ovs-vswitchd gets a chance to reply before sending another command. ovs-appctl time/warp 500 100 -# Use OF 1.3 OFPT_SET_ASYNC to enable OFPR_NO_MATCH for slave only. +# Use OF 1.3 OFPT_SET_ASYNC to enable OFPR_NO_MATCH for secondary only. ovs-appctl -t ovs-ofctl ofctl/send 041c002000000002000000000000000100000000000000000000000000000000 ovs-appctl time/warp 500 100 @@ -3419,11 +3419,11 @@ OVS_APP_EXIT_AND_WAIT([ovs-ofctl]) AT_CHECK([ovs-appctl revalidator/purge], [0]) AT_CHECK([cat ofctl_monitor.log], [0], [dnl -send: OFPT_ROLE_REQUEST (OF1.3) (xid=0x2): role=slave generation_id=1 -OFPT_ROLE_REPLY (OF1.3) (xid=0x2): role=slave generation_id=1 +send: OFPT_ROLE_REQUEST (OF1.3) (xid=0x2): role=secondary generation_id=1 +OFPT_ROLE_REPLY (OF1.3) (xid=0x2): role=secondary generation_id=1 dnl send: OFPT_SET_ASYNC (OF1.3) (xid=0x2): - master: + primary: PACKET_IN: (off) PORT_STATUS: (off) FLOW_REMOVED: (off) @@ -3431,7 +3431,7 @@ send: OFPT_SET_ASYNC (OF1.3) (xid=0x2): TABLE_STATUS: (off) REQUESTFORWARD: (off) - slave: + secondary: PACKET_IN: no_match PORT_STATUS: (off) FLOW_REMOVED: (off) diff --git a/tests/ofproto.at b/tests/ofproto.at index 76a3be44d..f56673625 100644 --- a/tests/ofproto.at +++ b/tests/ofproto.at @@ -3157,7 +3157,7 @@ check_async 2 OFPR_ACTION OFPPR_ADD OFPPR_DELETE OFPRR_DELETE ovs-appctl -t ovs-ofctl ofctl/send 0109000c0123456700040080 check_async 3 OFPR_ACTION OFPR_INVALID_TTL OFPPR_ADD OFPPR_DELETE OFPRR_DELETE -# Become slave, which should disable everything except port status. +# Become secondary, which should disable everything except port status. 
ovs-appctl -t ovs-ofctl ofctl/send 0104001400000002000023200000000a00000002 check_async 4 OFPPR_ADD OFPPR_DELETE @@ -3172,7 +3172,7 @@ check_async 6 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE # Restore controller ID 0. ovs-appctl -t ovs-ofctl ofctl/send 010400180000000300002320000000140000000000000000 -# Become master. +# Become primary. ovs-appctl -t ovs-ofctl ofctl/send 0104001400000002000023200000000a00000001 check_async 7 OFPR_ACTION OFPPR_ADD @@ -3264,7 +3264,7 @@ check_async 2 OFPR_ACTION OFPPR_ADD OFPPR_DELETE OFPRR_DELETE ovs-appctl -t ovs-ofctl ofctl/send 0309000c0123456700040080 check_async 3 OFPR_ACTION OFPR_INVALID_TTL OFPPR_ADD OFPPR_DELETE OFPRR_DELETE -# Become slave (OF 1.2), which should disable everything except port status. +# Become secondary (OF 1.2), which should disable everything except port status. ovs-appctl -t ovs-ofctl ofctl/send 031800180000000200000003000000000000000000000001 check_async 4 OFPPR_ADD OFPPR_DELETE @@ -3279,7 +3279,7 @@ check_async 6 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE # Restore controller ID 0. ovs-appctl -t ovs-ofctl ofctl/send 030400180000000300002320000000140000000000000000 -# Become master (OF 1.2). +# Become primary (OF 1.2). ovs-appctl -t ovs-ofctl ofctl/send 031800180000000400000002000000000000000000000002 check_async 7 OFPR_ACTION OFPPR_ADD @@ -3383,7 +3383,7 @@ check_async 1 ovs-appctl -t ovs-ofctl ofctl/send 0409000c0123456700000080 check_async 2 OFPR_ACTION OFPPR_ADD OFPPR_DELETE OFPRR_DELETE OFPRR_GROUP_DELETE -# Become slave (OF 1.3), which should disable everything except port status. +# Become secondary (OF 1.3), which should disable everything except port status. ovs-appctl -t ovs-ofctl ofctl/send 041800180000000200000003000000000000000000000001 check_async 3 OFPPR_ADD OFPPR_DELETE @@ -3398,7 +3398,7 @@ check_async 5 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE OFPRR_GROUP_DELETE # Restore controller ID 0. 
ovs-appctl -t ovs-ofctl ofctl/send 040400180000000300002320000000140000000000000000 -# Become master (OF 1.3). +# Become primary (OF 1.3). ovs-appctl -t ovs-ofctl ofctl/send 041800180000000400000002000000000000000000000002 check_async 6 OFPR_ACTION OFPPR_ADD @@ -3615,7 +3615,7 @@ check_async 1 ovs_appctl -t ovs-ofctl ofctl/send 0509000c0123456700000080 check_async 2 OFPR_PACKET_OUT OFPR_ACTION_SET OFPPR_ADD OFPPR_MODIFY OFPPR_DELETE OFPRR_DELETE OFPRR_GROUP_DELETE -# Become slave (OF 1.4), which should disable everything except port status. +# Become secondary (OF 1.4), which should disable everything except port status. ovs_appctl -t ovs-ofctl ofctl/send 051800180000000200000003000000000000000000000001 check_async 3 OFPPR_ADD OFPPR_MODIFY OFPPR_DELETE @@ -3630,7 +3630,7 @@ check_async 5 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE OFPRR_GROUP_DELETE # Restore controller ID 0. ovs_appctl -t ovs-ofctl ofctl/send 050400180000000300002320000000140000000000000000 -# Become master (OF 1.4). +# Become primary (OF 1.4). ovs_appctl -t ovs-ofctl ofctl/send 051800180000000400000002000000000000000000000002 check_async 6 OFPR_PACKET_OUT OFPPR_ADD OFPPR_MODIFY OFPRR_DELETE @@ -3740,27 +3740,27 @@ for i in 1 2; do echo >>expout$i "OFPT_ROLE_REPLY (OF1.2): role=equal" done -# controller 1: Become slave (generation_id is initially undefined, so +# controller 1: Become secondary (generation_id is initially undefined, so # 2^63+2 should not be stale) ovs-appctl -t `pwd`/c1 ofctl/send 031800180000000300000003000000008000000000000002 -echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=slave generation_id=9223372036854775810" -echo >>expout1 "OFPT_ROLE_REPLY (OF1.2): role=slave generation_id=9223372036854775810" +echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=secondary generation_id=9223372036854775810" +echo >>expout1 "OFPT_ROLE_REPLY (OF1.2): role=secondary generation_id=9223372036854775810" -# controller 2: Become master. +# controller 2: Become primary. 
ovs-appctl -t `pwd`/c2 ofctl/send 031800180000000300000002000000008000000000000003 -echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.2): role=master generation_id=9223372036854775811" -echo >>expout2 "OFPT_ROLE_REPLY (OF1.2): role=master generation_id=9223372036854775811" +echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.2): role=primary generation_id=9223372036854775811" +echo >>expout2 "OFPT_ROLE_REPLY (OF1.2): role=primary generation_id=9223372036854775811" -# controller 1: Try to become the master using a stale generation ID +# controller 1: Try to become the primary using a stale generation ID ovs-appctl -t `pwd`/c1 ofctl/send 031800180000000400000002000000000000000000000003 -echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=master generation_id=3" +echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=primary generation_id=3" echo >>expout1 "OFPT_ERROR (OF1.2): OFPRRFC_STALE" -echo >>expout1 "OFPT_ROLE_REQUEST (OF1.2): role=master generation_id=3" +echo >>expout1 "OFPT_ROLE_REQUEST (OF1.2): role=primary generation_id=3" -# controller 1: Become master using a valid generation ID +# controller 1: Become primary using a valid generation ID ovs-appctl -t `pwd`/c1 ofctl/send 031800180000000500000002000000000000000000000001 -echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=master generation_id=1" -echo >>expout1 "OFPT_ROLE_REPLY (OF1.2): role=master generation_id=1" +echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=primary generation_id=1" +echo >>expout1 "OFPT_ROLE_REPLY (OF1.2): role=primary generation_id=1" for i in 1 2; do ovs-appctl -t `pwd`/c$i ofctl/barrier @@ -3779,8 +3779,8 @@ AT_CLEANUP dnl This test checks that the role request/response messaging works, dnl that generation_id is handled properly, and that role status update -dnl messages are sent when a controller's role gets changed from master -dnl to slave. +dnl messages are sent when a controller's role gets changed from primary +dnl to secondary. 
AT_SETUP([ofproto - controller role (OpenFlow 1.4)]) OVS_VSWITCHD_START on_exit 'kill `cat c1.pid c2.pid`' @@ -3805,28 +3805,28 @@ for i in 1 2; do echo >>expout$i "OFPT_ROLE_REPLY (OF1.4): role=equal" done -# controller 1: Become slave (generation_id is initially undefined, so +# controller 1: Become secondary (generation_id is initially undefined, so # 2^63+2 should not be stale) ovs-appctl -t `pwd`/c1 ofctl/send 051800180000000300000003000000008000000000000002 -echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=slave generation_id=9223372036854775810" -echo >>expout1 "OFPT_ROLE_REPLY (OF1.4): role=slave generation_id=9223372036854775810" +echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=secondary generation_id=9223372036854775810" +echo >>expout1 "OFPT_ROLE_REPLY (OF1.4): role=secondary generation_id=9223372036854775810" -# controller 2: Become master. +# controller 2: Become primary. ovs-appctl -t `pwd`/c2 ofctl/send 051800180000000300000002000000008000000000000003 -echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.4): role=master generation_id=9223372036854775811" -echo >>expout2 "OFPT_ROLE_REPLY (OF1.4): role=master generation_id=9223372036854775811" +echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.4): role=primary generation_id=9223372036854775811" +echo >>expout2 "OFPT_ROLE_REPLY (OF1.4): role=primary generation_id=9223372036854775811" -# controller 1: Try to become the master using a stale generation ID +# controller 1: Try to become the primary using a stale generation ID ovs-appctl -t `pwd`/c1 ofctl/send 051800180000000400000002000000000000000000000003 -echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=master generation_id=3" +echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=primary generation_id=3" echo >>expout1 "OFPT_ERROR (OF1.4): OFPRRFC_STALE" -echo >>expout1 "OFPT_ROLE_REQUEST (OF1.4): role=master generation_id=3" +echo >>expout1 "OFPT_ROLE_REQUEST (OF1.4): role=primary generation_id=3" -# controller 1: Become master using a valid 
generation ID +# controller 1: Become primary using a valid generation ID ovs-appctl -t `pwd`/c1 ofctl/send 051800180000000500000002000000000000000000000001 -echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=master generation_id=1" -echo >>expout1 "OFPT_ROLE_REPLY (OF1.4): role=master generation_id=1" -echo >>expout2 "OFPT_ROLE_STATUS (OF1.4): role=slave generation_id=1 reason=master_request" +echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=primary generation_id=1" +echo >>expout1 "OFPT_ROLE_REPLY (OF1.4): role=primary generation_id=1" +echo >>expout2 "OFPT_ROLE_STATUS (OF1.4): role=secondary generation_id=1 reason=primary_request" for i in 1 2; do ovs-appctl -t `pwd`/c$i ofctl/barrier @@ -3845,8 +3845,8 @@ AT_CLEANUP dnl This test checks that the role request/response messaging works, dnl that generation_id is handled properly, and that role status update -dnl messages are sent when a controller's role gets changed from master -dnl to slave. +dnl messages are sent when a controller's role gets changed from primary +dnl to secondary. AT_SETUP([ofproto - controller role (OpenFlow 1.3)]) OVS_VSWITCHD_START on_exit 'kill `cat c1.pid c2.pid`' @@ -3871,28 +3871,28 @@ for i in 1 2; do echo >>expout$i "OFPT_ROLE_REPLY (OF1.3): role=equal" done -# controller 1: Become slave (generation_id is initially undefined, so +# controller 1: Become secondary (generation_id is initially undefined, so # 2^63+2 should not be stale) ovs-appctl -t `pwd`/c1 ofctl/send 041800180000000300000003000000008000000000000002 -echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=slave generation_id=9223372036854775810" -echo >>expout1 "OFPT_ROLE_REPLY (OF1.3): role=slave generation_id=9223372036854775810" +echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=secondary generation_id=9223372036854775810" +echo >>expout1 "OFPT_ROLE_REPLY (OF1.3): role=secondary generation_id=9223372036854775810" -# controller 2: Become master. +# controller 2: Become primary. 
ovs-appctl -t `pwd`/c2 ofctl/send 041800180000000300000002000000008000000000000003 -echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.3): role=master generation_id=9223372036854775811" -echo >>expout2 "OFPT_ROLE_REPLY (OF1.3): role=master generation_id=9223372036854775811" +echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.3): role=primary generation_id=9223372036854775811" +echo >>expout2 "OFPT_ROLE_REPLY (OF1.3): role=primary generation_id=9223372036854775811" -# controller 1: Try to become the master using a stale generation ID +# controller 1: Try to become the primary using a stale generation ID ovs-appctl -t `pwd`/c1 ofctl/send 041800180000000400000002000000000000000000000003 -echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=master generation_id=3" +echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=primary generation_id=3" echo >>expout1 "OFPT_ERROR (OF1.3): OFPRRFC_STALE" -echo >>expout1 "OFPT_ROLE_REQUEST (OF1.3): role=master generation_id=3" +echo >>expout1 "OFPT_ROLE_REQUEST (OF1.3): role=primary generation_id=3" -# controller 1: Become master using a valid generation ID +# controller 1: Become primary using a valid generation ID ovs-appctl -t `pwd`/c1 ofctl/send 041800180000000500000002000000000000000000000001 -echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=master generation_id=1" -echo >>expout1 "OFPT_ROLE_REPLY (OF1.3): role=master generation_id=1" -echo >>expout2 "ONFT_ROLE_STATUS (OF1.3): role=slave generation_id=1 reason=master_request" +echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=primary generation_id=1" +echo >>expout1 "OFPT_ROLE_REPLY (OF1.3): role=primary generation_id=1" +echo >>expout2 "ONFT_ROLE_STATUS (OF1.3): role=secondary generation_id=1 reason=primary_request" for i in 1 2; do ovs-appctl -t `pwd`/c$i ofctl/barrier @@ -3978,13 +3978,13 @@ s/ (xid=0x[0-9a-fA-F]*)//'< monitor$i.log]], done } -# controller 1: Become slave +# controller 1: Become secondary ovs-appctl -t `pwd`/c1 ofctl/send 
061800180000000300000003000000008000000000000002 -# controller 2: Become master +# controller 2: Become primary ovs-appctl -t `pwd`/c2 ofctl/send 051800180000000300000002000000008000000000000003 -# controller 1: Become slave +# controller 3: Become secondary ovs-appctl -t `pwd`/c3 ofctl/send 051800180000000300000003000000008000000000000004 # controller 1: Enabled requestforward using set Asynchronous message @@ -4074,16 +4074,16 @@ s/ (xid=0x[0-9a-fA-F]*)//'< monitor$i.log]], done } -# controller 1: Become slave -# NXT_ROLE_REQUEST (xid=0x3): role=slave +# controller 1: Become secondary +# NXT_ROLE_REQUEST (xid=0x3): role=secondary ovs-appctl -t `pwd`/c1 ofctl/send 0104001400000003000023200000000a00000002 -# controller 2: Become master -# NXT_ROLE_REQUEST (xid=0x3): role=master +# controller 2: Become primary +# NXT_ROLE_REQUEST (xid=0x3): role=primary ovs-appctl -t `pwd`/c2 ofctl/send 0104001400000003000023200000000a00000001 -# controller 1: Become slave -# NXT_ROLE_REQUEST (xid=0x3): role=slave +# controller 3: Become secondary +# NXT_ROLE_REQUEST (xid=0x3): role=secondary ovs-appctl -t `pwd`/c3 ofctl/send 0104001400000003000023200000000a00000002 # controller 1: Enabled requestforward using OFPRAW_NXT_SET_ASYNC_CONFIG2 @@ -4172,16 +4172,16 @@ s/ (xid=0x[0-9a-fA-F]*)//'< monitor$i.log]], done } -# controller 1: Become slave -# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=slave +# controller 1: Become secondary +# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=secondary ovs-appctl -t `pwd`/c1 ofctl/send 041800180000000300000003000000008000000000000002 -# controller 2: Become master -# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=master +# controller 2: Become primary +# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=primary ovs-appctl -t `pwd`/c2 ofctl/send 041800180000000300000002000000008000000000000003 -# controller 1: Become slave -# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=slave +# controller 3: Become secondary +# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=secondary
ovs-appctl -t `pwd`/c3 ofctl/send 041800180000000300000003000000008000000000000004 # controller 1: Enabled requestforward using OFPRAW_NXT_SET_ASYNC_CONFIG2 (necessary for OF1.3) @@ -4951,7 +4951,7 @@ EOF AT_CHECK([ofctl_strip < monitor.log], [], [dnl send: OFPT_GET_ASYNC_REQUEST (OF1.3): OFPT_GET_ASYNC_REPLY (OF1.3): - master: + primary: PACKET_IN: no_match action PORT_STATUS: add delete modify FLOW_REMOVED: idle hard delete group_delete @@ -4959,7 +4959,7 @@ OFPT_GET_ASYNC_REPLY (OF1.3): TABLE_STATUS: (off) REQUESTFORWARD: (off) - slave: + secondary: PACKET_IN: (off) PORT_STATUS: add delete modify FLOW_REMOVED: (off) diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index cb5c6120c..2017c6eba 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -562,12 +562,12 @@ between a switch and its controller. .IP When a switch has more than one controller configured, only the traffic to and from a single controller is output. If none of the -controllers is configured as a master or a slave (using a Nicira +controllers is configured as a primary or a secondary (using a Nicira extension to OpenFlow 1.0 or 1.1, or a standard request in OpenFlow 1.2 or later), then a controller is chosen arbitrarily among -them. If there is a master controller, it is chosen; otherwise, if -there are any controllers that are not masters or slaves, one is -chosen arbitrarily; otherwise, a slave controller is chosen +them. If there is a primary controller, it is chosen; otherwise, if +there are any controllers that are not primaries or secondaries, one is +chosen arbitrarily; otherwise, a secondary controller is chosen arbitrarily. This choice is made once at connection time and does not change as controllers reconfigure their roles. 
.IP diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index a3e7facd3..41989cf6c 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -3017,10 +3017,10 @@ ofp12_controller_role_to_str(enum ofp12_controller_role role) switch (role) { case OFPCR12_ROLE_EQUAL: return "other"; - case OFPCR12_ROLE_MASTER: - return "master"; - case OFPCR12_ROLE_SLAVE: - return "slave"; + case OFPCR12_ROLE_PRIMARY: + return "primary"; + case OFPCR12_ROLE_SECONDARY: + return "secondary"; case OFPCR12_ROLE_NOCHANGE: default: return NULL; diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 07da2ee8c..4958c7c95 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -5264,16 +5264,21 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \

    other
    Allows the controller access to all OpenFlow features.
    master
    -
    Equivalent to other, except that there may be at - most one master controller at a time. When a controller configures - itself as master, any existing master is demoted to - the slave role.
    +
    + Equivalent to other, except that there may be at + most one such controller at a time. If a given controller + promotes itself to this role, ovs-vswitchd + demotes any existing controller with the role to slave. +
    +
    slave
    -
    Allows the controller read-only access to OpenFlow features. - Attempts to modify the flow table will be rejected with an - error. Slave controllers do not receive OFPT_PACKET_IN or - OFPT_FLOW_REMOVED messages, but they do receive OFPT_PORT_STATUS - messages.
    +
    + Allows the controller read-only access to OpenFlow features. + Attempts to modify the flow table will be rejected with an + error. Such controllers do not receive OFPT_PACKET_IN or + OFPT_FLOW_REMOVED messages, but they do receive OFPT_PORT_STATUS + messages. +
    -- GitLab From 8205fbc8f5e0ae5c85b9d1be2f5f53997ea4ff31 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 17 Jun 2020 14:22:47 -0700 Subject: [PATCH 307/432] Eliminate "whitelist" and "blacklist" terms. There is one remaining use under datapath. That change should happen upstream in Linux first according to our usual policy. Signed-off-by: Ben Pfaff Acked-by: Alin Gabriel Serdean --- Documentation/howto/ipsec.rst | 2 +- Documentation/howto/selinux.rst | 8 +-- Documentation/topics/datapath.rst | 2 +- Documentation/topics/ovsdb-replication.rst | 9 ++- Makefile.am | 10 +-- build-aux/automake.mk | 2 +- ...ab-whitelist => initial-tab-allowed-files} | 0 ...fety-blacklist => thread-safety-forbidden} | 0 datapath/Makefile.am | 2 +- datapath/linux/Modules.mk | 2 +- ...heck-whitelist => export-check-allow-list} | 0 include/openvswitch/automake.mk | 2 +- lib/daemon.man | 2 +- lib/daemon.xml | 2 +- lib/dpif.c | 14 ++-- lib/dpif.h | 2 +- lib/ovsdb-idl.h | 4 +- ovsdb/ovsdb-server.c | 8 +-- ovsdb/replication.c | 72 +++++++++---------- ovsdb/replication.h | 10 +-- tests/ofproto-macros.at | 4 +- tests/system-kmod-macros.at | 6 +- tests/system-userspace-macros.at | 6 +- tests/test-classifier.c | 3 +- utilities/checkpatch.py | 8 +-- vswitchd/bridge.c | 27 ++++--- vswitchd/ovs-vswitchd.c | 2 +- vswitchd/vswitch.xml | 2 +- 28 files changed, 105 insertions(+), 106 deletions(-) rename build-aux/{initial-tab-whitelist => initial-tab-allowed-files} (100%) rename build-aux/{thread-safety-blacklist => thread-safety-forbidden} (100%) rename datapath/linux/compat/build-aux/{export-check-whitelist => export-check-allow-list} (100%) diff --git a/Documentation/howto/ipsec.rst b/Documentation/howto/ipsec.rst index 17153ac2b..cd9348420 100644 --- a/Documentation/howto/ipsec.rst +++ b/Documentation/howto/ipsec.rst @@ -162,7 +162,7 @@ undesirable situation. `ipsec_skb_mark`. 
By setting the ipsec_skb_mark as 0/1, OVS IPsec prevents all unencrypted tunnel packets leaving the host since the default skb_mark value for tunnel packets are 0. This affects all OVS tunnels including those - without IPsec being set up. You can install OpenFlow rules to whitelist + without IPsec being set up. You can install OpenFlow rules to enable those non-IPsec tunnels by setting the skb_mark of the tunnel traffic as non-zero value. diff --git a/Documentation/howto/selinux.rst b/Documentation/howto/selinux.rst index 55c3e39ce..f657d5e51 100644 --- a/Documentation/howto/selinux.rst +++ b/Documentation/howto/selinux.rst @@ -67,8 +67,8 @@ differently than SELinux. SELinux and Open vSwitch are moving targets. What this means is that, if you solely rely on your Linux distribution's SELinux policy, then this policy might not have correctly anticipated that a newer Open vSwitch version needs extra -white list rules. However, if you solely rely on SELinux policy that ships -with Open vSwitch, then Open vSwitch developers might not have correctly +rules to allow behavior. However, if you solely rely on SELinux policy that +ships with Open vSwitch, then Open vSwitch developers might not have correctly anticipated the feature set that your SELinux implementation supports. Installation @@ -136,8 +136,8 @@ Then verify that this module was indeed loaded:: openvswitch 1.1.1 If you still see Permission denied errors, then take a look into -``selinux/openvswitch.te.in`` file in the OVS source tree and try to add white -list rules. This is really simple, just run SELinux audit2allow tool:: +``selinux/openvswitch.te.in`` file in the OVS source tree and try to add allow +rules. 
This is really simple, just run SELinux audit2allow tool:: $ grep "openvswitch_t" /var/log/audit/audit.log | audit2allow -M ovslocal diff --git a/Documentation/topics/datapath.rst b/Documentation/topics/datapath.rst index 8585c79eb..e6dcfbc19 100644 --- a/Documentation/topics/datapath.rst +++ b/Documentation/topics/datapath.rst @@ -261,5 +261,5 @@ Implement the headers and codes for compatibility with older kernel in function should be prefixed with ``rpl_``. Otherwise, the function should be prefixed with ``ovs_``. For special case when it is not possible to follow this rule (e.g., the ``pskb_expand_head()`` function), the function name must -be added to ``linux/compat/build-aux/export-check-whitelist``, otherwise, the +be added to ``linux/compat/build-aux/export-check-allow-list``, otherwise, the compilation check ``check-export-symbol`` will fail. diff --git a/Documentation/topics/ovsdb-replication.rst b/Documentation/topics/ovsdb-replication.rst index 950dfc9b7..e762f0730 100644 --- a/Documentation/topics/ovsdb-replication.rst +++ b/Documentation/topics/ovsdb-replication.rst @@ -91,7 +91,7 @@ When sending a monitor request the standby server is doing the following: 4. For each database with the same schema in both the active and standby servers: construct and send a monitor request message specifying the tables that will be monitored (i.e all the tables on the database except the ones - blacklisted [*]). + explicitly excluded [*]). 5. Set the standby database to the current state of the active database. @@ -100,10 +100,9 @@ receive notifications of changes occurring to the tables specified in the request. The process of handling this notifications is detailed in the next section. -[*] A set of tables that will be excluded from replication can be configure as -a blacklist of tables via the command line option -``--sync-exclude-tables=db:table[,db:table]...``, where db corresponds to the -database where the table resides.
+[*] A set of tables that will be excluded from replication can be configured +via the command line option ``--sync-exclude-tables=db:table[,db:table]...``, +where db corresponds to the database where the table resides. Replication Process ------------------- diff --git a/Makefile.am b/Makefile.am index 6981b943e..a3fbb15e2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -276,7 +276,7 @@ static-check: fi .PHONY: static-check -# Check that assert.h is not used outside a whitelist of files. +# Check that assert.h is not used (outside a small set of files). ALL_LOCAL += check-assert-h-usage check-assert-h-usage: @if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1 && \ @@ -323,7 +323,7 @@ check-tabs: if test -e .git && (git --version) >/dev/null 2>&1 && \ grep -ln "^ " \ `git ls-files \ - | grep -v -f build-aux/initial-tab-whitelist` /dev/null \ + | grep -v -f build-aux/initial-tab-allowed-files` /dev/null \ | $(EGREP) -v ':[ ]*/?\*'; \ then \ echo "See above for files that use tabs for indentation."; \ @@ -336,16 +336,16 @@ ALL_LOCAL += thread-safety-check thread-safety-check: @cd $(srcdir); \ if test -e .git && (git --version) >/dev/null 2>&1 && \ - grep -n -f build-aux/thread-safety-blacklist \ + grep -n -f build-aux/thread-safety-forbidden \ `git ls-files | grep '\.[ch]$$' \ | $(EGREP) -v '^datapath|^lib/sflow|^third-party'` /dev/null \ | $(EGREP) -v ':[ ]*/?\*'; \ then \ echo "See above for list of calls to functions that are"; \ - echo "blacklisted due to thread safety issues"; \ + echo "forbidden due to thread safety issues"; \ exit 1; \ fi -EXTRA_DIST += build-aux/thread-safety-blacklist +EXTRA_DIST += build-aux/thread-safety-forbidden .PHONY: thread-safety-check # Check that "ip" is used in preference to "ifconfig", because diff --git a/build-aux/automake.mk b/build-aux/automake.mk index 9007ecda9..6267ccd7c 100644 --- a/build-aux/automake.mk +++ b/build-aux/automake.mk @@ -5,7 +5,7 @@ EXTRA_DIST += \ build-aux/dist-docs \ build-aux/dpdkstrip.py \ 
build-aux/generate-dhparams-c \ - build-aux/initial-tab-whitelist \ + build-aux/initial-tab-allowed-files \ build-aux/sodepends.py \ build-aux/soexpand.py \ build-aux/text2c \ diff --git a/build-aux/initial-tab-whitelist b/build-aux/initial-tab-allowed-files similarity index 100% rename from build-aux/initial-tab-whitelist rename to build-aux/initial-tab-allowed-files diff --git a/build-aux/thread-safety-blacklist b/build-aux/thread-safety-forbidden similarity index 100% rename from build-aux/thread-safety-blacklist rename to build-aux/thread-safety-forbidden diff --git a/datapath/Makefile.am b/datapath/Makefile.am index f2a85bc3b..e4dd0c704 100644 --- a/datapath/Makefile.am +++ b/datapath/Makefile.am @@ -42,7 +42,7 @@ COMPAT_EXPORTS := $(shell $(COMPAT_GET_EXPORTS)) # Checks that all EXPORT_SYMBOL_GPL() export 'rpl_' or 'ovs_' prefixed functions. check-export-symbol: @for fun_ in $(COMPAT_FUNCTIONS); do \ - if ! grep -- $${fun_} $(top_srcdir)/datapath/linux/compat/build-aux/export-check-whitelist > /dev/null; then \ + if ! grep -- $${fun_} $(top_srcdir)/datapath/linux/compat/build-aux/export-check-allow-list > /dev/null; then \ if ! 
echo $${fun_} | grep -q -E '^(rpl|ovs)_'; then \ echo "error: $${fun_}() needs to be prefixed with 'rpl_' or 'ovs_'."; \ exit 1; \ diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk index 63a5cbae4..372243988 100644 --- a/datapath/linux/Modules.mk +++ b/datapath/linux/Modules.mk @@ -120,4 +120,4 @@ openvswitch_headers += \ linux/compat/include/linux/netfilter.h \ linux/compat/include/linux/overflow.h \ linux/compat/include/linux/rbtree.h -EXTRA_DIST += linux/compat/build-aux/export-check-whitelist +EXTRA_DIST += linux/compat/build-aux/export-check-allow-list diff --git a/datapath/linux/compat/build-aux/export-check-whitelist b/datapath/linux/compat/build-aux/export-check-allow-list similarity index 100% rename from datapath/linux/compat/build-aux/export-check-whitelist rename to datapath/linux/compat/build-aux/export-check-allow-list diff --git a/include/openvswitch/automake.mk b/include/openvswitch/automake.mk index 73c346175..1fa6d88fa 100644 --- a/include/openvswitch/automake.mk +++ b/include/openvswitch/automake.mk @@ -72,7 +72,7 @@ endif # header file has the proper extern declaration for use with C++. # # Some header files don't declare any external functions, so they -# don't really need extern "C". We only white list a couple of these +# don't really need extern "C". We only permit a couple of these # below, which are the ones that seem unlikely to ever declare # external functions. For the rest, we add extern "C" anyway; it # doesn't hurt. diff --git a/lib/daemon.man b/lib/daemon.man index 68c0a312d..a92f8c4d5 100644 --- a/lib/daemon.man +++ b/lib/daemon.man @@ -58,7 +58,7 @@ This option has no effect when \fB\-\-detach\fR is not specified. .TP \fB\-\-no\-self\-confinement\fR By default daemon will try to self-confine itself to work with -files under well-know, at build-time whitelisted directories. It +files under well-known directories determined during build. 
It is better to stick with this default behavior and not to use this flag unless some other Access Control is used to confine daemon. Note that in contrast to other access control implementations that diff --git a/lib/daemon.xml b/lib/daemon.xml index 1b5e8acae..5a421ccab 100644 --- a/lib/daemon.xml +++ b/lib/daemon.xml @@ -82,7 +82,7 @@
    --no-self-confinement
    By default this daemon will try to self-confine itself to work with files - under well-known directories whitelisted at build time. It is better to + under well-known directories determined at build time. It is better to stick with this default behavior and not to use this flag unless some other Access Control is used to confine daemon. Note that in contrast to other access control implementations that are typically enforced from diff --git a/lib/dpif.c b/lib/dpif.c index 7cac3a629..53d65cf7c 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -79,9 +79,9 @@ struct registered_dpif_class { int refcount; }; static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes); -static struct sset dpif_blacklist = SSET_INITIALIZER(&dpif_blacklist); +static struct sset dpif_disallowed = SSET_INITIALIZER(&dpif_disallowed); -/* Protects 'dpif_classes', including the refcount, and 'dpif_blacklist'. */ +/* Protects 'dpif_classes', including the refcount, and 'dpif_disallowed'. */ static struct ovs_mutex dpif_mutex = OVS_MUTEX_INITIALIZER; /* Rate limit for individual messages going to or from the datapath, output at @@ -134,8 +134,8 @@ dp_register_provider__(const struct dpif_class *new_class) struct registered_dpif_class *registered_class; int error; - if (sset_contains(&dpif_blacklist, new_class->type)) { - VLOG_DBG("attempted to register blacklisted provider: %s", + if (sset_contains(&dpif_disallowed, new_class->type)) { + VLOG_DBG("attempted to register disallowed provider: %s", new_class->type); return EINVAL; } @@ -219,13 +219,13 @@ dp_unregister_provider(const char *type) return error; } -/* Blacklists a provider. Causes future calls of dp_register_provider() with +/* Disallows a provider. Causes future calls of dp_register_provider() with * a dpif_class which implements 'type' to fail. 
*/ void -dp_blacklist_provider(const char *type) +dp_disallow_provider(const char *type) { ovs_mutex_lock(&dpif_mutex); - sset_add(&dpif_blacklist, type); + sset_add(&dpif_disallowed, type); ovs_mutex_unlock(&dpif_mutex); } diff --git a/lib/dpif.h b/lib/dpif.h index 2d52f0186..f8bba23fe 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -400,7 +400,7 @@ struct sset; int dp_register_provider(const struct dpif_class *); int dp_unregister_provider(const char *type); -void dp_blacklist_provider(const char *type); +void dp_disallow_provider(const char *type); void dp_enumerate_types(struct sset *types); const char *dpif_normalize_type(const char *); diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h index c56cd19b1..a1a577664 100644 --- a/lib/ovsdb-idl.h +++ b/lib/ovsdb-idl.h @@ -100,12 +100,12 @@ const struct ovsdb_idl_table_class *ovsdb_idl_table_class_from_column( * The client may choose any subset of the columns and tables to replicate, * specifying it one of two ways: * - * - As a blacklist (adding the columns or tables to replicate). To do so, + * - As an allow list (adding the columns or tables to replicate). To do so, * the client passes false as 'monitor_everything_by_default' to * ovsdb_idl_create() and then calls ovsdb_idl_add_column() and * ovsdb_idl_add_table() for the desired columns and, if necessary, tables. * - * - As a whitelist (replicating all columns and tables except those + * - As a deny list (replicating all columns and tables except those * explicitly removed). To do so, the client passes true as * 'monitor_everything_by_default' to ovsdb_idl_create() and then calls * ovsdb_idl_omit() to remove columns.
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index d772edbe0..73a155b3f 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -1391,7 +1391,7 @@ ovsdb_server_set_sync_exclude_tables(struct unixctl_conn *conn, { struct server_config *config = config_; - char *err = set_blacklist_tables(argv[1], true); + char *err = set_excluded_tables(argv[1], true); if (!err) { free(*config->sync_exclude); *config->sync_exclude = xstrdup(argv[1]); @@ -1403,7 +1403,7 @@ ovsdb_server_set_sync_exclude_tables(struct unixctl_conn *conn, config->all_dbs, server_uuid, *config->replication_probe_interval); } - err = set_blacklist_tables(argv[1], false); + err = set_excluded_tables(argv[1], false); } unixctl_command_reply(conn, err); free(err); @@ -1415,7 +1415,7 @@ ovsdb_server_get_sync_exclude_tables(struct unixctl_conn *conn, const char *argv[] OVS_UNUSED, void *arg_ OVS_UNUSED) { - char *reply = get_blacklist_tables(); + char *reply = get_excluded_tables(); unixctl_command_reply(conn, reply); free(reply); } @@ -1853,7 +1853,7 @@ parse_options(int argc, char *argv[], break; case OPT_SYNC_EXCLUDE: { - char *err = set_blacklist_tables(optarg, false); + char *err = set_excluded_tables(optarg, false); if (err) { ovs_fatal(0, "%s", err); } diff --git a/ovsdb/replication.c b/ovsdb/replication.c index cbbce64df..bb1bd4250 100644 --- a/ovsdb/replication.c +++ b/ovsdb/replication.c @@ -68,11 +68,11 @@ static struct ovsdb_error *execute_update(struct ovsdb_txn *txn, struct json *new); /* Maps from db name to sset of table names. 
*/ -static struct shash blacklist_tables = SHASH_INITIALIZER(&blacklist_tables); +static struct shash excluded_tables = SHASH_INITIALIZER(&excluded_tables); -static void blacklist_tables_clear(void); -static void blacklist_tables_add(const char *database, const char *table); -static bool blacklist_tables_find(const char *database, const char* table); +static void excluded_tables_clear(void); +static void excluded_tables_add(const char *database, const char *table); +static bool excluded_tables_find(const char *database, const char *table); /* Keep track of request IDs of all outstanding OVSDB requests. */ @@ -131,7 +131,7 @@ replication_init(const char *sync_from_, const char *exclude_tables, sync_from = xstrdup(sync_from_); /* Caller should have verified that the 'exclude_tables' is * parseable. An error here is unexpected. */ - ovs_assert(!set_blacklist_tables(exclude_tables, false)); + ovs_assert(!set_excluded_tables(exclude_tables, false)); replication_dbs_destroy(); @@ -407,38 +407,38 @@ replication_wait(void) } } -/* Parse 'blacklist' to rebuild 'blacklist_tables'. If 'dryrun' is false, the - * current black list tables will be wiped out, regardless of whether - * 'blacklist' can be parsed. If 'dryrun' is true, only parses 'blacklist' and - * reports any errors, without modifying the blacklist. +/* Parse 'excluded' to rebuild 'excluded_tables'. If 'dryrun' is false, the + * current set of excluded tables will be wiped out, regardless of whether + * 'excluded' can be parsed. If 'dryrun' is true, only parses 'excluded' and + * reports any errors, without modifying the list of exclusions. * * On error, returns the error string, which the caller is * responsible for freeing. Returns NULL otherwise. 
*/ char * OVS_WARN_UNUSED_RESULT -set_blacklist_tables(const char *blacklist, bool dryrun) +set_excluded_tables(const char *excluded, bool dryrun) { struct sset set = SSET_INITIALIZER(&set); char *err = NULL; - if (blacklist) { + if (excluded) { const char *longname; if (!dryrun) { /* Can only add to an empty shash. */ - blacklist_tables_clear(); + excluded_tables_clear(); } - sset_from_delimited_string(&set, blacklist, " ,"); + sset_from_delimited_string(&set, excluded, " ,"); SSET_FOR_EACH (longname, &set) { char *database = xstrdup(longname), *table = NULL; strtok_r(database, ":", &table); if (table && !dryrun) { - blacklist_tables_add(database, table); + excluded_tables_add(database, table); } free(database); if (!table) { - err = xasprintf("Can't parse black list table: %s", longname); + err = xasprintf("Can't parse excluded table: %s", longname); goto done; } } @@ -447,19 +447,19 @@ set_blacklist_tables(const char *blacklist, bool dryrun) done: sset_destroy(&set); if (err && !dryrun) { - /* On error, destroy the partially built 'blacklist_tables'. */ - blacklist_tables_clear(); + /* On error, destroy the partially built 'excluded_tables'. 
*/ + excluded_tables_clear(); } return err; } char * OVS_WARN_UNUSED_RESULT -get_blacklist_tables(void) +get_excluded_tables(void) { struct shash_node *node; struct sset set = SSET_INITIALIZER(&set); - SHASH_FOR_EACH (node, &blacklist_tables) { + SHASH_FOR_EACH (node, &excluded_tables) { const char *database = node->name; const char *table; struct sset *tables = node->data; @@ -489,35 +489,35 @@ get_blacklist_tables(void) } static void -blacklist_tables_clear(void) +excluded_tables_clear(void) { struct shash_node *node; - SHASH_FOR_EACH (node, &blacklist_tables) { + SHASH_FOR_EACH (node, &excluded_tables) { struct sset *tables = node->data; sset_destroy(tables); } - shash_clear_free_data(&blacklist_tables); + shash_clear_free_data(&excluded_tables); } static void -blacklist_tables_add(const char *database, const char *table) +excluded_tables_add(const char *database, const char *table) { - struct sset *tables = shash_find_data(&blacklist_tables, database); + struct sset *tables = shash_find_data(&excluded_tables, database); if (!tables) { tables = xmalloc(sizeof *tables); sset_init(tables); - shash_add(&blacklist_tables, database, tables); + shash_add(&excluded_tables, database, tables); } sset_add(tables, table); } static bool -blacklist_tables_find(const char *database, const char *table) +excluded_tables_find(const char *database, const char *table) { - struct sset *tables = shash_find_data(&blacklist_tables, database); + struct sset *tables = shash_find_data(&excluded_tables, database); return tables && sset_contains(tables, table); } @@ -531,8 +531,8 @@ disconnect_active_server(void) void replication_destroy(void) { - blacklist_tables_clear(); - shash_destroy(&blacklist_tables); + excluded_tables_clear(); + shash_destroy(&excluded_tables); if (sync_from) { free(sync_from); @@ -558,8 +558,8 @@ reset_database(struct ovsdb *db) struct shash_node *table_node; SHASH_FOR_EACH (table_node, &db->tables) { - /* Delete all rows if the table is not blacklisted. 
*/ - if (!blacklist_tables_find(db->schema->name, table_node->name)) { + /* Delete all rows if the table is not excluded. */ + if (!excluded_tables_find(db->schema->name, table_node->name)) { struct ovsdb_table *table = table_node->data; struct ovsdb_row *row, *next; HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) { @@ -572,7 +572,7 @@ reset_database(struct ovsdb *db) } /* Create a monitor request for 'db'. The monitor request will include - * any tables from 'blacklisted_tables' + * all tables except those in 'excluded_tables'. * * Caller is responsible for disposing 'request'. */ @@ -590,8 +590,8 @@ create_monitor_request(struct ovsdb_schema *schema) for (int j = 0; j < n; j++) { struct ovsdb_table_schema *table = nodes[j]->data; - /* Monitor all tables not blacklisted. */ - if (!blacklist_tables_find(db_name, table->name)) { + /* Monitor all tables not excluded. */ + if (!excluded_tables_find(db_name, table->name)) { add_monitored_table(table, monitor_request); } } @@ -914,10 +914,10 @@ replication_status(void) } ds_chomp(&ds, ','); - if (!shash_is_empty(&blacklist_tables)) { + if (!shash_is_empty(&excluded_tables)) { ds_put_char(&ds, '\n'); ds_put_cstr(&ds, "exclude: "); - ds_put_and_free_cstr(&ds, get_blacklist_tables()); + ds_put_and_free_cstr(&ds, get_excluded_tables()); } break; } diff --git a/ovsdb/replication.h b/ovsdb/replication.h index c45f33e26..6d1be820f 100644 --- a/ovsdb/replication.h +++ b/ovsdb/replication.h @@ -39,9 +39,9 @@ struct ovsdb; * replication_get_last_error() should be call within the main loop * whenever OVSDB server runs in the backup mode. * - * - set_blacklist_tables(), get_blacklist_tables(), - * disconnect_active_server() and replication_usage() are support functions - * used mainly by uinxctl commands. + * - set_excluded_tables(), get_excluded_tables(), disconnect_active_server() + * and replication_usage() are support functions used mainly by unixctl + * commands.
*/ #define REPLICATION_DEFAULT_PROBE_INTERVAL 60000 @@ -58,9 +58,9 @@ int replication_get_last_error(void); char *replication_status(void); void replication_set_probe_interval(int); -char *set_blacklist_tables(const char *blacklist, bool dryrun) +char *set_excluded_tables(const char *excluded, bool dryrun) OVS_WARN_UNUSED_RESULT; -char *get_blacklist_tables(void) OVS_WARN_UNUSED_RESULT; +char *get_excluded_tables(void) OVS_WARN_UNUSED_RESULT; void disconnect_active_server(void); #endif /* ovsdb/replication.h */ diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at index 87f9ae280..736d9809c 100644 --- a/tests/ofproto-macros.at +++ b/tests/ofproto-macros.at @@ -304,11 +304,11 @@ add_pmd_of_ports () { m4_divert_pop([PREPARE_TESTS]) -# OVS_VSWITCHD_STOP([WHITELIST]) +# OVS_VSWITCHD_STOP([ALLOWLIST]) # # Gracefully stops ovs-vswitchd and ovsdb-server, checking their log files # for messages with severity WARN or higher and signaling an error if any -# is present. The optional WHITELIST may contain shell-quoted "sed" +# is present. The optional ALLOWLIST may contain shell-quoted "sed" # commands to delete any warnings that are actually expected, e.g.: # # OVS_VSWITCHD_STOP(["/expected error/d"]) diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at index daf66bdec..15628a7c6 100644 --- a/tests/system-kmod-macros.at +++ b/tests/system-kmod-macros.at @@ -29,16 +29,16 @@ m4_define([OVS_TRAFFIC_VSWITCHD_START], AT_CHECK([ovs-vsctl -- _ADD_BR([br0]) -- $1 m4_if([$2], [], [], [| uuidfilt])], [0], [$2]) ]) -# OVS_TRAFFIC_VSWITCHD_STOP([WHITELIST], [extra_cmds]) +# OVS_TRAFFIC_VSWITCHD_STOP([ALLOWLIST], [extra_cmds]) # # Gracefully stops ovs-vswitchd and ovsdb-server, checking their log files # for messages with severity WARN or higher and signaling an error if any -# is present. The optional WHITELIST may contain shell-quoted "sed" +# is present. 
The optional ALLOWLIST may contain shell-quoted "sed" # commands to delete any warnings that are actually expected, e.g.: # # OVS_TRAFFIC_VSWITCHD_STOP(["/expected error/d"]) # -# 'extra_cmds' are shell commands to be executed afte OVS_VSWITCHD_STOP() is +# 'extra_cmds' are shell commands to be executed after OVS_VSWITCHD_STOP() is # invoked. They can be used to perform additional cleanups such as name space # removal. m4_define([OVS_TRAFFIC_VSWITCHD_STOP], diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at index 72c84b9c7..34f82cee3 100644 --- a/tests/system-userspace-macros.at +++ b/tests/system-userspace-macros.at @@ -21,16 +21,16 @@ m4_define([OVS_TRAFFIC_VSWITCHD_START], AT_CHECK([ovs-vsctl -- _ADD_BR([br0]) -- $1 m4_if([$2], [], [], [| uuidfilt])], [0], [$2]) ]) -# OVS_TRAFFIC_VSWITCHD_STOP([WHITELIST], [extra_cmds]) +# OVS_TRAFFIC_VSWITCHD_STOP([ALLOWLIST], [extra_cmds]) # # Gracefully stops ovs-vswitchd and ovsdb-server, checking their log files # for messages with severity WARN or higher and signaling an error if any -# is present. The optional WHITELIST may contain shell-quoted "sed" +# is present. The optional ALLOWLIST may contain shell-quoted "sed" # commands to delete any warnings that are actually expected, e.g.: # # OVS_TRAFFIC_VSWITCHD_STOP(["/expected error/d"]) # -# 'extra_cmds' are shell commands to be executed afte OVS_VSWITCHD_STOP() is +# 'extra_cmds' are shell commands to be executed after OVS_VSWITCHD_STOP() is # invoked. They can be used to perform additional cleanups such as name space # removal. m4_define([OVS_TRAFFIC_VSWITCHD_STOP], diff --git a/tests/test-classifier.c b/tests/test-classifier.c index 2d98fad48..cff00c8fa 100644 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@ -14,7 +14,8 @@ * limitations under the License. */ -/* "White box" tests for classifier. +/* Tests for classifier, written with knowledge of and to advantage of the + * classifier's internal structure. 
* * With very few exceptions, these tests obtain complete coverage of every * basic block and every branch in the classifier implementation, e.g. a clean diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py index 7f1d21a40..ed231fa6f 100755 --- a/utilities/checkpatch.py +++ b/utilities/checkpatch.py @@ -190,13 +190,13 @@ skip_signoff_check = False # name, as they may have legitimate reasons to have longer lines. # # Python isn't checked as flake8 performs these checks during build. -line_length_blacklist = re.compile( +line_length_ignore_list = re.compile( r'\.(am|at|etc|in|m4|mk|patch|py)$|debian/rules') # Don't enforce a requirement that leading whitespace be all spaces on # files that include these characters in their name, since these kinds # of files need lines with leading tabs. -leading_whitespace_blacklist = re.compile(r'\.(mk|am|at)$|debian/rules') +leading_whitespace_ignore_list = re.compile(r'\.(mk|am|at)$|debian/rules') def is_subtracted_line(line): @@ -523,11 +523,11 @@ file_checks = [ checks = [ {'regex': None, - 'match_name': lambda x: not line_length_blacklist.search(x), + 'match_name': lambda x: not line_length_ignore_list.search(x), 'check': lambda x: line_length_check(x)}, {'regex': None, - 'match_name': lambda x: not leading_whitespace_blacklist.search(x), + 'match_name': lambda x: not leading_whitespace_ignore_list.search(x), 'check': lambda x: not leading_whitespace_is_spaces(x), 'print': lambda: print_warning("Line has non-spaces leading whitespace")}, diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 41989cf6c..62697e89b 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -3909,49 +3909,48 @@ bridge_configure_remotes(struct bridge *br, && (!strncmp(c->target, "punix:", 6) || !strncmp(c->target, "unix:", 5))) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); - char *whitelist; + char *allowed; if (!strncmp(c->target, "unix:", 5)) { /* Connect to a listening socket */ - whitelist = xasprintf("unix:%s/", 
ovs_rundir()); + allowed = xasprintf("unix:%s/", ovs_rundir()); if (strchr(c->target, '/') && - !equal_pathnames(c->target, whitelist, - strlen(whitelist))) { + !equal_pathnames(c->target, allowed, strlen(allowed))) { /* Absolute path specified, but not in ovs_rundir */ VLOG_ERR_RL(&rl, "bridge %s: Not connecting to socket " "controller \"%s\" due to possibility for " "remote exploit. Instead, specify socket " - "in whitelisted \"%s\" or connect to " + "in permitted directory \"%s\" or connect to " "\"unix:%s/%s.mgmt\" (which is always " "available without special configuration).", - br->name, c->target, whitelist, + br->name, c->target, allowed, ovs_rundir(), br->name); - free(whitelist); + free(allowed); continue; } } else { - whitelist = xasprintf("punix:%s/%s.", + allowed = xasprintf("punix:%s/%s.", ovs_rundir(), br->name); - if (!equal_pathnames(c->target, whitelist, strlen(whitelist)) - || strchr(c->target + strlen(whitelist), '/')) { + if (!equal_pathnames(c->target, allowed, strlen(allowed)) + || strchr(c->target + strlen(allowed), '/')) { /* Prevent remote ovsdb-server users from accessing * arbitrary Unix domain sockets and overwriting arbitrary * local files. */ VLOG_ERR_RL(&rl, "bridge %s: Not adding Unix domain socket " "controller \"%s\" due to possibility of " "overwriting local files. 
Instead, specify " - "path in whitelisted format \"%s*\" or " + "path in permitted format \"%s*\" or " "connect to \"unix:%s/%s.mgmt\" (which is " "always available without special " "configuration).", - br->name, c->target, whitelist, + br->name, c->target, allowed, ovs_rundir(), br->name); - free(whitelist); + free(allowed); continue; } } - free(whitelist); + free(allowed); } bridge_configure_local_iface_netdev(br, c); diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c index 1e72b628b..f007f9c0b 100644 --- a/vswitchd/ovs-vswitchd.c +++ b/vswitchd/ovs-vswitchd.c @@ -228,7 +228,7 @@ parse_options(int argc, char *argv[], char **unixctl_pathp) break; case OPT_DISABLE_SYSTEM: - dp_blacklist_provider("system"); + dp_disallow_provider("system"); break; case OPT_DISABLE_SYSTEM_ROUTE: diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 4958c7c95..0e25e018e 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1072,7 +1072,7 @@ Drop all unencrypted tunneled packets in which the least-significant bit of skb_mark is 0. This would be a useful policy if no unencrypted tunneled traffic should exit - the system without being specially whitelisted by setting + the system without being specially permitted by setting skb_mark to 1.
    -- GitLab From 5821a592a4d73ea3466d5b2b051181c6e71d3c8a Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Mon, 12 Oct 2020 13:24:48 -0700 Subject: [PATCH 308/432] datapath: return an error instead of doing BUG_ON() Upstream commit: commit a734d1f4c2fc962ef4daa179e216df84a8ec5f84 Author: Eelco Chaudron Date: Thu May 2 16:12:38 2019 -0400 net: openvswitch: return an error instead of doing BUG_ON() For all other error cases in queue_userspace_packet() the error is returned, so it makes sense to do the same for these two error cases. Reported-by: Davide Caratti Signed-off-by: Eelco Chaudron Acked-by: Flavio Leitner Signed-off-by: David S. Miller Acked-by: Eelco Chaudron Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 05c1e4274..d604bfd36 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -469,7 +469,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall->dp_ifindex = dp_ifindex; err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); - BUG_ON(err); + if (err) + goto out; if (upcall_info->userdata) __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA, @@ -486,7 +487,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } err = ovs_nla_put_tunnel_info(user_skb, upcall_info->egress_tun_info); - BUG_ON(err); + if (err) + goto out; + nla_nest_end(user_skb, nla); } -- GitLab From fa764e8fb10cb02e602a79e0951456f8000e01f4 Mon Sep 17 00:00:00 2001 From: Enrico Weigelt Date: Mon, 12 Oct 2020 13:24:49 -0700 Subject: [PATCH 309/432] datapath: drop unneeded likely() call around IS_ERR() Upstream commit: commit b90f5aa4d6268e81dd1fd51e5ef89d2892bf040d Author: Enrico Weigelt Date: Wed Jun 5 23:06:40 2019 +0200 net: openvswitch: drop unneeded likely() call around IS_ERR() IS_ERR() already calls unlikely(), so this extra likely() 
call around the !IS_ERR() is not needed. Signed-off-by: Enrico Weigelt Signed-off-by: David S. Miller Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index d604bfd36..4c485c88a 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1402,7 +1402,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) &flow->id, info, false, ufid_flags); if (likely(reply)) { - if (likely(!IS_ERR(reply))) { + if (!IS_ERR(reply)) { rcu_read_lock(); /*To keep RCU checker happy. */ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, -- GitLab From ec335e0b74e4811ff504a83e00c8379bb2b0485b Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Mon, 12 Oct 2020 13:24:50 -0700 Subject: [PATCH 310/432] datapath: do not update max_headroom if new headroom is equal to old headroom Upstream commit: commit 6b660c4177aaebdc73df7a3378f0e8b110aa4b51 Author: Taehee Yoo Date: Sat Jul 6 01:08:09 2019 +0900 net: openvswitch: do not update max_headroom if new headroom is equal to old headroom When a vport is deleted, the maximum headroom size would be changed. If the vport which has the largest headroom is deleted, the new max_headroom would be set. But, if the new headroom size is equal to the old headroom size, updating routine is unnecessary. Signed-off-by: Taehee Yoo Tested-by: Greg Rose Reviewed-by: Greg Rose Signed-off-by: David S. 
Miller Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 4c485c88a..2879f24ef 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -2072,10 +2072,9 @@ static struct vport *lookup_vport(struct net *net, } -/* Called with ovs_mutex */ -static void update_headroom(struct datapath *dp) +static unsigned int ovs_get_max_headroom(struct datapath *dp) { - unsigned dev_headroom, max_headroom = 0; + unsigned int dev_headroom, max_headroom = 0; struct net_device *dev; struct vport *vport; int i; @@ -2089,10 +2088,19 @@ static void update_headroom(struct datapath *dp) } } - dp->max_headroom = max_headroom; + return max_headroom; +} + +/* Called with ovs_mutex */ +static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom) +{ + struct vport *vport; + int i; + + dp->max_headroom = new_headroom; for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) - netdev_set_rx_headroom(vport->dev, max_headroom); + netdev_set_rx_headroom(vport->dev, new_headroom); } static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) @@ -2103,6 +2111,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sk_buff *reply; struct vport *vport; struct datapath *dp; + unsigned int new_headroom; u32 port_no; int err; @@ -2165,8 +2174,10 @@ restart: OVS_VPORT_CMD_NEW); BUG_ON(err < 0); - if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) - update_headroom(dp); + new_headroom = netdev_get_fwd_headroom(vport->dev); + + if (new_headroom > dp->max_headroom) + ovs_update_headroom(dp, new_headroom); else netdev_set_rx_headroom(vport->dev, dp->max_headroom); @@ -2235,11 +2246,12 @@ exit_unlock_free: static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) { - bool 
must_update_headroom = false; + bool update_headroom = false; struct nlattr **a = info->attrs; struct sk_buff *reply; struct datapath *dp; struct vport *vport; + unsigned int new_headroom; int err; reply = ovs_vport_cmd_alloc_info(); @@ -2265,13 +2277,17 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) /* the vport deletion may trigger dp headroom update */ dp = vport->dp; if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom) - must_update_headroom = true; + update_headroom = true; + netdev_reset_rx_headroom(vport->dev); ovs_dp_detach_port(vport); - if (must_update_headroom) - update_headroom(dp); + if (update_headroom) { + new_headroom = ovs_get_max_headroom(dp); + if (new_headroom < dp->max_headroom) + ovs_update_headroom(dp, new_headroom); + } ovs_unlock(); ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info); -- GitLab From 381a020b4883e41686d7a4a85582755cb6fce088 Mon Sep 17 00:00:00 2001 From: Yifeng Sun Date: Mon, 12 Oct 2020 13:24:51 -0700 Subject: [PATCH 311/432] datapath: Print error when ovs_execute_actions() fails Upstream commit: commit aa733660dbd8d9192b8c528ae0f4b84f3fef74e4 Author: Yifeng Sun Date: Sun Aug 4 19:56:11 2019 -0700 openvswitch: Print error when ovs_execute_actions() fails Currently in function ovs_dp_process_packet(), return values of ovs_execute_actions() are silently discarded. This patch prints out an debug message when error happens so as to provide helpful hints for debugging. Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Reviewed-by: Yifeng Sun Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 2879f24ef..c8c21d774 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -240,6 +240,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) struct dp_stats_percpu *stats; u64 *stats_counter; u32 n_mask_hit; + int error; stats = this_cpu_ptr(dp->stats_percpu); @@ -248,7 +249,6 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; - int error; memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; @@ -265,7 +265,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ovs_flow_stats_update(flow, key->tp.flags, skb); sf_acts = rcu_dereference(flow->sf_acts); - ovs_execute_actions(dp, skb, sf_acts, key); + error = ovs_execute_actions(dp, skb, sf_acts, key); + if (unlikely(error)) + net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n", + ovs_dp_name(dp), error); stats_counter = &stats->n_hit; -- GitLab From dca968421f89dfb3e8294c7621f918eb690390aa Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Mon, 12 Oct 2020 13:24:52 -0700 Subject: [PATCH 312/432] datapath: Set OvS recirc_id from tc chain index Upstream commit: commit 95a7233c452a58a4c2310c456c73997853b2ec46 Author: Paul Blakey Date: Wed Sep 4 16:56:37 2019 +0300 net: openvswitch: Set OvS recirc_id from tc chain index Offloaded OvS datapath rules are translated one to one to tc rules, for example the following simplified OvS rule: recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2) Will be translated to the following tc rule: $ tc filter add dev dev1 ingress \ prio 1 chain 0 proto ip \ flower tcp ct_state -trk \ action ct pipe \ action goto chain 2 Received packets 
will first travel through tc, and if they aren't stolen by it, like in the above rule, they will continue to OvS datapath. Since we already did some actions (action ct in this case) which might modify the packets, and updated action stats, we would like to continue the processing with the correct recirc_id in OvS (here recirc_id(2)) where we left off. To support this, introduce a new skb extension for tc, which will be used for translating tc chain to ovs recirc_id to handle these miss cases. Last tc chain index will be set by tc goto chain action and read by OvS datapath. Signed-off-by: Paul Blakey Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Backport the local datapath changes from this patch and add compat layer fixup for the DECLARE_STATIC_KEY_FALSE macro. Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- acinclude.m4 | 3 ++ datapath/datapath.c | 38 ++++++++++++++++--- datapath/datapath.h | 2 + datapath/flow.c | 13 +++++++ .../linux/compat/include/linux/static_key.h | 7 ++++ 5 files changed, 58 insertions(+), 5 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 84f344da0..3d56510a0 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -631,6 +631,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ [OVS_DEFINE([HAVE_UPSTREAM_STATIC_KEY])]) OVS_GREP_IFELSE([$KSRC/include/linux/jump_label.h], [DEFINE_STATIC_KEY_FALSE], [OVS_DEFINE([HAVE_DEFINE_STATIC_KEY])]) + OVS_GREP_IFELSE([$KSRC/include/linux/jump_label.h], + [DECLARE_STATIC_KEY_FALSE], + [OVS_DEFINE([HAVE_DECLARE_STATIC_KEY])]) OVS_GREP_IFELSE([$KSRC/include/linux/etherdevice.h], [eth_hw_addr_random]) OVS_GREP_IFELSE([$KSRC/include/linux/etherdevice.h], [ether_addr_copy]) diff --git a/datapath/datapath.c b/datapath/datapath.c index c8c21d774..009887691 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1635,10 +1635,34 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
dp->user_features = 0; } -static void ovs_dp_change(struct datapath *dp, struct nlattr *a[]) +DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + +static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { - if (a[OVS_DP_ATTR_USER_FEATURES]) - dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + u32 user_features = 0; + + if (a[OVS_DP_ATTR_USER_FEATURES]) { + user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + + if (user_features & ~(OVS_DP_F_VPORT_PIDS | + OVS_DP_F_UNALIGNED | + OVS_DP_F_TC_RECIRC_SHARING)) + return -EOPNOTSUPP; + +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (user_features & OVS_DP_F_TC_RECIRC_SHARING) + return -EOPNOTSUPP; +#endif + } + + dp->user_features = user_features; + + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) + static_branch_enable(&tc_recirc_sharing_support); + else + static_branch_disable(&tc_recirc_sharing_support); + + return 0; } static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) @@ -1700,7 +1724,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - ovs_dp_change(dp, a); + err = ovs_dp_change(dp, a); + if (err) + goto err_destroy_meters; /* So far only local changes have been made, now need the lock. 
*/ ovs_lock(); @@ -1825,7 +1851,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto err_unlock_free; - ovs_dp_change(dp, info->attrs); + err = ovs_dp_change(dp, info->attrs); + if (err) + goto err_unlock_free; err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_GET); diff --git a/datapath/datapath.h b/datapath/datapath.h index f99db1fde..c377e9b24 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -251,6 +251,8 @@ extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; extern const struct genl_multicast_group ovs_dp_vport_multicast_group; +DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, diff --git a/datapath/flow.c b/datapath/flow.c index 6dc7402d5..5a00c238c 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -874,6 +874,9 @@ static int key_extract_mac_proto(struct sk_buff *skb) int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *tc_ext; +#endif int res, err; /* Extract metadata from packet. */ @@ -904,7 +907,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, if (res < 0) return res; key->mac_proto = res; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (static_branch_unlikely(&tc_recirc_sharing_support)) { + tc_ext = skb_ext_find(skb, TC_SKB_EXT); + key->recirc_id = tc_ext ? 
tc_ext->chain : 0; + } else { + key->recirc_id = 0; + } +#else key->recirc_id = 0; +#endif err = key_extract(skb, key); if (!err) diff --git a/datapath/linux/compat/include/linux/static_key.h b/datapath/linux/compat/include/linux/static_key.h index 7e43a49e8..432feccb9 100644 --- a/datapath/linux/compat/include/linux/static_key.h +++ b/datapath/linux/compat/include/linux/static_key.h @@ -74,6 +74,13 @@ static inline void rpl_static_key_disable(struct static_key *key) #define static_branch_enable(x) rpl_static_key_enable(&(x)->key) #define static_branch_disable(x) rpl_static_key_disable(&(x)->key) +#ifndef HAVE_DECLARE_STATIC_KEY +#define DECLARE_STATIC_KEY_TRUE(name) \ + extern struct static_key_true name +#define DECLARE_STATIC_KEY_FALSE(name) \ + extern struct static_key_false name +#endif + #endif /* HAVE_UPSTREAM_STATIC_KEY */ #endif /* _STATIC_KEY_WRAPPER_H */ -- GitLab From b7465cac76fdcef5e6437ec538dd234a26a6fd45 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 12 Oct 2020 13:24:53 -0700 Subject: [PATCH 313/432] datapath: fix GFP flags in rtnl_net_notifyid() Upstream commit: commit d4e4fdf9e4a27c87edb79b1478955075be141f67 Author: Guillaume Nault Date: Wed Oct 23 18:39:04 2019 +0200 netns: fix GFP flags in rtnl_net_notifyid() In rtnl_net_notifyid(), we certainly can't pass a null GFP flag to rtnl_notify(). A GFP_KERNEL flag would be fine in most circumstances, but there are a few paths calling rtnl_net_notifyid() from atomic context or from RCU critical sections. The latter also precludes the use of gfp_any() as it wouldn't detect the RCU case. Also, the nlmsg_new() call is wrong too, as it uses GFP_KERNEL unconditionally. Therefore, we need to pass the GFP flags as a parameter and propagate them through function calls until the proper flags can be determined. In most cases, GFP_KERNEL is fine.
The exceptions are: * openvswitch: ovs_vport_cmd_get() and ovs_vport_cmd_dump() indirectly call rtnl_net_notifyid() from RCU critical section, * rtnetlink: rtmsg_ifinfo_build_skb() already receives GFP flags as parameter. Also, in ovs_vport_cmd_build_info(), let's change the GFP flags used by nlmsg_new(). The function is allowed to sleep, so better make the flags consistent with the ones used in the following ovs_vport_cmd_fill_info() call. Found by code inspection. Fixes: 9a9634545c70 ("netns: notify netns id events") Signed-off-by: Guillaume Nault Acked-by: Nicolas Dichtel Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Backport the datapath.c portion of this fix. Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 009887691..aceb655bd 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1992,7 +1992,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { /* Called with ovs_mutex or RCU read lock. 
*/ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, struct net *net, u32 portid, u32 seq, - u32 flags, u8 cmd) + u32 flags, u8 cmd, gfp_t gfp) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; @@ -2014,7 +2014,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, #ifdef HAVE_PEERNET2ID_ALLOC if (!net_eq(net, dev_net(vport->dev))) { - int id = peernet2id_alloc(net, dev_net(vport->dev)); + int id = peernet2id_alloc(net, dev_net(vport->dev), gfp); if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) goto nla_put_failure; @@ -2056,11 +2056,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, struct sk_buff *skb; int retval; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd, + GFP_KERNEL); BUG_ON(retval < 0); return skb; @@ -2202,7 +2203,7 @@ restart: err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_NEW); + OVS_VPORT_CMD_NEW, GFP_KERNEL); BUG_ON(err < 0); new_headroom = netdev_get_fwd_headroom(vport->dev); @@ -2262,7 +2263,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_SET); + OVS_VPORT_CMD_SET, GFP_KERNEL); BUG_ON(err < 0); ovs_unlock(); @@ -2302,7 +2303,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_DEL); + OVS_VPORT_CMD_DEL, GFP_KERNEL); BUG_ON(err < 0); /* the vport deletion may trigger dp headroom update */ @@ -2349,7 +2350,7 @@ static int ovs_vport_cmd_get(struct sk_buff 
*skb, struct genl_info *info) goto exit_unlock_free; err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, - OVS_VPORT_CMD_GET); + OVS_VPORT_CMD_GET, GFP_ATOMIC); BUG_ON(err < 0); rcu_read_unlock(); @@ -2385,7 +2386,8 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_VPORT_CMD_GET) < 0) + OVS_VPORT_CMD_GET, + GFP_ATOMIC) < 0) goto out; j++; -- GitLab From c07157ca26b5523a9301e375c34d331ecf99d53b Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 12 Oct 2020 13:24:54 -0700 Subject: [PATCH 314/432] datapath: don't unlock mutex when changing the user_features fails Upstream commit: commit 4c76bf696a608ea5cc555fe97ec59a9033236604 Author: Tonghao Zhang Date: Fri Nov 1 22:23:53 2019 +0800 net: openvswitch: don't unlock mutex when changing the user_features fails Unlocking of a not locked mutex is not allowed. Other kernel thread may be in critical section while we unlock it because of setting user_feature fail. Fixes: 95a7233c4 ("net: openvswitch: Set OvS recirc_id from tc chain index") Cc: Paul Blakey Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Reviewed-by: Tonghao Zhang Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index aceb655bd..cf216c8ec 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1746,6 +1746,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } + ovs_unlock(); goto err_destroy_meters; } @@ -1762,7 +1763,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) return 0; err_destroy_meters: - ovs_unlock(); ovs_meters_exit(dp); err_destroy_ports_array: kfree(dp->ports); -- GitLab From fef7abfc05ae642b7759603c28021f510a3b9b38 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 12 Oct 2020 13:24:55 -0700 Subject: [PATCH 315/432] datapath: optimize flow-mask looking up Upstream commit: commit 57f7d7b9164426c496300d254fd5167fbbf205ea Author: Tonghao Zhang Date: Fri Nov 1 22:23:49 2019 +0800 net: openvswitch: optimize flow-mask looking up The full looking up on flow table traverses all mask array. If mask-array is too large, the number of invalid flow-mask increase, performance will be drop. One bad case, for example: M means flow-mask is valid and NULL of flow-mask means deleted. +-------------------------------------------+ | M | NULL | ... | NULL | M| +-------------------------------------------+ In that case, without this patch, openvswitch will traverses all mask array, because there will be one flow-mask in the tail. This patch changes the way of flow-mask inserting and deleting, and the mask array will be keep as below: there is not a NULL hole. In the fast path, we can "break" "for" (not "continue") in flow_lookup when we get a NULL flow-mask. "break" v +-------------------------------------------+ | M | M | NULL |... 
| NULL | NULL| +-------------------------------------------+ This patch doesn't optimize the slow or control paths, which still use ma->max to traverse. Slow path: * tbl_mask_array_realloc * ovs_flow_tbl_lookup_exact * flow_mask_find Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Reviewed-by: Tonghao Zhang Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/flow_table.c | 103 ++++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 50 deletions(-) diff --git a/datapath/flow_table.c b/datapath/flow_table.c index 76b390e9c..62d726ddd 100644 --- a/datapath/flow_table.c +++ b/datapath/flow_table.c @@ -540,8 +540,8 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, u32 *n_mask_hit, u32 *index) { - struct sw_flow_mask *mask; struct sw_flow *flow; + struct sw_flow_mask *mask; int i; if (*index < ma->max) { @@ -560,7 +560,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, mask = rcu_dereference_ovsl(ma->masks[i]); if (!mask) - continue; + break; flow = masked_flow_lookup(ti, key, mask, n_mask_hit); if (flow) { /* Found */ @@ -716,7 +716,7 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) struct mask_array *ma; ma = rcu_dereference_ovsl(table->mask_array); - return ma->count; + return READ_ONCE(ma->count); } static struct table_instance *table_instance_expand(struct table_instance *ti, @@ -725,21 +725,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } -static void tbl_mask_array_delete_mask(struct mask_array *ma, - struct sw_flow_mask *mask) +static void tbl_mask_array_del_mask(struct flow_table *tbl, + struct sw_flow_mask *mask) { - int i; + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int i, ma_count = READ_ONCE(ma->count); /* Remove the deleted mask pointers from the array */ - for (i = 0; i < ma->max; i++) { - if (mask == 
ovsl_dereference(ma->masks[i])) { - RCU_INIT_POINTER(ma->masks[i], NULL); - ma->count--; - kfree_rcu(mask, rcu); - return; - } + for (i = 0; i < ma_count; i++) { + if (mask == ovsl_dereference(ma->masks[i])) + goto found; } + BUG(); + return; + +found: + WRITE_ONCE(ma->count, ma_count -1); + + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); + + kfree_rcu(mask, rcu); + + /* Shrink the mask array if necessary. */ + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && + ma_count <= (ma->max / 3)) + tbl_mask_array_realloc(tbl, ma->max / 2); } /* Remove 'mask' from the mask list, if it is not needed any more. */ @@ -753,18 +765,8 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) BUG_ON(!mask->ref_count); mask->ref_count--; - if (!mask->ref_count) { - struct mask_array *ma; - - ma = ovsl_dereference(tbl->mask_array); - tbl_mask_array_delete_mask(ma, mask); - - /* Shrink the mask array if necessary. */ - if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && - ma->count <= (ma->max / 3)) - tbl_mask_array_realloc(tbl, ma->max / 2); - - } + if (!mask->ref_count) + tbl_mask_array_del_mask(tbl, mask); } } @@ -828,6 +830,29 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, return NULL; } +static int tbl_mask_array_add_mask(struct flow_table *tbl, + struct sw_flow_mask *new) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int err, ma_count = READ_ONCE(ma->count); + + if (ma_count >= ma->max) { + err = tbl_mask_array_realloc(tbl, ma->max + + MASK_ARRAY_SIZE_MIN); + if (err) + return err; + + ma = ovsl_dereference(tbl->mask_array); + } + + BUG_ON(ovsl_dereference(ma->masks[ma_count])); + + rcu_assign_pointer(ma->masks[ma_count], new); + WRITE_ONCE(ma->count, ma_count +1); + + return 0; +} + /* Add 'mask' into the mask list, if it is not already there. 
*/ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, const struct sw_flow_mask *new) @@ -836,9 +861,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, mask = flow_mask_find(tbl, new); if (!mask) { - struct mask_array *ma; - int i; - /* Allocate a new mask if none exsits. */ mask = mask_alloc(); if (!mask) @@ -848,28 +870,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, mask->range = new->range; /* Add mask to mask-list. */ - ma = ovsl_dereference(tbl->mask_array); - if (ma->count >= ma->max) { - int err; - - err = tbl_mask_array_realloc(tbl, ma->max + - MASK_ARRAY_SIZE_MIN); - if (err) { - kfree(mask); - return err; - } - ma = ovsl_dereference(tbl->mask_array); - } - - for (i = 0; i < ma->max; i++) { - struct sw_flow_mask *t; - - t = ovsl_dereference(ma->masks[i]); - if (!t) { - rcu_assign_pointer(ma->masks[i], mask); - ma->count++; - break; - } + if (tbl_mask_array_add_mask(tbl, mask)) { + kfree(mask); + return -ENOMEM; } } else { -- GitLab From cdc58fe7d290cd0990e15ca10df1ad5b312b01d7 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 12 Oct 2020 13:24:56 -0700 Subject: [PATCH 316/432] datapath: simplify the flow_hash Upstream commit: commit 515b65a4b99197ae062a795ab4de919e6d04be04 Author: Tonghao Zhang Date: Fri Nov 1 22:23:50 2019 +0800 net: openvswitch: simplify the flow_hash Simplify the code and remove the unnecessary BUILD_BUG_ON. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Reviewed-by: Tonghao Zhang Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/flow_table.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/datapath/flow_table.c b/datapath/flow_table.c index 62d726ddd..7efaa8044 100644 --- a/datapath/flow_table.c +++ b/datapath/flow_table.c @@ -455,13 +455,10 @@ err_free_ti: static u32 flow_hash(const struct sw_flow_key *key, const struct sw_flow_key_range *range) { - int key_start = range->start; - int key_end = range->end; - const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); - int hash_u32s = (key_end - key_start) >> 2; + const u32 *hash_key = (const u32 *)((const u8 *)key + range->start); /* Make sure number of hash bytes are multiple of u32. */ - BUILD_BUG_ON(sizeof(long) % sizeof(u32)); + int hash_u32s = range_n_bytes(range) >> 2; return jhash2(hash_key, hash_u32s, 0); } -- GitLab From e5466316984d84dfeabbcf6254f207caaff06f7d Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 12 Oct 2020 13:24:57 -0700 Subject: [PATCH 317/432] datapath: add likely in flow_lookup Upstream commit: commit 0a3e01371db17d753dd92ec4d0fc6247412d3b01 Author: Tonghao Zhang Date: Fri Nov 1 22:23:51 2019 +0800 net: openvswitch: add likely in flow_lookup The most case *index < ma->max, and flow-mask is not NULL. We add un/likely for performance. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Reviewed-by: Tonghao Zhang Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/flow_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datapath/flow_table.c b/datapath/flow_table.c index 7efaa8044..ca2efe94d 100644 --- a/datapath/flow_table.c +++ b/datapath/flow_table.c @@ -541,7 +541,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct sw_flow_mask *mask; int i; - if (*index < ma->max) { + if (likely(*index < ma->max)) { mask = rcu_dereference_ovsl(ma->masks[*index]); if (mask) { flow = masked_flow_lookup(ti, key, mask, n_mask_hit); @@ -556,7 +556,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, continue; mask = rcu_dereference_ovsl(ma->masks[i]); - if (!mask) + if (unlikely(!mask)) break; flow = masked_flow_lookup(ti, key, mask, n_mask_hit); -- GitLab From 6d1cf7f3e51f845cd306efcee7efb81de1616265 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 12 Oct 2020 13:24:58 -0700 Subject: [PATCH 318/432] datapath: fix possible memleak on destroy flow-table Upstream commit: commit 50b0e61b32ee890a75b4377d5fbe770a86d6a4c1 Author: Tonghao Zhang Date: Fri Nov 1 22:23:52 2019 +0800 net: openvswitch: fix possible memleak on destroy flow-table When we destroy the flow tables which may contain the flow_mask, so release the flow mask struct. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Added additional compat layer fixup for WRITE_ONCE() Reviewed-by: Tonghao Zhang Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/flow_table.c | 186 +++++++++--------- .../linux/compat/include/linux/compiler.h | 8 + 2 files changed, 106 insertions(+), 88 deletions(-) diff --git a/datapath/flow_table.c b/datapath/flow_table.c index ca2efe94d..bd05dd394 100644 --- a/datapath/flow_table.c +++ b/datapath/flow_table.c @@ -234,6 +234,74 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size) return 0; } +static int tbl_mask_array_add_mask(struct flow_table *tbl, + struct sw_flow_mask *new) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int err, ma_count = READ_ONCE(ma->count); + + if (ma_count >= ma->max) { + err = tbl_mask_array_realloc(tbl, ma->max + + MASK_ARRAY_SIZE_MIN); + if (err) + return err; + + ma = ovsl_dereference(tbl->mask_array); + } + + BUG_ON(ovsl_dereference(ma->masks[ma_count])); + + rcu_assign_pointer(ma->masks[ma_count], new); + WRITE_ONCE(ma->count, ma_count +1); + + return 0; +} + +static void tbl_mask_array_del_mask(struct flow_table *tbl, + struct sw_flow_mask *mask) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int i, ma_count = READ_ONCE(ma->count); + + /* Remove the deleted mask pointers from the array */ + for (i = 0; i < ma_count; i++) { + if (mask == ovsl_dereference(ma->masks[i])) + goto found; + } + + BUG(); + return; + +found: + WRITE_ONCE(ma->count, ma_count -1); + + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); + + kfree_rcu(mask, rcu); + + /* Shrink the mask array if necessary. */ + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && + ma_count <= (ma->max / 3)) + tbl_mask_array_realloc(tbl, ma->max / 2); +} + +/* Remove 'mask' from the mask list, if it is not needed any more. 
*/ +static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) +{ + if (mask) { + /* ovs-lock is required to protect mask-refcount and + * mask list. + */ + ASSERT_OVSL(); + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) + tbl_mask_array_del_mask(tbl, mask); + } +} + int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; @@ -280,7 +348,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, +static void table_instance_flow_free(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti, + struct sw_flow *flow, + bool count) +{ + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); + if (count) + table->count--; + + if (ovs_identifier_is_ufid(&flow->id)) { + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); + + if (count) + table->ufid_count--; + } + + flow_mask_remove(table, flow->mask); +} + +static void table_instance_destroy(struct flow_table *table, + struct table_instance *ti, struct table_instance *ufid_ti, bool deferred) { @@ -297,13 +386,12 @@ static void table_instance_destroy(struct table_instance *ti, struct sw_flow *flow; struct hlist_head *head = &ti->buckets[i]; struct hlist_node *n; - int ver = ti->node_ver; - int ufid_ver = ufid_ti->node_ver; - hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { - hlist_del_rcu(&flow->flow_table.node[ver]); - if (ovs_identifier_is_ufid(&flow->id)) - hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); + hlist_for_each_entry_safe(flow, n, head, + flow_table.node[ti->node_ver]) { + + table_instance_flow_free(table, ti, ufid_ti, + flow, false); ovs_flow_free(flow, deferred); } } @@ -328,7 +416,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) free_percpu(table->mask_cache); kfree(rcu_dereference_raw(table->mask_array)); - table_instance_destroy(ti, ufid_ti, false); + 
table_instance_destroy(table, ti, ufid_ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -444,7 +532,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) flow_table->count = 0; flow_table->ufid_count = 0; - table_instance_destroy(old_ti, old_ufid_ti, true); + table_instance_destroy(flow_table, old_ti, old_ufid_ti, true); return 0; err_free_ti: @@ -722,51 +810,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } -static void tbl_mask_array_del_mask(struct flow_table *tbl, - struct sw_flow_mask *mask) -{ - struct mask_array *ma = ovsl_dereference(tbl->mask_array); - int i, ma_count = READ_ONCE(ma->count); - - /* Remove the deleted mask pointers from the array */ - for (i = 0; i < ma_count; i++) { - if (mask == ovsl_dereference(ma->masks[i])) - goto found; - } - - BUG(); - return; - -found: - WRITE_ONCE(ma->count, ma_count -1); - - rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); - RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); - - kfree_rcu(mask, rcu); - - /* Shrink the mask array if necessary. */ - if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && - ma_count <= (ma->max / 3)) - tbl_mask_array_realloc(tbl, ma->max / 2); -} - -/* Remove 'mask' from the mask list, if it is not needed any more. */ -static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) -{ - if (mask) { - /* ovs-lock is required to protect mask-refcount and - * mask list. - */ - ASSERT_OVSL(); - BUG_ON(!mask->ref_count); - mask->ref_count--; - - if (!mask->ref_count) - tbl_mask_array_del_mask(tbl, mask); - } -} - /* Must be called with OVS mutex held. 
*/ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { @@ -774,17 +817,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); BUG_ON(table->count == 0); - hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); - table->count--; - if (ovs_identifier_is_ufid(&flow->id)) { - hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); - table->ufid_count--; - } - - /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be - * accessible as long as the RCU read lock is held. - */ - flow_mask_remove(table, flow->mask); + table_instance_flow_free(table, ti, ufid_ti, flow, true); } static struct sw_flow_mask *mask_alloc(void) @@ -827,29 +860,6 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, return NULL; } -static int tbl_mask_array_add_mask(struct flow_table *tbl, - struct sw_flow_mask *new) -{ - struct mask_array *ma = ovsl_dereference(tbl->mask_array); - int err, ma_count = READ_ONCE(ma->count); - - if (ma_count >= ma->max) { - err = tbl_mask_array_realloc(tbl, ma->max + - MASK_ARRAY_SIZE_MIN); - if (err) - return err; - - ma = ovsl_dereference(tbl->mask_array); - } - - BUG_ON(ovsl_dereference(ma->masks[ma_count])); - - rcu_assign_pointer(ma->masks[ma_count], new); - WRITE_ONCE(ma->count, ma_count +1); - - return 0; -} - /* Add 'mask' into the mask list, if it is not already there. 
*/ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow, const struct sw_flow_mask *new) diff --git a/datapath/linux/compat/include/linux/compiler.h b/datapath/linux/compat/include/linux/compiler.h index 65f3ba6f4..59b506fd4 100644 --- a/datapath/linux/compat/include/linux/compiler.h +++ b/datapath/linux/compat/include/linux/compiler.h @@ -15,4 +15,12 @@ #define READ_ONCE(x) (x) #endif +#ifndef WRITE_ONCE +#define WRITE_ONCE(x, val) \ +do { \ + *(volatile typeof(x) *)&(x) = (val); \ +} while (0) +#endif + + #endif -- GitLab From a3839b264636f30e34e2028ef7c802235b759565 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 12 Oct 2020 13:24:59 -0700 Subject: [PATCH 319/432] datapath: simplify the ovs_dp_cmd_new Upstream commit: commit eec62eadd1d757b0743ccbde55973814f3ad396e Author: Tonghao Zhang Date: Fri Nov 1 22:23:54 2019 +0800 net: openvswitch: simplify the ovs_dp_cmd_new use the specified functions to init resource. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Reviewed-by: Tonghao Zhang Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 60 ++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index cf216c8ec..22a08baa3 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1665,6 +1665,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) return 0; } +static int ovs_dp_stats_init(struct datapath *dp) +{ + dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); + if (!dp->stats_percpu) + return -ENOMEM; + + return 0; +} + +static int ovs_dp_vport_init(struct datapath *dp) +{ + int i; + + dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!dp->ports) + return -ENOMEM; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->ports[i]); + + return 0; +} + static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1673,7 +1698,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct vport *vport; struct ovs_net *ovs_net; - int err, i; + int err; err = -EINVAL; if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) @@ -1686,35 +1711,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_free_reply; + goto err_destroy_reply; ovs_dp_set_net(dp, sock_net(skb->sk)); /* Allocate table. 
*/ err = ovs_flow_tbl_init(&dp->table); if (err) - goto err_free_dp; + goto err_destroy_dp; - dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); - if (!dp->stats_percpu) { - err = -ENOMEM; + err = ovs_dp_stats_init(dp); + if (err) goto err_destroy_table; - } - dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, - sizeof(struct hlist_head), - GFP_KERNEL); - if (!dp->ports) { - err = -ENOMEM; - goto err_destroy_percpu; - } - - for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&dp->ports[i]); + err = ovs_dp_vport_init(dp); + if (err) + goto err_destroy_stats; err = ovs_meters_init(dp); if (err) - goto err_destroy_ports_array; + goto err_destroy_ports; /* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); @@ -1764,15 +1780,15 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) err_destroy_meters: ovs_meters_exit(dp); -err_destroy_ports_array: +err_destroy_ports: kfree(dp->ports); -err_destroy_percpu: +err_destroy_stats: free_percpu(dp->stats_percpu); err_destroy_table: ovs_flow_tbl_destroy(&dp->table); -err_free_dp: +err_destroy_dp: kfree(dp); -err_free_reply: +err_destroy_reply: kfree_skb(reply); err: return err; -- GitLab From 825634866e95d4e9533d6dafb8d8eccc5f80a3ed Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 12 Oct 2020 13:25:00 -0700 Subject: [PATCH 320/432] datapath: select vport upcall portid directly Upstream commit: commit 90ce9f23a886bdef7a4b7a9bd52c7a50a6a81635 Author: Tonghao Zhang Date: Thu Nov 7 00:34:28 2019 +0800 net: openvswitch: select vport upcall portid directly The commit 69c51582ff786 ("dpif-netlink: don't allocate per thread netlink sockets"), in Open vSwitch ovs-vswitchd, has changed the number of allocated sockets to just one per port by moving the socket array from a per handler structure to a per datapath one. In the kernel datapath, a vport will have only one socket in most case, if so select it directly in fast-path. 
Signed-off-by: Tonghao Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Reviewed-by: Tonghao Zhang Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/vport.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datapath/vport.c b/datapath/vport.c index f929282dc..bd62c5612 100644 --- a/datapath/vport.c +++ b/datapath/vport.c @@ -507,8 +507,9 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb) ids = rcu_dereference(vport->upcall_portids); - if (ids->n_ids == 1 && ids->ids[0] == 0) - return 0; + /* If there is only one portid, select it in the fast-path. */ + if (ids->n_ids == 1) + return ids->ids[0]; hash = skb_get_hash(skb); ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids); -- GitLab From 11047d57d797e079d0bbb9e5827817ebf1c08551 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 12 Oct 2020 13:25:01 -0700 Subject: [PATCH 321/432] datapath: don't call pad_packet if not necessary Upstream commit: commit 61ca533c0e94104c35fcb7858a23ec9a05d78143 Author: Tonghao Zhang Date: Thu Nov 14 23:51:08 2019 +0800 net: openvswitch: don't call pad_packet if not necessary The nla_put_u16/nla_put_u32 makes sure that *attrlen is aligned. The call tree is that: nla_put_u16/nla_put_u32 -> nla_put attrlen = sizeof(u16) or sizeof(u32) -> __nla_put attrlen -> __nla_reserve attrlen -> skb_put(skb, nla_total_size(attrlen)) nla_total_size returns the total length of attribute including padding. Cc: Joe Stringer Cc: William Tu Signed-off-by: Tonghao Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Reviewed-by: Tonghao Zhang Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 22a08baa3..ddc0b4491 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -512,23 +512,17 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, } /* Add OVS_PACKET_ATTR_MRU */ - if (upcall_info->mru) { - if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, - upcall_info->mru)) { - err = -ENOBUFS; - goto out; - } - pad_packet(dp, user_skb); + if (upcall_info->mru && + nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) { + err = -ENOBUFS; + goto out; } /* Add OVS_PACKET_ATTR_LEN when packet is truncated */ - if (cutlen > 0) { - if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, - skb->len)) { - err = -ENOBUFS; - goto out; - } - pad_packet(dp, user_skb); + if (cutlen > 0 && + nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) { + err = -ENOBUFS; + goto out; } /* Add OVS_PACKET_ATTR_HASH */ -- GitLab From 244674ed7cdd9a0c5e1d212e01b1e4fefbb6a32d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Oct 2020 13:25:02 -0700 Subject: [PATCH 322/432] datapath: fix flow command message size Upstream commit: commit 4e81c0b3fa93d07653e2415fa71656b080a112fd Author: Paolo Abeni Date: Tue Nov 26 12:55:50 2019 +0100 openvswitch: fix flow command message size When user-space sets the OVS_UFID_F_OMIT_* flags, and the relevant flow has no UFID, we can exceed the computed size, as ovs_nla_put_identifier() will always dump an OVS_FLOW_ATTR_KEY attribute. Take the above in account when computing the flow command message size. Fixes: 74ed7ab9264c ("openvswitch: Add support for unique flow IDs.") Reported-by: Qi Jun Ding Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index ddc0b4491..1020fee41 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -763,9 +763,13 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, { size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); - /* OVS_FLOW_ATTR_UFID */ + /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback + * see ovs_nla_put_identifier() + */ if (sfid && ovs_identifier_is_ufid(sfid)) len += nla_total_size(sfid->ufid_len); + else + len += nla_total_size(ovs_key_attr_size()); /* OVS_FLOW_ATTR_KEY */ if (!sfid || should_fill_key(sfid, ufid_flags)) -- GitLab From 90b36b02115f1e82684b571936aa4802b170964f Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Oct 2020 13:25:03 -0700 Subject: [PATCH 323/432] datapath: drop unneeded BUG_ON() in ovs_flow_cmd_build_info() Upstream commit: commit 8ffeb03fbba3b599690b361467bfd2373e8c450f Author: Paolo Abeni Date: Sun Dec 1 18:41:24 2019 +0100 openvswitch: drop unneeded BUG_ON() in ovs_flow_cmd_build_info() All the callers of ovs_flow_cmd_build_info() already deal with error return code correctly, so we can handle the error condition in a more graceful way. Still dump a warning to preserve debuggability. v1 -> v2: - clarify the commit message - clean the skb and report the error (DaveM) Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 1020fee41..9448a4c1a 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -946,7 +946,10 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, cmd, ufid_flags); - BUG_ON(retval < 0); + if (WARN_ON_ONCE(retval < 0)) { + kfree_skb(skb); + skb = ERR_PTR(retval); + } return skb; } -- GitLab From 096043f436f831581aa8264cafbac3d7fc899d96 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Oct 2020 13:25:04 -0700 Subject: [PATCH 324/432] datapath: remove another BUG_ON() Upstream commit: commit 8a574f86652a4540a2433946ba826ccb87f398cc Author: Paolo Abeni Date: Sun Dec 1 18:41:25 2019 +0100 openvswitch: remove another BUG_ON() If we can't build the flow del notification, we can simply delete the flow, no need to crash the kernel. Still keep a WARN_ON to preserve debuggability. Note: the BUG_ON() predates the Fixes tag, but this change can be applied only after the mentioned commit. v1 -> v2: - do not leak an skb on error Fixes: aed067783e50 ("openvswitch: Minimize ovs_flow_cmd_del critical section.") Signed-off-by: Paolo Abeni Signed-off-by: David S. 
Miller Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 9448a4c1a..1bc8e1439 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1414,7 +1414,10 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) OVS_FLOW_CMD_DEL, ufid_flags); rcu_read_unlock(); - BUG_ON(err < 0); + if (WARN_ON_ONCE(err < 0)) { + kfree_skb(reply); + goto out_free; + } ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info); } else { genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0, @@ -1423,6 +1426,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } } +out_free: ovs_flow_free(flow, true); return 0; unlock: -- GitLab From 6a101a6c8372570a30e0f8edb558c8a69cc80e7d Mon Sep 17 00:00:00 2001 From: aaron conole Date: Mon, 12 Oct 2020 13:25:05 -0700 Subject: [PATCH 325/432] datapath: support asymmetric conntrack Upstream commit: commit 5d50aa83e2c8e91ced2cca77c198b468ca9210f4 author: aaron conole date: tue dec 3 16:34:13 2019 -0500 openvswitch: support asymmetric conntrack the openvswitch module shares a common conntrack and nat infrastructure exposed via netfilter. it's possible that a packet needs both snat and dnat manipulation, due to e.g. tuple collision. netfilter can support this because it runs through the nat table twice - once on ingress and again after egress. the openvswitch module doesn't have such capability. like netfilter hook infrastructure, we should run through nat twice to keep the symmetry. fixes: 05752523e565 ("openvswitch: interface with nat.") signed-off-by: aaron conole signed-off-by: david s. 
miller Fixes: c5f6c06b58d6 ("datapath: Interface with NAT.") Acked-by: Aaron Conole Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/conntrack.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/datapath/conntrack.c b/datapath/conntrack.c index 5b4d6cce0..c7a318baf 100644 --- a/datapath/conntrack.c +++ b/datapath/conntrack.c @@ -978,6 +978,17 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); + if (err == NF_ACCEPT && + ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, + maniptype); + } + /* Mark NAT done if successful and update the flow key. */ if (err == NF_ACCEPT) ovs_nat_update_key(key, skb, maniptype); -- GitLab From c225ff05e02c61d18cebdebd644bb8166a464090 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 12 Oct 2020 13:25:06 -0700 Subject: [PATCH 326/432] datapath: use skb_list_walk_safe helper for gso segments Upstream commit: commit 2cec4448db38758832c2edad439f99584bb8fa0d Author: Jason A. Donenfeld Date: Mon Jan 13 18:42:29 2020 -0500 net: openvswitch: use skb_list_walk_safe helper for gso segments This is a straight-forward conversion case for the new function, keeping the flow of the existing code as intact as possible. Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. 
Miller Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/datapath.c | 11 ++++------- datapath/linux/compat/include/linux/skbuff.h | 7 +++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 1bc8e1439..52a59f135 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -343,8 +343,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, } #endif /* Queue all of the segments. */ - skb = segs; - do { + skb_list_walk_safe(segs, skb, nskb) { *OVS_CB(skb) = ovs_cb; #ifdef HAVE_SKB_GSO_UDP if (gso_type & SKB_GSO_UDP && skb != segs) @@ -354,17 +353,15 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, if (err) break; - } while ((skb = skb->next)); + } /* Free all of the segments. */ - skb = segs; - do { - nskb = skb->next; + skb_list_walk_safe(segs, skb, nskb) { if (err) kfree_skb(skb); else consume_skb(skb); - } while ((skb = nskb)); + } return err; } diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h index 6d248b3ed..204ce5497 100644 --- a/datapath/linux/compat/include/linux/skbuff.h +++ b/datapath/linux/compat/include/linux/skbuff.h @@ -487,4 +487,11 @@ static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) } #endif +#ifndef skb_list_walk_safe +/* Iterate through singly-linked GSO fragments of an skb. */ +#define skb_list_walk_safe(first, skb, next_skb) \ + for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \ + (skb) = (next_skb), (next_skb) = (skb) ? 
(skb)->next : NULL) +#endif + #endif -- GitLab From 577550b14c65b8b74ee35e15307acee8d4510c34 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 12 Oct 2020 13:25:07 -0700 Subject: [PATCH 327/432] datapath: Distribute switch variables for initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream commit: commit 16a556eeb7ed2dc3709fe2c5be76accdfa4901ab Author: Kees Cook Date: Wed Feb 19 22:23:09 2020 -0800 openvswitch: Distribute switch variables for initialization Variables declared in a switch statement before any case statements cannot be automatically initialized with compiler instrumentation (as they are not part of any execution flow). With GCC's proposed automatic stack variable initialization feature, this triggers a warning (and they don't get initialized). Clang's automatic stack variable initialization (via CONFIG_INIT_STACK_ALL=y) doesn't throw a warning, but it also doesn't initialize such variables[1]. Note that these warnings (or silent skipping) happen before the dead-store elimination optimization phase, so even when the automatic initializations are later elided in favor of direct initializations, the warnings remain. To avoid these problems, move such variables into the "case" where they're used or lift them up into the main function body. net/openvswitch/flow_netlink.c: In function ‘validate_set’: net/openvswitch/flow_netlink.c:2711:29: warning: statement will never be executed [-Wswitch-unreachable] 2711 | const struct ovs_key_ipv4 *ipv4_key; | ^~~~~~~~ [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Signed-off-by: Kees Cook Signed-off-by: David S. 
Miller Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/flow_netlink.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index d3fd77106..996041602 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -2700,10 +2700,6 @@ static int validate_set(const struct nlattr *a, return -EINVAL; switch (key_type) { - const struct ovs_key_ipv4 *ipv4_key; - const struct ovs_key_ipv6 *ipv6_key; - int err; - case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: case OVS_KEY_ATTR_CT_MARK: @@ -2715,7 +2711,9 @@ static int validate_set(const struct nlattr *a, return -EINVAL; break; - case OVS_KEY_ATTR_TUNNEL: + case OVS_KEY_ATTR_TUNNEL: { + int err; + #ifndef USE_UPSTREAM_TUNNEL if (eth_p_mpls(eth_type)) return -EINVAL; @@ -2728,8 +2726,10 @@ static int validate_set(const struct nlattr *a, if (err) return err; break; + } + case OVS_KEY_ATTR_IPV4: { + const struct ovs_key_ipv4 *ipv4_key; - case OVS_KEY_ATTR_IPV4: if (eth_type != htons(ETH_P_IP)) return -EINVAL; @@ -2749,8 +2749,10 @@ static int validate_set(const struct nlattr *a, return -EINVAL; } break; + } + case OVS_KEY_ATTR_IPV6: { + const struct ovs_key_ipv6 *ipv6_key; - case OVS_KEY_ATTR_IPV6: if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; @@ -2777,7 +2779,7 @@ static int validate_set(const struct nlattr *a, return -EINVAL; break; - + } case OVS_KEY_ATTR_TCP: if ((eth_type != htons(ETH_P_IP) && eth_type != htons(ETH_P_IPV6)) || -- GitLab From afe7210161854078e9f1af2a575d9acacc5a0101 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 12 Oct 2020 13:25:08 -0700 Subject: [PATCH 328/432] datapath: use hlist_for_each_entry_rcu instead of hlist_for_each_entry Upstream commit: commit 64948427a63f49dd0ce403388d232f22cc1971a8 Author: Tonghao Zhang Date: Thu Mar 26 04:27:24 2020 +0800 net: openvswitch: use hlist_for_each_entry_rcu instead of hlist_for_each_entry The struct sw_flow is 
protected by RCU, when traversing them, use hlist_for_each_entry_rcu. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Reviewed-by: Greg Rose Signed-off-by: David S. Miller Compat fixup - OVS doesn't support lockdep_ovsl_is_held() yet Reviewed-by: Tonghao Zhang Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/flow_table.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/datapath/flow_table.c b/datapath/flow_table.c index bd05dd394..650338fb0 100644 --- a/datapath/flow_table.c +++ b/datapath/flow_table.c @@ -485,12 +485,12 @@ static void flow_table_copy_flows(struct table_instance *old, struct hlist_head *head = &old->buckets[i]; if (ufid) - hlist_for_each_entry(flow, head, - ufid_table.node[old_ver]) + hlist_for_each_entry_rcu(flow, head, + ufid_table.node[old_ver]) ufid_table_instance_insert(new, flow); else - hlist_for_each_entry(flow, head, - flow_table.node[old_ver]) + hlist_for_each_entry_rcu(flow, head, + flow_table.node[old_ver]) table_instance_insert(new, flow); } -- GitLab From 44722d58c40b030a4e962ded447aa9670b6573b5 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 12 Oct 2020 13:25:09 -0700 Subject: [PATCH 329/432] acinclude: Enable builds up to Linux 5.8 Allow building openvswitch against Linux kernels up to and including version 5.8. 
Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- acinclude.m4 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 3d56510a0..1460289ca 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -167,10 +167,10 @@ AC_DEFUN([OVS_CHECK_LINUX], [ AC_MSG_RESULT([$kversion]) if test "$version" -ge 5; then - if test "$version" = 5 && test "$patchlevel" -le 5; then + if test "$version" = 5 && test "$patchlevel" -le 8; then : # Linux 5.x else - AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 5.5.x is not supported (please refer to the FAQ for advice)]) + AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 5.8.x is not supported (please refer to the FAQ for advice)]) fi elif test "$version" = 4; then : # Linux 4.x -- GitLab From dbb0b86da76186458c31bdbc9b59ab801229c906 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 12 Oct 2020 13:25:10 -0700 Subject: [PATCH 330/432] travis: Update kernel list as of 5.8 Update the list to more closely track the LTS releases on kernel.org. 
Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 43e6a75cc..9fd8bbe01 100644 --- a/.travis.yml +++ b/.travis.yml @@ -38,8 +38,8 @@ env: - TESTSUITE=1 OPTS="--enable-shared" - TESTSUITE=1 DPDK=1 - TESTSUITE=1 LIBS=-ljemalloc - - KERNEL_LIST="5.5 4.20 4.19 4.18 4.17 4.16" - - KERNEL_LIST="4.15 4.14 4.9 4.4 3.19 3.16" + - KERNEL_LIST="5.8 5.5 5.4 4.19" + - KERNEL_LIST="4.14 4.9 4.4 3.16" - AFXDP=1 KERNEL=5.3 - M32=1 OPTS="--disable-ssl" - DPDK=1 OPTS="--enable-shared" -- GitLab From f3b345bdd2ccf12b5fca496e76aee242f0d2ef7c Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Mon, 12 Oct 2020 13:25:11 -0700 Subject: [PATCH 331/432] Documentation: Update faq and NEWS for kernel 5.8 Update the NEWS and faq now that we will support up to Linux kernel 5.8. Acked-by: Yi-Hung Wei Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- Documentation/faq/releases.rst | 1 + NEWS | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index 9d5d2c3e1..dcba97e16 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -72,6 +72,7 @@ Q: What Linux kernel versions does each Open vSwitch release work with? 2.12.x 3.16 to 5.0 2.13.x 3.16 to 5.0 2.14.x 3.16 to 5.5 + 2.15.x 3.16 to 5.8 ============ ============== Open vSwitch userspace should also work with the Linux kernel module built diff --git a/NEWS b/NEWS index d9a7078a1..e1710f35d 100644 --- a/NEWS +++ b/NEWS @@ -7,6 +7,8 @@ Post-v2.14.0 * Removed support for vhost-user dequeue zero-copy. - The environment variable OVS_UNBOUND_CONF, if set, is now used as the DNS resolver's (unbound) configuration file. + - Linux datapath: + * Support for kernel versions up to 5.8.x. 
v2.14.0 - 17 Aug 2020 -- GitLab From 07d5758b032bc774d1e38fc79eaf356ea9302bdf Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 13 Oct 2020 12:02:10 +0200 Subject: [PATCH 332/432] bond: Fix using uninitialized 'lacp_fallback_ab_cfg' for 'bond-primary'. 's->lacp_fallback_ab_cfg' is initialized further down in the code, so we're using it uninitialized to detect if we need to get 'bond-primary' configuration. Found by valgrind: Conditional jump or move depends on uninitialised value(s) at 0x409114: port_configure_bond (bridge.c:4569) by 0x409114: port_configure (bridge.c:1284) by 0x40F6E6: bridge_reconfigure (bridge.c:917) by 0x411425: bridge_run (bridge.c:3330) by 0x406D84: main (ovs-vswitchd.c:127) Uninitialised value was created by a stack allocation at 0x408C53: port_configure (bridge.c:1190) Fix that by moving this code to the point where 'lacp_fallback_ab_cfg' is already initialized. Additionally clarified behavior of 'bond-primary' in manpages for the fallback to AB case. Fixes: b4e50218a0f8 ("bond: Add 'primary' interface concept for active-backup mode.") Acked-by: Jeff Squyres Acked-by: Alin Gabriel Serdean Signed-off-by: Ilya Maximets --- vswitchd/bridge.c | 9 ++++----- vswitchd/vswitch.xml | 5 ++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 62697e89b..103b821bb 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -4564,11 +4564,6 @@ port_configure_bond(struct port *port, struct bond_settings *s) port->name); } - s->primary = NULL; - if (s->balance == BM_AB || s->lacp_fallback_ab_cfg) { - s->primary = smap_get(&port->cfg->other_config, "bond-primary"); - } - miimon_interval = smap_get_int(&port->cfg->other_config, "bond-miimon-interval", 0); if (miimon_interval <= 0) { @@ -4595,6 +4590,10 @@ port_configure_bond(struct port *port, struct bond_settings *s) s->lacp_fallback_ab_cfg = smap_get_bool(&port->cfg->other_config, "lacp-fallback-ab", false); + s->primary = NULL; + if (s->balance == BM_AB 
|| s->lacp_fallback_ab_cfg) { + s->primary = smap_get(&port->cfg->other_config, "bond-primary"); + } LIST_FOR_EACH (iface, port_elem, &port->ifaces) { netdev_set_miimon_interval(iface->netdev, miimon_interval); diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 0e25e018e..a6b70a2f9 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2008,7 +2008,10 @@ If a slave interface with this name exists in the bond and is up, it will be made active. Relevant only when is - active-backup. + active-backup or if balance-tcp falls back + to active-backup (e.g., LACP negotiation fails and + is + true). -- GitLab From 51dec40f8e9d54c06d063e021fa549f401402f78 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 14 Oct 2020 18:13:46 +0200 Subject: [PATCH 333/432] ofp-ed-props: Fix using uninitialized padding for NSH encap actions. OVS uses memcmp to compare actions of existing and new flows, but 'struct ofp_ed_prop_nsh_md_type' and the corresponding ofpact structure have 3 bytes of padding that are never initialized and are passed around within OF data structures and messages. 
Uninitialized bytes in MemcmpInterceptorCommon at offset 21 inside [0x7090000003f8, 136) WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x4a184e in bcmp (vswitchd/ovs-vswitchd+0x4a184e) #1 0x896c8a in ofpacts_equal lib/ofp-actions.c:9121:31 #2 0x564403 in replace_rule_finish ofproto/ofproto.c:5650:37 #3 0x563462 in add_flow_finish ofproto/ofproto.c:5218:13 #4 0x54a1ff in ofproto_flow_mod_finish ofproto/ofproto.c:8091:17 #5 0x5433b2 in handle_flow_mod__ ofproto/ofproto.c:6216:17 #6 0x56a2fc in handle_flow_mod ofproto/ofproto.c:6190:17 #7 0x565bda in handle_single_part_openflow ofproto/ofproto.c:8504:16 #8 0x540b25 in handle_openflow ofproto/ofproto.c:8685:21 #9 0x6697fd in ofconn_run ofproto/connmgr.c:1329:13 #10 0x668e6e in connmgr_run ofproto/connmgr.c:356:9 #11 0x53f1bc in ofproto_run ofproto/ofproto.c:1890:5 #12 0x4ead0c in bridge_run__ vswitchd/bridge.c:3250:9 #13 0x4e9bc8 in bridge_run vswitchd/bridge.c:3309:5 #14 0x51c072 in main vswitchd/ovs-vswitchd.c:127:9 #15 0x7f23a99011a2 in __libc_start_main (/lib64/libc.so.6) #16 0x46b92d in _start (vswitchd/ovs-vswitchd+0x46b92d) Uninitialized value was stored to memory at #0 0x4745aa in __msan_memcpy.part.0 (vswitchd/ovs-vswitchd) #1 0x54529f in rule_actions_create ofproto/ofproto.c:3134:5 #2 0x54915e in ofproto_rule_create ofproto/ofproto.c:5284:11 #3 0x55d419 in add_flow_init ofproto/ofproto.c:5123:17 #4 0x54841f in ofproto_flow_mod_init ofproto/ofproto.c:7987:17 #5 0x543250 in handle_flow_mod__ ofproto/ofproto.c:6206:13 #6 0x56a2fc in handle_flow_mod ofproto/ofproto.c:6190:17 #7 0x565bda in handle_single_part_openflow ofproto/ofproto.c:8504:16 #8 0x540b25 in handle_openflow ofproto/ofproto.c:8685:21 #9 0x6697fd in ofconn_run ofproto/connmgr.c:1329:13 #10 0x668e6e in connmgr_run ofproto/connmgr.c:356:9 #11 0x53f1bc in ofproto_run ofproto/ofproto.c:1890:5 #12 0x4ead0c in bridge_run__ vswitchd/bridge.c:3250:9 #13 0x4e9bc8 in bridge_run vswitchd/bridge.c:3309:5 #14 0x51c072 in main 
vswitchd/ovs-vswitchd.c:127:9 #15 0x7f23a99011a2 in __libc_start_main (/lib64/libc.so.6) Uninitialized value was created by an allocation of 'ofpacts_stub' in the stack frame of function 'handle_flow_mod' #0 0x569e80 in handle_flow_mod ofproto/ofproto.c:6170 This could cause issues with flow modifications or other operations. To reproduce, some NSH tests could be run under valgrind or clang MemorySanitizer. Ex. "nsh - md1 encap over a veth link" test. Fix that by clearing padding bytes while encoding and decoding. OVS will still accept OF messages with non-zero padding from controllers. New tests added to tests/ofp-actions.at. Fixes: 1fc11c5948cf ("Generic encap and decap support for NSH") Signed-off-by: Ilya Maximets Acked-by: Jan Scheurich --- lib/ofp-ed-props.c | 3 ++- tests/ofp-actions.at | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/ofp-ed-props.c b/lib/ofp-ed-props.c index 28382e012..02a9235d5 100644 --- a/lib/ofp-ed-props.c +++ b/lib/ofp-ed-props.c @@ -49,7 +49,7 @@ decode_ed_prop(const struct ofp_ed_prop_header **ofp_prop, return OFPERR_NXBAC_BAD_ED_PROP; } struct ofpact_ed_prop_nsh_md_type *pnmt = - ofpbuf_put_uninit(out, sizeof(*pnmt)); + ofpbuf_put_zeros(out, sizeof *pnmt); pnmt->header.prop_class = prop_class; pnmt->header.type = prop_type; pnmt->header.len = len; @@ -108,6 +108,7 @@ encode_ed_prop(const struct ofpact_ed_prop **prop, opnmt->header.len = offsetof(struct ofp_ed_prop_nsh_md_type, pad); opnmt->md_type = pnmt->md_type; + memset(opnmt->pad, 0, sizeof opnmt->pad); prop_len = sizeof(*pnmt); break; } diff --git a/tests/ofp-actions.at b/tests/ofp-actions.at index 28b2099a0..c79d7d0e2 100644 --- a/tests/ofp-actions.at +++ b/tests/ofp-actions.at @@ -769,6 +769,17 @@ dnl Check OpenFlow v1.3.4 Conformance Test: 430.510. & 00000010 00 00 00 10 00 00 00 01- 0019 0010 80000807 000102030405 000000000010 00000001 +dnl Check NSH encap (experimenter extension). 
+# actions=encap(nsh(md_type=1)) +ffff 0018 00002320 002e 0000 0001894f 0004 01 05 01 000000 + +dnl NSH encap with non-zero padding. +# actions=encap(nsh(md_type=1)) +# 21: 12 -> 00 +# 22: 34 -> 00 +# 23: 56 -> 00 +ffff 0018 00002320 002e 0000 0001894f 0004 01 05 01 123456 + ]) sed '/^[[#&]]/d' < test-data > input.txt sed -n 's/^# //p; /^$/p' < test-data > expout -- GitLab From 8b25079585271a793b8bf682d58930be93b8b556 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Wed, 14 Oct 2020 14:42:46 +0000 Subject: [PATCH 334/432] AUTHORS: Update Eli Britstein Signed-off-by: Eli Britstein Signed-off-by: Ilya Maximets --- .mailmap | 1 + AUTHORS.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 9175d23a4..85373d113 100644 --- a/.mailmap +++ b/.mailmap @@ -31,6 +31,7 @@ Chandra Sekhar Vejendla Daniele Di Proietto Daniele Di Proietto Ed Maste +Eli Britstein Ethan J. Jackson Fischetti, Antonio Flavio Fernandes diff --git a/AUTHORS.rst b/AUTHORS.rst index b47806bf7..9e9d210a2 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -131,7 +131,7 @@ Ed Swierk eswierk@skyportsystems.com Edouard Bourguignon madko@linuxed.net Eelco Chaudron echaudro@redhat.com Eiichi Tsukata eiichi.tsukata@nutanix.com -Eli Britstein elibr@mellanox.com +Eli Britstein elibr@nvidia.com Emma Finn emma.finn@intel.com Eric Lapointe elapointe@corsa.com Esteban Rodriguez Betancourt estebarb@hpe.com -- GitLab From c857b76199307c411504ae30d0b4f25ea72f5399 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 12 Oct 2020 20:15:39 +0200 Subject: [PATCH 335/432] Revert "travis: Disable check for array of flexible structures in sparse." This reverts commit 3c6b3a519ae6eae3da4cf7c59894b02b95cdade7. 
The fix landed to Sparse main repository [1]: b5d46df743be ("flex-array: allow arrays of unions with flexible members.") [1] https://git.kernel.org/pub/scm/devel/sparse/sparse.git Acked-by: Greg Rose Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- .travis/linux-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index 6b6935794..6981d1d47 100755 --- a/.travis/linux-build.sh +++ b/.travis/linux-build.sh @@ -4,7 +4,7 @@ set -o errexit set -x CFLAGS_FOR_OVS="-g -O2" -SPARSE_FLAGS="-Wno-flexible-array-array" +SPARSE_FLAGS="" EXTRA_OPTS="--enable-Werror" function install_kernel() -- GitLab From 0ef70536eb418a13e10113a884f665d47fb732e3 Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Thu, 30 Jul 2020 10:58:38 +0000 Subject: [PATCH 336/432] netdev-offload-dpdk: Support vxlan encap offload with load actions. Struct match has the tunnel values/masks in match->flow.tunnel/match->wc.masks.tunnel. Load actions such as load:0xa566c10->NXM_NX_TUN_IPV4_DST[], load:0xbba->NXM_NX_TUN_ID[] are utilizing the tunnel masks fields, but those should not be used for matching. Offloading fails if masks is not clear. Clear it if no tunnel used. 
Fixes: e8a2b5bf92bb ("netdev-dpdk: implement flow offload with rte flow") Reviewed-by: Eli Britstein Reviewed-by: Gaetan Rivet Acked-by: Sriharsha Basavapatna Tested-by: Emma Finn Signed-off-by: Lei Wang Signed-off-by: Ilya Maximets --- lib/netdev-offload-dpdk.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 5b632bac4..4d19f93cd 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -682,6 +682,10 @@ parse_flow_match(struct flow_patterns *patterns, consumed_masks = &match->wc.masks; + if (!flow_tnl_dst_is_set(&match->flow.tunnel)) { + memset(&consumed_masks->tunnel, 0, sizeof consumed_masks->tunnel); + } + memset(&consumed_masks->in_port, 0, sizeof consumed_masks->in_port); /* recirc id must be zero. */ if (match->wc.masks.recirc_id & match->flow.recirc_id) { -- GitLab From 1e1a15669e324e0971c46a31f3de233e3a0ba5b2 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sat, 17 Oct 2020 17:21:29 +0200 Subject: [PATCH 337/432] odp-util: Fix using uninitialized gtpu metadata. If datapath flow doesn't have one of the fields of gtpu metadata, e.g. 'tunnel(gtpu())', uninitialized stack memory will be used instead. 
==3485429==WARNING: MemorySanitizer: use-of-uninitialized-value #0 0x853a1b in format_u8x lib/odp-util.c:3474:13 #1 0x86ee9c in format_odp_tun_gtpu_opt lib/odp-util.c:3713:5 #2 0x86a099 in format_odp_tun_attr lib/odp-util.c:3973:13 #3 0x83afe6 in format_odp_key_attr__ lib/odp-util.c:4179:9 #4 0x838afb in odp_flow_format lib/odp-util.c:4563:17 #5 0x738422 in log_flow_message lib/dpif.c:1750:5 #6 0x738e2f in log_flow_put_message lib/dpif.c:1784:9 #7 0x7371a4 in dpif_operate lib/dpif.c:1377:21 #8 0x7363ef in dpif_flow_put lib/dpif.c:1035:5 #9 0xc7aab7 in dpctl_put_flow lib/dpctl.c:1171:13 #10 0xc65a4f in dpctl_unixctl_handler lib/dpctl.c:2701:17 #11 0xaaad04 in process_command lib/unixctl.c:308:13 #12 0xaa87f7 in run_connection lib/unixctl.c:342:17 #13 0xaa842e in unixctl_server_run lib/unixctl.c:393:21 #14 0x51c09c in main vswitchd/ovs-vswitchd.c:128:9 #15 0x7f88344391a2 in __libc_start_main (/lib64/libc.so.6+0x271a2) #16 0x46b92d in _start (vswitchd/ovs-vswitchd+0x46b92d) Uninitialized value was stored to memory at #0 0x87da17 in scan_gtpu_metadata lib/odp-util.c:5221:27 #1 0x874588 in parse_odp_key_mask_attr__ lib/odp-util.c:5862:9 #2 0x83ee14 in parse_odp_key_mask_attr lib/odp-util.c:5808:18 #3 0x83e8b5 in odp_flow_from_string lib/odp-util.c:6065:18 #4 0xc7a4f3 in dpctl_put_flow lib/dpctl.c:1145:13 #5 0xc65a4f in dpctl_unixctl_handler lib/dpctl.c:2701:17 #6 0xaaad04 in process_command lib/unixctl.c:308:13 #7 0xaa87f7 in run_connection lib/unixctl.c:342:17 #8 0xaa842e in unixctl_server_run lib/unixctl.c:393:21 #9 0x51c09c in main vswitchd/ovs-vswitchd.c:128:9 #10 0x7f88344391a2 in __libc_start_main (/lib64/libc.so.6+0x271a2) Uninitialized value was created by an allocation of 'msgtype_ma' in the stack frame of function 'scan_gtpu_metadata' #0 0x87d440 in scan_gtpu_metadata lib/odp-util.c:5187 Fix that by initializing fields to all zeroes by default. 
Reported-at: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=21426 Fixes: 3c6d05a02e0f ("userspace: Add GTP-U support.") Acked-by: Yi Yang Signed-off-by: Ilya Maximets --- lib/odp-util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index 5989381e9..e7424a9ac 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -5186,8 +5186,8 @@ scan_gtpu_metadata(const char *s, struct gtpu_metadata *mask) { const char *s_base = s; - uint8_t flags, flags_ma; - uint8_t msgtype, msgtype_ma; + uint8_t flags = 0, flags_ma = 0; + uint8_t msgtype = 0, msgtype_ma = 0; int len; if (!strncmp(s, "flags=", 6)) { -- GitLab From 2c5a48c9a0919039dbd26fa017395d619ae4a95f Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sat, 17 Oct 2020 17:27:14 +0200 Subject: [PATCH 338/432] odp-util: Add missing comma after gtpu attributes. Currently flows are printed like this: 'tunnel(gtpu(flags=0x7f,msgtype=0)flags(0))' With this change: 'tunnel(gtpu(flags=0x7f,msgtype=0),flags(0))' Fixes: 3c6d05a02e0f ("userspace: Add GTP-U support.") Acked-by: Yi Yang Signed-off-by: Ilya Maximets --- lib/odp-util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index e7424a9ac..0bd2f9aa8 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -3971,7 +3971,7 @@ format_odp_tun_attr(const struct nlattr *attr, const struct nlattr *mask_attr, case OVS_TUNNEL_KEY_ATTR_GTPU_OPTS: ds_put_cstr(ds, "gtpu("); format_odp_tun_gtpu_opt(a, ma, ds, verbose); - ds_put_cstr(ds, ")"); + ds_put_cstr(ds, "),"); break; case __OVS_TUNNEL_KEY_ATTR_MAX: default: -- GitLab From e39897fe9e4349ae29047a87505f65a295eac3df Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 20 Oct 2020 11:30:59 +0200 Subject: [PATCH 339/432] NEWS: Move terminology update to correct place. It's Post-v2.14.0, not v2.14.0. 
Fixes: 807152a4ddfb ("Use primary/secondary, not master/slave, as names for OpenFlow roles.") Acked-by: Ben Pfaff Signed-off-by: Ilya Maximets --- NEWS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index e1710f35d..fa8947cb2 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,9 @@ Post-v2.14.0 as the DNS resolver's (unbound) configuration file. - Linux datapath: * Support for kernel versions up to 5.8.x. + - Terminology: + * The terms "master" and "slave" have been replaced by "primary" and + "secondary", respectively, for OpenFlow connection roles. v2.14.0 - 17 Aug 2020 @@ -54,9 +57,6 @@ v2.14.0 - 17 Aug 2020 - Tunnels: TC Flower offload * Tunnel Local endpoint address masked match are supported. * Tunnel Romte endpoint address masked match are supported. - - Terminology: - * The terms "master" and "slave" have been replaced by "primary" and - "secondary", respectively, for OpenFlow connection roles. v2.13.0 - 14 Feb 2020 -- GitLab From f51cf36d86e4a51630dc2781034149c13a634d67 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 17 Jun 2020 15:31:09 -0700 Subject: [PATCH 340/432] conntrack: Rename "master" connection to "parent" connection. Signed-off-by: Ben Pfaff Acked-by: Alin Gabriel Serdean --- include/openvswitch/meta-flow.h | 14 +++---- lib/conntrack-private.h | 10 ++--- lib/conntrack.c | 74 ++++++++++++++++----------------- lib/ct-dpif.c | 6 +-- lib/ct-dpif.h | 2 +- lib/meta-flow.xml | 2 +- lib/netlink-conntrack.c | 6 +-- tests/ofproto-dpif.at | 6 +-- tests/system-traffic.at | 6 +-- 9 files changed, 63 insertions(+), 63 deletions(-) diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h index d529a9f0d..95e52e358 100644 --- a/include/openvswitch/meta-flow.h +++ b/include/openvswitch/meta-flow.h @@ -852,7 +852,7 @@ enum OVS_PACKED_ENUM mf_field_id { /* "ct_nw_proto". 
* * The "protocol" byte in the IPv4 or IPv6 header for the original - * direction conntrack tuple, or of the master conntrack entry, if the + * direction conntrack tuple, or of the parent conntrack entry, if the * current connection is a related connection. * * The value is initially zero and populated by the CT action. The value @@ -873,7 +873,7 @@ enum OVS_PACKED_ENUM mf_field_id { /* "ct_nw_src". * * IPv4 source address of the original direction tuple of the conntrack - * entry, or of the master conntrack entry, if the current connection is a + * entry, or of the parent conntrack entry, if the current connection is a * related connection. * * The value is populated by the CT action. @@ -892,7 +892,7 @@ enum OVS_PACKED_ENUM mf_field_id { /* "ct_nw_dst". * * IPv4 destination address of the original direction tuple of the - * conntrack entry, or of the master conntrack entry, if the current + * conntrack entry, or of the parent conntrack entry, if the current * connection is a related connection. * * The value is populated by the CT action. @@ -911,7 +911,7 @@ enum OVS_PACKED_ENUM mf_field_id { /* "ct_ipv6_src". * * IPv6 source address of the original direction tuple of the conntrack - * entry, or of the master conntrack entry, if the current connection is a + * entry, or of the parent conntrack entry, if the current connection is a * related connection. * * The value is populated by the CT action. @@ -930,7 +930,7 @@ enum OVS_PACKED_ENUM mf_field_id { /* "ct_ipv6_dst". * * IPv6 destination address of the original direction tuple of the - * conntrack entry, or of the master conntrack entry, if the current + * conntrack entry, or of the parent conntrack entry, if the current * connection is a related connection. * * The value is populated by the CT action. @@ -949,7 +949,7 @@ enum OVS_PACKED_ENUM mf_field_id { /* "ct_tp_src". 
* * Transport layer source port of the original direction tuple of the - * conntrack entry, or of the master conntrack entry, if the current + * conntrack entry, or of the parent conntrack entry, if the current * connection is a related connection. * * The value is populated by the CT action. @@ -967,7 +967,7 @@ enum OVS_PACKED_ENUM mf_field_id { /* "ct_tp_dst". * * Transport layer destination port of the original direction tuple of the - * conntrack entry, or of the master conntrack entry, if the current + * conntrack entry, or of the parent conntrack entry, if the current * connection is a related connection. * * The value is populated by the CT action. diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h index 343475301..789af82ff 100644 --- a/lib/conntrack-private.h +++ b/lib/conntrack-private.h @@ -71,13 +71,13 @@ struct alg_exp_node { /* Key of data connection to be created. */ struct conn_key key; /* Corresponding key of the control connection. */ - struct conn_key master_key; + struct conn_key parent_key; /* The NAT replacement address to be used by the data connection. */ union ct_addr alg_nat_repl_addr; - /* The data connection inherits the master control + /* The data connection inherits the parent control * connection label and mark. */ - ovs_u128 master_label; - uint32_t master_mark; + ovs_u128 parent_label; + uint32_t parent_mark; /* True if for NAT application, the alg replaces the dest address; * otherwise, the source address is replaced. */ bool nat_rpl_dst; @@ -92,7 +92,7 @@ struct conn { /* Immutable data. */ struct conn_key key; struct conn_key rev_key; - struct conn_key master_key; /* Only used for orig_tuple support. */ + struct conn_key parent_key; /* Only used for orig_tuple support. 
*/ struct ovs_list exp_node; struct cmap_node cm_node; struct nat_action_info_t *nat_info; diff --git a/lib/conntrack.c b/lib/conntrack.c index f42ba4b60..930ed0be6 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -144,7 +144,7 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx, struct dp_packet *pkt); static void -expectation_clean(struct conntrack *ct, const struct conn_key *master_key); +expectation_clean(struct conntrack *ct, const struct conn_key *parent_key); static struct ct_l4_proto *l4_protos[] = { [IPPROTO_TCP] = &ct_proto_tcp, @@ -585,14 +585,14 @@ write_ct_md(struct dp_packet *pkt, uint16_t zone, const struct conn *conn, /* Use the original direction tuple if we have it. */ if (conn) { if (conn->alg_related) { - key = &conn->master_key; + key = &conn->parent_key; } else { key = &conn->key; } } else if (alg_exp) { - pkt->md.ct_mark = alg_exp->master_mark; - pkt->md.ct_label = alg_exp->master_label; - key = &alg_exp->master_key; + pkt->md.ct_mark = alg_exp->parent_mark; + pkt->md.ct_label = alg_exp->parent_label; + key = &alg_exp->parent_key; } pkt->md.ct_orig_tuple_ipv6 = false; @@ -1002,9 +1002,9 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, if (alg_exp) { nc->alg_related = true; - nc->mark = alg_exp->master_mark; - nc->label = alg_exp->master_label; - nc->master_key = alg_exp->master_key; + nc->mark = alg_exp->parent_mark; + nc->label = alg_exp->parent_label; + nc->parent_key = alg_exp->parent_key; } if (nat_action_info) { @@ -1312,7 +1312,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt, if (!conn) { pkt->md.ct_state |= CS_INVALID; write_ct_md(pkt, zone, NULL, NULL, NULL); - char *log_msg = xasprintf("Missing master conn %p", rev_conn); + char *log_msg = xasprintf("Missing parent conn %p", rev_conn); ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true); free(log_msg); return; @@ -2677,16 +2677,16 @@ expectation_remove(struct hmap *alg_expectations, /* This function must be called with the ct->resources 
read lock taken. */ static struct alg_exp_node * expectation_ref_lookup_unique(const struct hindex *alg_expectation_refs, - const struct conn_key *master_key, + const struct conn_key *parent_key, const struct conn_key *alg_exp_key, uint32_t basis) { struct alg_exp_node *alg_exp_node; HINDEX_FOR_EACH_WITH_HASH (alg_exp_node, node_ref, - conn_key_hash(master_key, basis), + conn_key_hash(parent_key, basis), alg_expectation_refs) { - if (!conn_key_cmp(&alg_exp_node->master_key, master_key) && + if (!conn_key_cmp(&alg_exp_node->parent_key, parent_key) && !conn_key_cmp(&alg_exp_node->key, alg_exp_key)) { return alg_exp_node; } @@ -2701,23 +2701,23 @@ expectation_ref_create(struct hindex *alg_expectation_refs, uint32_t basis) { if (!expectation_ref_lookup_unique(alg_expectation_refs, - &alg_exp_node->master_key, + &alg_exp_node->parent_key, &alg_exp_node->key, basis)) { hindex_insert(alg_expectation_refs, &alg_exp_node->node_ref, - conn_key_hash(&alg_exp_node->master_key, basis)); + conn_key_hash(&alg_exp_node->parent_key, basis)); } } static void -expectation_clean(struct conntrack *ct, const struct conn_key *master_key) +expectation_clean(struct conntrack *ct, const struct conn_key *parent_key) { ovs_rwlock_wrlock(&ct->resources_lock); struct alg_exp_node *node, *next; HINDEX_FOR_EACH_WITH_HASH_SAFE (node, next, node_ref, - conn_key_hash(master_key, ct->hash_basis), + conn_key_hash(parent_key, ct->hash_basis), &ct->alg_expectation_refs) { - if (!conn_key_cmp(&node->master_key, master_key)) { + if (!conn_key_cmp(&node->parent_key, parent_key)) { expectation_remove(&ct->alg_expectations, &node->key, ct->hash_basis); hindex_remove(&ct->alg_expectation_refs, &node->node_ref); @@ -2730,7 +2730,7 @@ expectation_clean(struct conntrack *ct, const struct conn_key *master_key) static void expectation_create(struct conntrack *ct, ovs_be16 dst_port, - const struct conn *master_conn, bool reply, bool src_ip_wc, + const struct conn *parent_conn, bool reply, bool src_ip_wc, bool 
skip_nat) { union ct_addr src_addr; @@ -2739,47 +2739,47 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port, struct alg_exp_node *alg_exp_node = xzalloc(sizeof *alg_exp_node); if (reply) { - src_addr = master_conn->key.src.addr; - dst_addr = master_conn->key.dst.addr; + src_addr = parent_conn->key.src.addr; + dst_addr = parent_conn->key.dst.addr; alg_exp_node->nat_rpl_dst = true; if (skip_nat) { alg_nat_repl_addr = dst_addr; - } else if (master_conn->nat_info && - master_conn->nat_info->nat_action & NAT_ACTION_DST) { - alg_nat_repl_addr = master_conn->rev_key.src.addr; + } else if (parent_conn->nat_info && + parent_conn->nat_info->nat_action & NAT_ACTION_DST) { + alg_nat_repl_addr = parent_conn->rev_key.src.addr; alg_exp_node->nat_rpl_dst = false; } else { - alg_nat_repl_addr = master_conn->rev_key.dst.addr; + alg_nat_repl_addr = parent_conn->rev_key.dst.addr; } } else { - src_addr = master_conn->rev_key.src.addr; - dst_addr = master_conn->rev_key.dst.addr; + src_addr = parent_conn->rev_key.src.addr; + dst_addr = parent_conn->rev_key.dst.addr; alg_exp_node->nat_rpl_dst = false; if (skip_nat) { alg_nat_repl_addr = src_addr; - } else if (master_conn->nat_info && - master_conn->nat_info->nat_action & NAT_ACTION_DST) { - alg_nat_repl_addr = master_conn->key.dst.addr; + } else if (parent_conn->nat_info && + parent_conn->nat_info->nat_action & NAT_ACTION_DST) { + alg_nat_repl_addr = parent_conn->key.dst.addr; alg_exp_node->nat_rpl_dst = true; } else { - alg_nat_repl_addr = master_conn->key.src.addr; + alg_nat_repl_addr = parent_conn->key.src.addr; } } if (src_ip_wc) { memset(&src_addr, 0, sizeof src_addr); } - alg_exp_node->key.dl_type = master_conn->key.dl_type; - alg_exp_node->key.nw_proto = master_conn->key.nw_proto; - alg_exp_node->key.zone = master_conn->key.zone; + alg_exp_node->key.dl_type = parent_conn->key.dl_type; + alg_exp_node->key.nw_proto = parent_conn->key.nw_proto; + alg_exp_node->key.zone = parent_conn->key.zone; alg_exp_node->key.src.addr = 
src_addr; alg_exp_node->key.dst.addr = dst_addr; alg_exp_node->key.src.port = ALG_WC_SRC_PORT; alg_exp_node->key.dst.port = dst_port; - alg_exp_node->master_mark = master_conn->mark; - alg_exp_node->master_label = master_conn->label; - memcpy(&alg_exp_node->master_key, &master_conn->key, - sizeof alg_exp_node->master_key); + alg_exp_node->parent_mark = parent_conn->mark; + alg_exp_node->parent_label = parent_conn->label; + memcpy(&alg_exp_node->parent_key, &parent_conn->key, + sizeof alg_exp_node->parent_key); /* Take the write lock here because it is almost 100% * likely that the lookup will fail and * expectation_create() will be called below. */ diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c index 8c2480e7a..6a5ba052d 100644 --- a/lib/ct-dpif.c +++ b/lib/ct-dpif.c @@ -323,9 +323,9 @@ ct_dpif_format_entry(const struct ct_dpif_entry *entry, struct ds *ds, } ct_dpif_format_protoinfo(ds, ",protoinfo=", &entry->protoinfo, verbose); ct_dpif_format_helper(ds, ",helper=", &entry->helper); - if (verbose && entry->tuple_master.l3_type != 0) { - ds_put_cstr(ds, ",master=("); - ct_dpif_format_tuple(ds, &entry->tuple_master); + if (verbose && entry->tuple_parent.l3_type != 0) { + ds_put_cstr(ds, ",parent=("); + ct_dpif_format_tuple(ds, &entry->tuple_parent); ds_put_cstr(ds, ")"); } } diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h index e4c7a640b..88f4c7e28 100644 --- a/lib/ct-dpif.h +++ b/lib/ct-dpif.h @@ -177,7 +177,7 @@ struct ct_dpif_entry { /* Const members. */ struct ct_dpif_tuple tuple_orig; struct ct_dpif_tuple tuple_reply; - struct ct_dpif_tuple tuple_master; + struct ct_dpif_tuple tuple_parent; struct ct_dpif_helper helper; uint32_t id; uint16_t zone; diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml index e72ba52ec..28865f88c 100644 --- a/lib/meta-flow.xml +++ b/lib/meta-flow.xml @@ -2794,7 +2794,7 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123) connection), or be of different protocol (i.e., when an ICMP response is sent to an UDP packet). 
In case of related connections, e.g., an FTP data connection, the original direction tuple contains the - original direction headers from the master connection, e.g., an FTP + original direction headers from the parent connection, e.g., an FTP control connection.

    diff --git a/lib/netlink-conntrack.c b/lib/netlink-conntrack.c index 86ab866cf..78f1bf60b 100644 --- a/lib/netlink-conntrack.c +++ b/lib/netlink-conntrack.c @@ -237,7 +237,7 @@ nl_ct_flush(void) ofpbuf_uninit(&buf); /* Expectations are flushed automatically, because they do not - * have a master connection anymore */ + * have a parent connection anymore */ return err; } @@ -344,7 +344,7 @@ nl_ct_flush_zone(uint16_t flush_zone) ofpbuf_uninit(&buf); /* Expectations are flushed automatically, because they do not - * have a master connection anymore */ + * have a parent connection anymore */ return 0; } #endif @@ -1263,7 +1263,7 @@ nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry, return false; } if (attrs[CTA_TUPLE_MASTER] && - !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_master, + !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_parent, nfgen_family)) { return false; } diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index d129e60d3..88dd434e4 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -9771,7 +9771,7 @@ dnl Non-REPLY/RELATED packets get the ACL lookup with the packet headers dnl in the actual packet direction in reg0 (IN=1, OUT=2). REPLY packets dnl get the ACL lookup using the conntrack tuple and the inverted direction. dnl RELATED packets get ACL lookup using the conntrack tuple in the direction -dnl of the master connection, as storted in ct_mark. +dnl of the parent connection, as stored in ct_mark. dnl dnl Incoming non-related packet in the original direction (ACL IN) table=1 reg3=1, ip, ct_state=-rel-rpl+trk-inv action=set_field:1->reg0,resubmit(,3),goto_table:5 @@ -9782,7 +9782,7 @@ table=1 reg3=2, ip, ct_state=-rel-rpl+trk-inv action=set_field:2->reg0,resubmit( dnl Outgoing non-related reply packet (CT ACL IN) table=1 reg3=2, ip, ct_state=-rel+rpl+trk-inv action=set_field:1->reg0,resubmit(,3,ct),goto_table:4 dnl -dnl Related packet (CT ACL in the direction of the master connection.)
+dnl Related packet (CT ACL in the direction of the parent connection.) table=1 ip, ct_state=+rel+trk-inv, action=move:NXM_NX_CT_MARK[[]]->NXM_NX_REG0[[]],resubmit(,3,ct),goto_table:4 dnl Drop everything else. table=1 priority=0, action=drop @@ -9815,7 +9815,7 @@ table=5 reg2=0 priority=1000 action=drop dnl Commit new non-related IP connections. table=5 priority=10 reg2=1 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 dnl Commit new related connections in either direction, which inherit the mark -dnl (the direction of the original direction master tuple) from the master +dnl (the direction of the original direction parent tuple) from the parent dnl connection. table=5 priority=10 reg2=1 ct_state=+new+rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,exec(move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 dnl Forward everything else, including stateless accepts. diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 02f0e2716..14f349b5b 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -4810,7 +4810,7 @@ dnl Non-REPLY/RELATED packets get the ACL lookup with the packet headers dnl in the actual packet direction in reg0 (IN=1, OUT=2). REPLY packets dnl get the ACL lookup using the conntrack tuple and the inverted direction. dnl RELATED packets get ACL lookup using the conntrack tuple in the direction -dnl of the master connection, as stored in ct_label[0]. +dnl of the parent connection, as stored in ct_label[0]. 
dnl dnl Incoming non-related packet in the original direction (ACL IN) table=1 reg3=1, ip, ct_state=-rel-rpl+trk-inv action=set_field:1->reg0,resubmit(,3),goto_table:5 @@ -4821,7 +4821,7 @@ table=1 reg3=2, ip, ct_state=-rel-rpl+trk-inv action=set_field:2->reg0,resubmit( dnl Outgoing non-related reply packet (CT ACL IN) table=1 reg3=2, ip, ct_state=-rel+rpl+trk-inv action=set_field:1->reg0,resubmit(,3,ct),goto_table:4 dnl -dnl Related packet (CT ACL in the direction of the master connection.) +dnl Related packet (CT ACL in the direction of the parent connection.) table=1 ip, ct_state=+rel+trk-inv, action=move:NXM_NX_CT_LABEL[[0]]->NXM_NX_REG0[[0]],resubmit(,3,ct),goto_table:4 dnl Drop everything else. table=1 priority=0, action=drop @@ -4863,7 +4863,7 @@ dnl (This should not get any packets in this test.) table=5 priority=10 reg2=1 reg3=2 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat(dst=$2),exec(move:NXM_NX_REG3[[0]]->NXM_NX_CT_LABEL[[0]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 dnl Commit new related connections in either direction, which need 'nat' dnl and which inherit the label (the direction of the original direction -dnl master tuple) from the master connection. +dnl parent tuple) from the parent connection. table=5 priority=10 reg2=1 ct_state=+new+rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat,exec(move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6 dnl dnl NAT incoming non-NEW packets. Outgoing packets were NATted in table 0. -- GitLab From 91fc374a9c5a2a4d9520aaa3588a7a18338a476e Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 17 Jun 2020 14:16:08 -0700 Subject: [PATCH 341/432] Eliminate use of term "slave" in bond, LACP, and bundle contexts. The new term is "member". Most of these changes should not change user-visible behavior. One place where they do is in "ovs-ofctl dump-flows", which will now output "members:..." inside "bundle" actions instead of "slaves:...". 
I don't expect this to cause real problems in most systems. The old syntax is still supported on input for backward compatibility. Signed-off-by: Ben Pfaff Acked-by: Alin Gabriel Serdean --- Documentation/topics/bonding.rst | 167 +++--- Documentation/topics/porting.rst | 4 +- NEWS | 4 +- debian/changelog | 2 +- debian/ifupdown.sh | 4 +- include/openvswitch/ofp-actions.h | 28 +- lib/bundle.c | 101 ++-- lib/bundle.h | 4 +- lib/cfm.c | 4 +- lib/dpif-netdev.c | 41 +- lib/dpif-provider.h | 4 +- lib/dpif.c | 4 +- lib/dpif.h | 2 +- lib/lacp.c | 547 +++++++++---------- lib/lacp.h | 25 +- lib/lldp/lldp-const.h | 16 +- lib/lldp/lldpd-structs.h | 10 +- lib/mac-learning.c | 4 +- lib/mac-learning.h | 2 +- lib/netdev-linux.c | 22 +- lib/ofp-actions.c | 108 ++-- lib/ovs-actions.xml | 35 +- lib/rtnetlink.c | 16 +- lib/rtnetlink.h | 6 +- lib/unixctl.c | 6 +- ofproto/bond.c | 839 +++++++++++++++--------------- ofproto/bond.h | 29 +- ofproto/ofproto-dpif-rid.h | 4 +- ofproto/ofproto-dpif-sflow.c | 2 +- ofproto/ofproto-dpif-xlate.c | 22 +- ofproto/ofproto-dpif.c | 47 +- ofproto/ofproto-dpif.h | 2 +- ofproto/ofproto-provider.h | 12 +- ofproto/ofproto.c | 7 +- ofproto/ofproto.h | 11 +- tests/bundle.at | 48 +- tests/lacp.at | 146 +++--- tests/ofp-actions.at | 28 +- tests/ofproto-dpif.at | 100 ++-- tests/ofproto.at | 4 +- tests/ovs-ofctl.at | 56 +- tests/test-bundle.c | 112 ++-- vswitchd/bridge.c | 35 +- vswitchd/ovs-vswitchd.8.in | 42 +- vswitchd/vswitch.xml | 37 +- 45 files changed, 1393 insertions(+), 1356 deletions(-) diff --git a/Documentation/topics/bonding.rst b/Documentation/topics/bonding.rst index ac39fd18b..01bd5dfc2 100644 --- a/Documentation/topics/bonding.rst +++ b/Documentation/topics/bonding.rst @@ -25,22 +25,22 @@ Bonding ======= -Bonding allows two or more interfaces (the "slaves") to share network traffic. +Bonding allows two or more interfaces, its "members", to share network traffic. 
From a high-level point of view, bonded interfaces act like a single port, but they have the bandwidth of multiple network devices, e.g. two 1 GB physical interfaces act like a single 2 GB interface. Bonds also increase robustness: -the bonded port does not go down as long as at least one of its slaves is up. +the bonded port does not go down as long as at least one of its members is up. -In vswitchd, a bond always has at least two slaves (and may have more). If a -configuration error, etc. would cause a bond to have only one slave, the port +In vswitchd, a bond always has at least two members (and may have more). If a +configuration error, etc. would cause a bond to have only one member, the port becomes an ordinary port, not a bonded port, and none of the special features of bonded ports described in this section apply. There are many forms of bonding of which ovs-vswitchd implements only a few. The most complex bond ovs-vswitchd implements is called "source load balancing" -or SLB bonding. SLB bonding divides traffic among the slaves based on the -Ethernet source address. This is useful only if the traffic over the bond has -multiple Ethernet source addresses, for example if network traffic from +or SLB bonding. SLB bonding divides traffic among the members based on +the Ethernet source address. This is useful only if the traffic over the bond +has multiple Ethernet source addresses, for example if network traffic from multiple VMs are multiplexed over the bond. .. note:: @@ -50,89 +50,90 @@ multiple VMs are multiplexed over the bond. specified. -Enabling and Disabling Slaves ------------------------------ +Enabling and Disabling Members +------------------------------ -When a bond is created, a slave is initially enabled or disabled based on -whether carrier is detected on the NIC (see ``iface_create()``). 
After that, a -slave is disabled if its carrier goes down for a period of time longer than the -downdelay, and it is enabled if carrier comes up for longer than the updelay -(see ``bond_link_status_update()``). There is one exception where the updelay -is skipped: if no slaves at all are currently enabled, then the first slave on -which carrier comes up is enabled immediately. +When a bond is created, a member is initially enabled or disabled based +on whether carrier is detected on the NIC (see ``iface_create()``). After +that, a member is disabled if its carrier goes down for a period of time +longer than the downdelay, and it is enabled if carrier comes up for longer +than the updelay (see ``bond_link_status_update()``). There is one exception +where the updelay is skipped: if no members at all are currently +enabled, then the first member on which carrier comes up is enabled +immediately. The updelay should be set to a time longer than the STP forwarding delay of the physical switch to which the bond port is connected (if STP is enabled on that -switch). Otherwise, the slave will be enabled, and load may be shifted to it, -before the physical switch starts forwarding packets on that port, which can -cause some data to be "blackholed" for a time. The exception for a single -enabled slave does not cause any problem in this regard because when no slaves -are enabled all output packets are blackholed anyway. - -When a slave becomes disabled, the vswitch immediately chooses a new output -port for traffic that was destined for that slave (see -``bond_enable_slave()``). It also sends a "gratuitous learning packet", -specifically a RARP, on the bond port (on the newly chosen slave) for each MAC -address that the vswitch has learned on a port other than the bond (see -``bundle_send_learning_packets()``), to teach the physical switch that the new -slave should be used in place of the one that is now disabled. 
(This behavior -probably makes sense only for a vswitch that has only one port (the bond) -connected to a physical switch; vswitchd should probably provide a way to -disable or configure it in other scenarios.) +switch). Otherwise, the member will be enabled, and load may be shifted +to it, before the physical switch starts forwarding packets on that port, which +can cause some data to be dropped for a time. The exception for a single +enabled member does not cause any problem in this regard because when no +members are enabled all output packets are dropped anyway. + +When a member becomes disabled, the vswitch immediately chooses a new +output port for traffic that was destined for that member (see +``bond_enable_member()``). It also sends a "gratuitous learning packet", +specifically a RARP, on the bond port (on the newly chosen member) for +each MAC address that the vswitch has learned on a port other than the bond +(see ``bundle_send_learning_packets()``), to teach the physical switch that the +new member should be used in place of the one that is now disabled. +(This behavior probably makes sense only for a vswitch that has only one port +(the bond) connected to a physical switch; vswitchd should probably provide a +way to disable or configure it in other scenarios.) Bond Packet Input ----------------- -Bonding accepts unicast packets on any bond slave. This can occasionally cause -packet duplication for the first few packets sent to a given MAC, if the +Bonding accepts unicast packets on any member. This can occasionally +cause packet duplication for the first few packets sent to a given MAC, if the physical switch attached to the bond is flooding packets to that MAC because it -has not yet learned the correct slave for that MAC. +has not yet learned the correct member for that MAC. -Bonding only accepts multicast (and broadcast) packets on a single bond slave -(the "active slave") at any given time. Multicast packets received on other -slaves are dropped. 
Otherwise, every multicast packet would be duplicated, -once for every bond slave, because the physical switch attached to the bond -will flood those packets. +Bonding only accepts multicast (and broadcast) packets on a single bond +member (the "active member") at any given time. Multicast +packets received on other members are dropped. Otherwise, every +multicast packet would be duplicated, once for every bond member, +because the physical switch attached to the bond will flood those packets. Bonding also drops received packets when the vswitch has learned that the packet's MAC is on a port other than the bond port itself. This is because it is likely that the vswitch itself sent the packet out the bond port on a -different slave and is now receiving the packet back. This occurs when the -packet is multicast or the physical switch has not yet learned the MAC and is -flooding it. However, the vswitch makes an exception to this rule for +different member and is now receiving the packet back. This occurs when +the packet is multicast or the physical switch has not yet learned the MAC and +is flooding it. However, the vswitch makes an exception to this rule for broadcast ARP replies, which indicate that the MAC has moved to another switch, probably due to VM migration. (ARP replies are normally unicast, so this exception does not match normal ARP replies. It will match the learning packets sent on bond fail-over.) -The active slave is simply the first slave to be enabled after the bond is -created (see ``bond_choose_active_slave()``). If the active slave is disabled, -then a new active slave is chosen among the slaves that remain active. -Currently due to the way that configuration works, this tends to be the -remaining slave whose interface name is first alphabetically, but this is by no -means guaranteed. +The active member is simply the first member to be enabled after +the bond is created (see ``bond_choose_active_member()``). 
If the active +member is disabled, then a new active member is chosen among the +members that remain active. Currently due to the way that configuration +works, this tends to be the remaining member whose interface name is +first alphabetically, but this is by no means guaranteed. Bond Packet Output ------------------ -When a packet is sent out a bond port, the bond slave actually used is selected -based on the packet's source MAC and VLAN tag (see ``bond_choose_output_slave()``). -In particular, the source MAC and VLAN tag are hashed into one of 256 values, -and that value is looked up in a hash table (the "bond hash") kept in the -``bond_hash`` member of struct port. The hash table entry identifies a bond -slave. If no bond slave has yet been chosen for that hash table entry, -vswitchd chooses one arbitrarily. - -Every 10 seconds, vswitchd rebalances the bond slaves (see -``bond_rebalance()``). To rebalance, vswitchd examines the statistics for -the number of bytes transmitted by each slave over approximately the past +When a packet is sent out a bond port, the bond member actually used is +selected based on the packet's source MAC and VLAN tag (see +``bond_choose_output_member()``). In particular, the source MAC and VLAN tag +are hashed into one of 256 values, and that value is looked up in a hash table +(the "bond hash") kept in the ``bond_hash`` member of struct port. The hash +table entry identifies a bond member. If no bond member has yet been chosen +for that hash table entry, vswitchd chooses one arbitrarily. + +Every 10 seconds, vswitchd rebalances the bond members (see +``bond_rebalance()``). To rebalance, vswitchd examines the statistics for the +number of bytes transmitted by each member over approximately the past minute, with data sent more recently weighted more heavily than data sent less -recently. It considers each of the slaves in order from most-loaded to -least-loaded. 
If highly loaded slave H is significantly more heavily loaded -than the least-loaded slave L, and slave H carries at least two hashes, then -vswitchd shifts one of H's hashes to L. However, vswitchd will only shift a -hash from H to L if it will decrease the ratio of the load between H and L by -at least 0.1. +recently. It considers each of the members in order from most-loaded to +least-loaded. If highly loaded member H is significantly more heavily +loaded than the least-loaded member L, and member H carries at +least two hashes, then vswitchd shifts one of H's hashes to L. However, +vswitchd will only shift a hash from H to L if it will decrease the ratio of +the load between H and L by at least 0.1. Currently, "significantly more loaded" means that H must carry at least 1 Mbps more traffic, and that traffic must be at least 3% greater than L's. @@ -166,11 +167,11 @@ behavior on Open vSwitch. Active Backup Bonding ~~~~~~~~~~~~~~~~~~~~~ -Active Backup bonds send all traffic out one "active" slave until that slave -becomes unavailable. Since they are significantly less complicated than SLB -bonds, they are preferred when LACP is not an option. Additionally, they are -the only bond mode which supports attaching each slave to a different upstream -switch. +Active Backup bonds send all traffic out one "active" member until that +member becomes unavailable. Since they are significantly less +complicated than SLB bonds, they are preferred when LACP is not an option. +Additionally, they are the only bond mode which supports attaching each +member to a different upstream switch. SLB Bonding ~~~~~~~~~~~ @@ -195,15 +196,15 @@ SLB bonding has the following complications: This would cause packet duplication if not handled specially. Open vSwitch avoids packet duplication by accepting multicast and broadcast - packets on only the active slave, and dropping multicast and broadcast - packets on all other slaves. 
+ packets on only the active member, and dropping multicast and + broadcast packets on all other members. 2. When Open vSwitch forwards a multicast or broadcast packet to a link in the - SLB bond other than the active slave, the remote switch will forward it to - all of the other links in the SLB bond, including the active slave. Without - special handling, this would mean that Open vSwitch would forward a second - copy of the packet to each switch port (other than the bond), including the - port that originated the packet. + SLB bond other than the active member, the remote switch will forward + it to all of the other links in the SLB bond, including the active + member. Without special handling, this would mean that Open vSwitch + would forward a second copy of the packet to each switch port (other than + the bond), including the port that originated the packet. Open vSwitch deals with this case by dropping packets received on any SLB bonded link that have a source MAC+VLAN that has been learned on any other @@ -226,11 +227,11 @@ SLB bonding has the following complications: 4. Suppose that a MAC+VLAN moves from an SLB bond to another port (e.g. when a VM is migrated from a different hypervisor to this one), that the MAC+VLAN emits a gratuitous ARP, and that Open vSwitch forwards that gratuitous ARP - to a link in the SLB bond other than the active slave. The remote switch - will forward the gratuitous ARP to all of the other links in the SLB bond, - including the active slave. Without additional special handling, this would - mean that Open vSwitch would learn that the MAC+VLAN was located on the SLB - bond, as a consequence of rule #3. + to a link in the SLB bond other than the active member. The remote + switch will forward the gratuitous ARP to all of the other links in the SLB + bond, including the active member. 
Without additional special + handling, this would mean that Open vSwitch would learn that the MAC+VLAN + was located on the SLB bond, as a consequence of rule #3. Open vSwitch avoids this problem by "locking" the MAC learning table entry for a MAC+VLAN from which a gratuitous ARP was received from a non-SLB bond diff --git a/Documentation/topics/porting.rst b/Documentation/topics/porting.rst index b327b2b0d..839b04d52 100644 --- a/Documentation/topics/porting.rst +++ b/Documentation/topics/porting.rst @@ -42,8 +42,8 @@ concordance, indexed by the area of the source tree: datapath/ vport --- vswitchd/ iface port ofproto/ port bundle - ofproto/bond.c slave bond - lib/lacp.c slave lacp + ofproto/bond.c member bond + lib/lacp.c member lacp lib/netdev.c netdev --- database Interface Port diff --git a/NEWS b/NEWS index fa8947cb2..0a7a8f7fb 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,8 @@ Post-v2.14.0 - Terminology: * The terms "master" and "slave" have been replaced by "primary" and "secondary", respectively, for OpenFlow connection roles. + * The term "slave" has been replaced by "member", for bonds, LACP, and + OpenFlow bundle actions. v2.14.0 - 17 Aug 2020 @@ -833,7 +835,7 @@ v2.4.0 - 20 Aug 2015 The implementation has been tested successfully against the Ixia Automated Network Validation Library (ANVL). - Stats are no longer updated on fake bond interface. - - Keep active bond slave selection across OVS restart. + - Keep active bond interface selection across OVS restart. - A simple wrapper script, 'ovs-docker', to integrate OVS with Docker containers. If and when there is a native integration of Open vSwitch with Docker, the wrapper script will be retired. diff --git a/debian/changelog b/debian/changelog index 2a57585e3..ea72a1474 100644 --- a/debian/changelog +++ b/debian/changelog @@ -388,7 +388,7 @@ openvswitch (2.4.0-1) unstable; urgency=low The implementation has been tested successfully against the Ixia Automated Network Validation Library (ANVL). 
- Stats are no longer updated on fake bond interface. - - Keep active bond slave selection across OVS restart. + - Keep active bond member selection across OVS restart. - A simple wrapper script, 'ovs-docker', to integrate OVS with Docker containers. If and when there is a native integration of Open vSwitch with Docker, the wrapper script will be retired. diff --git a/debian/ifupdown.sh b/debian/ifupdown.sh index 9a7772d03..01982acbf 100755 --- a/debian/ifupdown.sh +++ b/debian/ifupdown.sh @@ -65,9 +65,9 @@ if [ "${MODE}" = "start" ]; then ${OVS_EXTRA+-- $OVS_EXTRA} ip link set "${IFACE}" up - for slave in ${IF_OVS_BONDS} + for member in ${IF_OVS_BONDS} do - ip link set "${slave}" up + ip link set "${member}" up done ;; OVSPatchPort) diff --git a/include/openvswitch/ofp-actions.h b/include/openvswitch/ofp-actions.h index 226e86d0b..41bcb55d2 100644 --- a/include/openvswitch/ofp-actions.h +++ b/include/openvswitch/ofp-actions.h @@ -66,7 +66,7 @@ struct vl_mff_map; OFPACT(CONTROLLER, ofpact_controller, userdata, "controller") \ OFPACT(ENQUEUE, ofpact_enqueue, ofpact, "enqueue") \ OFPACT(OUTPUT_REG, ofpact_output_reg, ofpact, "output_reg") \ - OFPACT(BUNDLE, ofpact_bundle, slaves, "bundle") \ + OFPACT(BUNDLE, ofpact_bundle, members, "bundle") \ \ /* Header changes. */ \ OFPACT(SET_FIELD, ofpact_set_field, ofpact, "set_field") \ @@ -364,24 +364,24 @@ struct ofpact_output_trunc { ); }; -/* Bundle slave choice algorithm to apply. +/* Bundle member choice algorithm to apply. * - * In the descriptions below, 'slaves' is the list of possible slaves in the + * In the descriptions below, 'members' is the list of possible members in the * order they appear in the OpenFlow action. */ enum nx_bd_algorithm { - /* Chooses the first live slave listed in the bundle. + /* Chooses the first live member listed in the bundle. * - * O(n_slaves) performance. */ + * O(n_members) performance. */ NX_BD_ALG_ACTIVE_BACKUP = 0, /* Highest Random Weight. 
* - * for i in [0,n_slaves): + * for i in [0,n_members): * weights[i] = hash(flow, i) - * slave = { slaves[i] such that weights[i] >= weights[j] for all j != i } + * member = { members[i] such that weights[i] >= weights[j] for all j != i } * - * Redistributes 1/n_slaves of traffic when a slave's liveness changes. - * O(n_slaves) performance. + * Redistributes 1/n_members of traffic when a member's liveness changes. + * O(n_members) performance. * * Uses the 'fields' and 'basis' parameters. */ NX_BD_ALG_HRW = 1 @@ -394,7 +394,7 @@ struct ofpact_bundle { OFPACT_PADDED_MEMBERS( struct ofpact ofpact; - /* Slave choice algorithm to apply to hash value. */ + /* Member choice algorithm to apply to hash value. */ enum nx_bd_algorithm algorithm; /* What fields to hash and how. */ @@ -403,10 +403,12 @@ struct ofpact_bundle { struct mf_subfield dst; - /* Slaves for output. */ - unsigned int n_slaves; + bool compat_syntax; + + /* Members for output. */ + unsigned int n_members; ); - ofp_port_t slaves[]; + ofp_port_t members[]; }; /* OFPACT_SET_VLAN_VID. 
diff --git a/lib/bundle.c b/lib/bundle.c index edb11f6be..d728380ec 100644 --- a/lib/bundle.c +++ b/lib/bundle.c @@ -39,14 +39,14 @@ VLOG_DEFINE_THIS_MODULE(bundle); static ofp_port_t execute_ab(const struct ofpact_bundle *bundle, - bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), void *aux) + bool (*member_enabled)(ofp_port_t ofp_port, void *aux), void *aux) { size_t i; - for (i = 0; i < bundle->n_slaves; i++) { - ofp_port_t slave = bundle->slaves[i]; - if (slave_enabled(slave, aux)) { - return slave; + for (i = 0; i < bundle->n_members; i++) { + ofp_port_t member = bundle->members[i]; + if (member_enabled(member, aux)) { + return member; } } @@ -56,12 +56,12 @@ execute_ab(const struct ofpact_bundle *bundle, static ofp_port_t execute_hrw(const struct ofpact_bundle *bundle, const struct flow *flow, struct flow_wildcards *wc, - bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), void *aux) + bool (*member_enabled)(ofp_port_t ofp_port, void *aux), void *aux) { uint32_t flow_hash, best_hash; int best, i; - if (bundle->n_slaves > 1) { + if (bundle->n_members > 1) { flow_mask_hash_fields(flow, wc, bundle->fields); } @@ -69,8 +69,8 @@ execute_hrw(const struct ofpact_bundle *bundle, best = -1; best_hash = 0; - for (i = 0; i < bundle->n_slaves; i++) { - if (slave_enabled(bundle->slaves[i], aux)) { + for (i = 0; i < bundle->n_members; i++) { + if (member_enabled(bundle->members[i], aux)) { uint32_t hash = hash_2words(i, flow_hash); if (best < 0 || hash > best_hash) { @@ -80,25 +80,25 @@ execute_hrw(const struct ofpact_bundle *bundle, } } - return best >= 0 ? bundle->slaves[best] : OFPP_NONE; + return best >= 0 ? bundle->members[best] : OFPP_NONE; } /* Executes 'bundle' on 'flow'. Sets fields in 'wc' that were used to - * calculate the result. Uses 'slave_enabled' to determine if the slave - * designated by 'ofp_port' is up. Returns the chosen slave, or - * OFPP_NONE if none of the slaves are acceptable. */ + * calculate the result. 
Uses 'member_enabled' to determine if the member + * designated by 'ofp_port' is up. Returns the chosen member, or + * OFPP_NONE if none of the members are acceptable. */ ofp_port_t bundle_execute(const struct ofpact_bundle *bundle, const struct flow *flow, struct flow_wildcards *wc, - bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), + bool (*member_enabled)(ofp_port_t ofp_port, void *aux), void *aux) { switch (bundle->algorithm) { case NX_BD_ALG_HRW: - return execute_hrw(bundle, flow, wc, slave_enabled, aux); + return execute_hrw(bundle, flow, wc, member_enabled, aux); case NX_BD_ALG_ACTIVE_BACKUP: - return execute_ab(bundle, slave_enabled, aux); + return execute_ab(bundle, member_enabled, aux); default: OVS_NOT_REACHED(); @@ -119,21 +119,21 @@ bundle_check(const struct ofpact_bundle *bundle, ofp_port_t max_ports, } } - for (i = 0; i < bundle->n_slaves; i++) { - ofp_port_t ofp_port = bundle->slaves[i]; + for (i = 0; i < bundle->n_members; i++) { + ofp_port_t ofp_port = bundle->members[i]; if (ofp_port != OFPP_NONE) { enum ofperr error = ofpact_check_output_port(ofp_port, max_ports); if (error) { - VLOG_WARN_RL(&rl, "invalid slave %"PRIu32, ofp_port); + VLOG_WARN_RL(&rl, "invalid member %"PRIu32, ofp_port); return error; } } - /* Controller slaves are unsupported due to the lack of a max_len + /* Controller members are unsupported due to the lack of a max_len * argument. This may or may not change in the future. There doesn't * seem to be a real-world use-case for supporting it. 
*/ if (ofp_port == OFPP_CONTROLLER) { - VLOG_WARN_RL(&rl, "unsupported controller slave"); + VLOG_WARN_RL(&rl, "unsupported controller member"); return OFPERR_OFPBAC_BAD_OUT_PORT; } } @@ -150,38 +150,39 @@ static char * OVS_WARN_UNUSED_RESULT bundle_parse__(const char *s, const struct ofputil_port_map *port_map, char **save_ptr, const char *fields, const char *basis, const char *algorithm, - const char *slave_type, const char *dst, - const char *slave_delim, struct ofpbuf *ofpacts) + const char *member_type, const char *dst, + const char *member_delim, struct ofpbuf *ofpacts) { struct ofpact_bundle *bundle; - if (!slave_delim) { + if (!member_delim) { return xasprintf("%s: not enough arguments to bundle action", s); } - if (strcasecmp(slave_delim, "slaves")) { - return xasprintf("%s: missing slave delimiter, expected `slaves' " - "got `%s'", s, slave_delim); + if (strcasecmp(member_delim, "members") + && strcasecmp(member_delim, "slaves")) { + return xasprintf("%s: missing member delimiter, expected `members', " + "got `%s'", s, member_delim); } bundle = ofpact_put_BUNDLE(ofpacts); for (;;) { - ofp_port_t slave_port; - char *slave; + ofp_port_t member_port; + char *member; - slave = strtok_r(NULL, ", []", save_ptr); - if (!slave || bundle->n_slaves >= BUNDLE_MAX_SLAVES) { + member = strtok_r(NULL, ", []", save_ptr); + if (!member || bundle->n_members >= BUNDLE_MAX_MEMBERS) { break; } - if (!ofputil_port_from_string(slave, port_map, &slave_port)) { - return xasprintf("%s: bad port number", slave); + if (!ofputil_port_from_string(member, port_map, &member_port)) { + return xasprintf("%s: bad port number", member); } - ofpbuf_put(ofpacts, &slave_port, sizeof slave_port); + ofpbuf_put(ofpacts, &member_port, sizeof member_port); bundle = ofpacts->header; - bundle->n_slaves++; + bundle->n_members++; } if (ofpbuf_oversized(ofpacts)) { @@ -217,8 +218,8 @@ bundle_parse__(const char *s, const struct ofputil_port_map *port_map, return xasprintf("%s: unknown algorithm `%s'", 
s, algorithm); } - if (strcasecmp(slave_type, "ofport")) { - return xasprintf("%s: unknown slave_type `%s'", s, slave_type); + if (strcasecmp(member_type, "ofport")) { + return xasprintf("%s: unknown member_type `%s'", s, member_type); } if (dst) { @@ -245,7 +246,7 @@ char * OVS_WARN_UNUSED_RESULT bundle_parse(const char *s, const struct ofputil_port_map *port_map, struct ofpbuf *ofpacts) { - char *fields, *basis, *algorithm, *slave_type, *slave_delim; + char *fields, *basis, *algorithm, *member_type, *member_delim; char *tokstr, *save_ptr; char *error; @@ -254,12 +255,12 @@ bundle_parse(const char *s, const struct ofputil_port_map *port_map, fields = strtok_r(tokstr, ", ", &save_ptr); basis = strtok_r(NULL, ", ", &save_ptr); algorithm = strtok_r(NULL, ", ", &save_ptr); - slave_type = strtok_r(NULL, ", ", &save_ptr); - slave_delim = strtok_r(NULL, ": ", &save_ptr); + member_type = strtok_r(NULL, ", ", &save_ptr); + member_delim = strtok_r(NULL, ": ", &save_ptr); error = bundle_parse__(s, port_map, - &save_ptr, fields, basis, algorithm, slave_type, - NULL, slave_delim, ofpacts); + &save_ptr, fields, basis, algorithm, member_type, + NULL, member_delim, ofpacts); free(tokstr); return error; @@ -274,7 +275,7 @@ char * OVS_WARN_UNUSED_RESULT bundle_parse_load(const char *s, const struct ofputil_port_map *port_map, struct ofpbuf *ofpacts) { - char *fields, *basis, *algorithm, *slave_type, *dst, *slave_delim; + char *fields, *basis, *algorithm, *member_type, *dst, *member_delim; char *tokstr, *save_ptr; char *error; @@ -283,13 +284,13 @@ bundle_parse_load(const char *s, const struct ofputil_port_map *port_map, fields = strtok_r(tokstr, ", ", &save_ptr); basis = strtok_r(NULL, ", ", &save_ptr); algorithm = strtok_r(NULL, ", ", &save_ptr); - slave_type = strtok_r(NULL, ", ", &save_ptr); + member_type = strtok_r(NULL, ", ", &save_ptr); dst = strtok_r(NULL, ", ", &save_ptr); - slave_delim = strtok_r(NULL, ": ", &save_ptr); + member_delim = strtok_r(NULL, ": ", &save_ptr); 
error = bundle_parse__(s, port_map, - &save_ptr, fields, basis, algorithm, slave_type, - dst, slave_delim, ofpacts); + &save_ptr, fields, basis, algorithm, member_type, + dst, member_delim, ofpacts); free(tokstr); @@ -328,13 +329,13 @@ bundle_format(const struct ofpact_bundle *bundle, ds_put_char(s, ','); } - ds_put_format(s, "%sslaves:%s", colors.param, colors.end); - for (i = 0; i < bundle->n_slaves; i++) { + ds_put_format(s, "%smembers:%s", colors.param, colors.end); + for (i = 0; i < bundle->n_members; i++) { if (i) { ds_put_char(s, ','); } - ofputil_format_port(bundle->slaves[i], port_map, s); + ofputil_format_port(bundle->members[i], port_map, s); } ds_put_format(s, "%s)%s", colors.paren, colors.end); diff --git a/lib/bundle.h b/lib/bundle.h index 85a2e861d..b3b9cdcee 100644 --- a/lib/bundle.h +++ b/lib/bundle.h @@ -40,11 +40,11 @@ struct ofputil_port_map; * * See lib/ofp-actions.c for NXAST_BUNDLE specification. */ -#define BUNDLE_MAX_SLAVES 2048 +#define BUNDLE_MAX_MEMBERS 2048 ofp_port_t bundle_execute(const struct ofpact_bundle *, const struct flow *, struct flow_wildcards *wc, - bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), + bool (*member_enabled)(ofp_port_t ofp_port, void *aux), void *aux); enum ofperr bundle_check(const struct ofpact_bundle *, ofp_port_t max_ports, const struct match *); diff --git a/lib/cfm.c b/lib/cfm.c index 71d2c0206..cc43e70e3 100644 --- a/lib/cfm.c +++ b/lib/cfm.c @@ -780,8 +780,8 @@ cfm_process_heartbeat(struct cfm *cfm, const struct dp_packet *p) * * Faults can cause a controller or Open vSwitch to make potentially * expensive changes to the network topology. It seems prudent to trigger - * them judiciously, especially when CFM is used to check slave status of - * bonds. Furthermore, faults can be maliciously triggered by crafting + * them judiciously, especially when CFM is used to check status of bond + * members. Furthermore, faults can be maliciously triggered by crafting * unexpected CCMs. 
*/ if (memcmp(ccm->maid, cfm->maid, sizeof ccm->maid)) { cfm->recv_fault |= CFM_FAULT_MAID; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 02df8f11e..300861ca5 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -629,9 +629,9 @@ struct tx_port { struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST]; }; -/* Contained by struct tx_bond 'slave_buckets'. */ -struct slave_entry { - odp_port_t slave_id; +/* Contained by struct tx_bond 'member_buckets'. */ +struct member_entry { + odp_port_t member_id; atomic_ullong n_packets; atomic_ullong n_bytes; }; @@ -640,7 +640,7 @@ struct slave_entry { struct tx_bond { struct cmap_node node; uint32_t bond_id; - struct slave_entry slave_buckets[BOND_BUCKETS]; + struct member_entry member_buckets[BOND_BUCKETS]; }; /* A set of properties for the current processing loop that is not directly @@ -1588,17 +1588,17 @@ dpif_netdev_bond_show(struct unixctl_conn *conn, int argc, if (cmap_count(&dp->tx_bonds) > 0) { struct tx_bond *dp_bond_entry; - uint32_t slave_id; ds_put_cstr(&reply, "Bonds:\n"); CMAP_FOR_EACH (dp_bond_entry, node, &dp->tx_bonds) { ds_put_format(&reply, " bond-id %"PRIu32":\n", dp_bond_entry->bond_id); for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) { - slave_id = - odp_to_u32(dp_bond_entry->slave_buckets[bucket].slave_id); - ds_put_format(&reply, " bucket %d - slave %"PRIu32"\n", - bucket, slave_id); + uint32_t member_id = odp_to_u32( + dp_bond_entry->member_buckets[bucket].member_id); + ds_put_format(&reply, + " bucket %d - member %"PRIu32"\n", + bucket, member_id); } } } @@ -6710,10 +6710,10 @@ dp_netdev_add_bond_tx_to_pmd(struct dp_netdev_pmd_thread *pmd, for (int i = 0; i < BOND_BUCKETS; i++) { uint64_t n_packets, n_bytes; - atomic_read_relaxed(&tx->slave_buckets[i].n_packets, &n_packets); - atomic_read_relaxed(&tx->slave_buckets[i].n_bytes, &n_bytes); - atomic_init(&new_tx->slave_buckets[i].n_packets, n_packets); - atomic_init(&new_tx->slave_buckets[i].n_bytes, n_bytes); + 
atomic_read_relaxed(&tx->member_buckets[i].n_packets, &n_packets); + atomic_read_relaxed(&tx->member_buckets[i].n_bytes, &n_bytes); + atomic_init(&new_tx->member_buckets[i].n_packets, n_packets); + atomic_init(&new_tx->member_buckets[i].n_bytes, n_bytes); } cmap_replace(&pmd->tx_bonds, &tx->node, &new_tx->node, hash_bond_id(bond->bond_id)); @@ -7639,18 +7639,19 @@ dp_execute_lb_output_action(struct dp_netdev_pmd_thread *pmd, DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) { /* - * Lookup the bond-hash table using hash to get the slave. + * Lookup the bond-hash table using hash to get the member. */ uint32_t hash = dp_packet_get_rss_hash(packet); - struct slave_entry *s_entry = &p_bond->slave_buckets[hash & BOND_MASK]; - odp_port_t bond_member = s_entry->slave_id; + struct member_entry *s_entry + = &p_bond->member_buckets[hash & BOND_MASK]; + odp_port_t bond_member = s_entry->member_id; uint32_t size = dp_packet_size(packet); struct dp_packet_batch output_pkt; dp_packet_batch_init_packet(&output_pkt, packet); if (OVS_LIKELY(dp_execute_output_action(pmd, &output_pkt, true, bond_member))) { - /* Update slave stats. */ + /* Update member stats. */ non_atomic_ullong_add(&s_entry->n_packets, 1); non_atomic_ullong_add(&s_entry->n_bytes, size); } @@ -8293,7 +8294,7 @@ dpif_netdev_ipf_dump_done(struct dpif *dpif OVS_UNUSED, void *ipf_dump_ctx) static int dpif_netdev_bond_add(struct dpif *dpif, uint32_t bond_id, - odp_port_t *slave_map) + odp_port_t *member_map) { struct tx_bond *new_tx = xzalloc(sizeof *new_tx); struct dp_netdev *dp = get_dp_netdev(dpif); @@ -8302,7 +8303,7 @@ dpif_netdev_bond_add(struct dpif *dpif, uint32_t bond_id, /* Prepare new bond mapping. 
*/ new_tx->bond_id = bond_id; for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) { - new_tx->slave_buckets[bucket].slave_id = slave_map[bucket]; + new_tx->member_buckets[bucket].member_id = member_map[bucket]; } ovs_mutex_lock(&dp->bond_mutex); @@ -8375,7 +8376,7 @@ dpif_netdev_bond_stats_get(struct dpif *dpif, uint32_t bond_id, for (int i = 0; i < BOND_BUCKETS; i++) { uint64_t pmd_n_bytes; - atomic_read_relaxed(&pmd_bond_entry->slave_buckets[i].n_bytes, + atomic_read_relaxed(&pmd_bond_entry->member_buckets[i].n_bytes, &pmd_n_bytes); n_bytes[i] += pmd_n_bytes; } diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 0e024c1c9..b817fceac 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -617,9 +617,9 @@ struct dpif_class { int (*meter_del)(struct dpif *, ofproto_meter_id meter_id, struct ofputil_meter_stats *, uint16_t n_bands); - /* Adds a bond with 'bond_id' and the slave-map to 'dpif'. */ + /* Adds a bond with 'bond_id' and the member-map to 'dpif'. */ int (*bond_add)(struct dpif *dpif, uint32_t bond_id, - odp_port_t *slave_map); + odp_port_t *member_map); /* Removes bond identified by 'bond_id' from 'dpif'. */ int (*bond_del)(struct dpif *dpif, uint32_t bond_id); diff --git a/lib/dpif.c b/lib/dpif.c index 53d65cf7c..ac2860764 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1993,10 +1993,10 @@ dpif_meter_del(struct dpif *dpif, ofproto_meter_id meter_id, } int -dpif_bond_add(struct dpif *dpif, uint32_t bond_id, odp_port_t *slave_map) +dpif_bond_add(struct dpif *dpif, uint32_t bond_id, odp_port_t *member_map) { return dpif->dpif_class->bond_del - ? dpif->dpif_class->bond_add(dpif, bond_id, slave_map) + ? 
dpif->dpif_class->bond_add(dpif, bond_id, member_map) : EOPNOTSUPP; } diff --git a/lib/dpif.h b/lib/dpif.h index f8bba23fe..cb047dbe2 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -898,7 +898,7 @@ int dpif_meter_del(struct dpif *, ofproto_meter_id meter_id, #define BOND_MASK 0xff #define BOND_BUCKETS (BOND_MASK + 1) -int dpif_bond_add(struct dpif *, uint32_t bond_id, odp_port_t *slave_map); +int dpif_bond_add(struct dpif *, uint32_t bond_id, odp_port_t *member_map); int dpif_bond_del(struct dpif *, uint32_t bond_id); int dpif_bond_stats_get(struct dpif *, uint32_t bond_id, uint64_t *n_bytes); bool dpif_supports_lb_output_action(const struct dpif *); diff --git a/lib/lacp.c b/lib/lacp.c index 705d88f50..540b2aa8c 100644 --- a/lib/lacp.c +++ b/lib/lacp.c @@ -92,12 +92,13 @@ enum pdu_subtype { SUBTYPE_MARKER, /* Link Aggregation Marker Protocol. */ }; -enum slave_status { +enum member_status { LACP_CURRENT, /* Current State. Partner up to date. */ LACP_EXPIRED, /* Expired State. Partner out of date. */ LACP_DEFAULTED, /* Defaulted State. No partner. */ }; +/* A LACP primary interface. */ struct lacp { struct ovs_list node; /* Node in all_lacps list. */ char *name; /* Name of this lacp object. */ @@ -105,8 +106,8 @@ struct lacp { uint16_t sys_priority; /* System Priority. */ bool active; /* Active or Passive. */ - struct hmap slaves; /* Slaves this LACP object controls. */ - struct slave *key_slave; /* Slave whose ID will be the aggregation key. */ + struct hmap members; /* Members this LACP object controls. */ + struct member *key_member; /* Member whose ID will be aggregation key. */ bool fast; /* True if using fast probe interval. */ bool negotiated; /* True if LACP negotiations were successful. */ @@ -116,17 +117,18 @@ struct lacp { struct ovs_refcount ref_cnt; }; -struct slave { - void *aux; /* Handle used to identify this slave. */ - struct hmap_node node; /* Node in master's slaves map. */ +/* A LACP member interface. 
*/ +struct member { + void *aux; /* Handle used to identify this member. */ + struct hmap_node node; /* Node in primary's members map. */ - struct lacp *lacp; /* LACP object containing this slave. */ + struct lacp *lacp; /* LACP object containing this member. */ uint16_t port_id; /* Port ID. */ uint16_t port_priority; /* Port Priority. */ uint16_t key; /* Aggregation Key. 0 if default. */ - char *name; /* Name of this slave. */ + char *name; /* Name of this member. */ - enum slave_status status; /* Slave status. */ + enum member_status status; /* Member status. */ bool attached; /* Attached. Traffic may flow. */ bool carrier_up; /* Carrier state of link. */ struct lacp_info partner; /* Partner information. */ @@ -149,20 +151,20 @@ static struct ovs_list *const all_lacps OVS_GUARDED_BY(mutex) = &all_lacps__; static void lacp_update_attached(struct lacp *) OVS_REQUIRES(mutex); -static void slave_destroy(struct slave *) OVS_REQUIRES(mutex); -static void slave_set_defaulted(struct slave *) OVS_REQUIRES(mutex); -static void slave_set_expired(struct slave *) OVS_REQUIRES(mutex); -static void slave_get_actor(struct slave *, struct lacp_info *actor) +static void member_destroy(struct member *) OVS_REQUIRES(mutex); +static void member_set_defaulted(struct member *) OVS_REQUIRES(mutex); +static void member_set_expired(struct member *) OVS_REQUIRES(mutex); +static void member_get_actor(struct member *, struct lacp_info *actor) OVS_REQUIRES(mutex); -static void slave_get_priority(struct slave *, struct lacp_info *priority) +static void member_get_priority(struct member *, struct lacp_info *priority) OVS_REQUIRES(mutex); -static bool slave_may_tx(const struct slave *) +static bool member_may_tx(const struct member *) OVS_REQUIRES(mutex); -static struct slave *slave_lookup(const struct lacp *, const void *slave) +static struct member *member_lookup(const struct lacp *, const void *member) OVS_REQUIRES(mutex); static bool info_tx_equal(struct lacp_info *, struct lacp_info *) 
OVS_REQUIRES(mutex); -static bool slave_may_enable__(struct slave *slave) OVS_REQUIRES(mutex); +static bool member_may_enable__(struct member *) OVS_REQUIRES(mutex); static unixctl_cb_func lacp_unixctl_show; static unixctl_cb_func lacp_unixctl_show_stats; @@ -254,7 +256,7 @@ lacp_create(void) OVS_EXCLUDED(mutex) struct lacp *lacp; lacp = xzalloc(sizeof *lacp); - hmap_init(&lacp->slaves); + hmap_init(&lacp->members); ovs_refcount_init(&lacp->ref_cnt); lacp_lock(); @@ -273,19 +275,19 @@ lacp_ref(const struct lacp *lacp_) return lacp; } -/* Destroys 'lacp' and its slaves. Does nothing if 'lacp' is NULL. */ +/* Destroys 'lacp' and its members. Does nothing if 'lacp' is NULL. */ void lacp_unref(struct lacp *lacp) OVS_EXCLUDED(mutex) { if (lacp && ovs_refcount_unref_relaxed(&lacp->ref_cnt) == 1) { - struct slave *slave, *next; + struct member *member, *next; lacp_lock(); - HMAP_FOR_EACH_SAFE (slave, next, node, &lacp->slaves) { - slave_destroy(slave); + HMAP_FOR_EACH_SAFE (member, next, node, &lacp->members) { + member_destroy(member); } - hmap_destroy(&lacp->slaves); + hmap_destroy(&lacp->members); ovs_list_remove(&lacp->node); free(lacp->name); free(lacp); @@ -336,39 +338,40 @@ lacp_is_active(const struct lacp *lacp) OVS_EXCLUDED(mutex) return ret; } -/* Processes 'packet' which was received on 'slave_'. This function should be - * called on all packets received on 'slave_' with Ethernet Type ETH_TYPE_LACP. +/* Processes 'packet' which was received on 'member_'. This function should be + * called on all packets received on 'member_' with Ethernet Type + * ETH_TYPE_LACP. 
*/ bool -lacp_process_packet(struct lacp *lacp, const void *slave_, +lacp_process_packet(struct lacp *lacp, const void *member_, const struct dp_packet *packet) OVS_EXCLUDED(mutex) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); const struct lacp_pdu *pdu; long long int tx_rate; - struct slave *slave; + struct member *member; bool lacp_may_enable = false; enum pdu_subtype subtype; lacp_lock(); - slave = slave_lookup(lacp, slave_); - if (!slave) { + member = member_lookup(lacp, member_); + if (!member) { goto out; } - slave->count_rx_pdus++; + member->count_rx_pdus++; pdu = parse_lacp_packet(packet, &subtype); switch (subtype) { case SUBTYPE_LACP: break; case SUBTYPE_MARKER: - slave->count_rx_pdus_marker++; + member->count_rx_pdus_marker++; VLOG_DBG("%s: received a LACP marker PDU.", lacp->name); goto out; case SUBTYPE_UNUSED: default: - slave->count_rx_pdus_bad++; + member->count_rx_pdus_bad++; VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.", lacp->name); goto out; @@ -377,30 +380,30 @@ lacp_process_packet(struct lacp *lacp, const void *slave_, /* On some NICs L1 state reporting is slow. In case LACP packets are * received while carrier (L1) state is still down, drop the LACP PDU and * trigger re-checking of L1 state. */ - if (!slave->carrier_up) { + if (!member->carrier_up) { VLOG_INFO_RL(&rl, "%s: carrier state is DOWN," - " dropping received LACP PDU.", slave->name); + " dropping received LACP PDU.", member->name); seq_change(connectivity_seq_get()); goto out; } - slave->status = LACP_CURRENT; + member->status = LACP_CURRENT; tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX; - timer_set_duration(&slave->rx, LACP_RX_MULTIPLIER * tx_rate); + timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate); - slave->ntt_actor = pdu->partner; + member->ntt_actor = pdu->partner; /* Update our information about our partner if it's out of date. This may * cause priorities to change so re-calculate attached status of all - * slaves. 
*/ - if (memcmp(&slave->partner, &pdu->actor, sizeof pdu->actor)) { + * members. */ + if (memcmp(&member->partner, &pdu->actor, sizeof pdu->actor)) { lacp->update = true; - slave->partner = pdu->actor; + member->partner = pdu->actor; } /* Evaluate may_enable here to avoid dropping of packets till main thread * sets may_enable to true. */ - lacp_may_enable = slave_may_enable__(slave); + lacp_may_enable = member_may_enable__(member); out: lacp_unlock(); @@ -426,92 +429,92 @@ lacp_status(const struct lacp *lacp) OVS_EXCLUDED(mutex) } } -/* Registers 'slave_' as subordinate to 'lacp'. This should be called at least - * once per slave in a LACP managed bond. Should also be called whenever a - * slave's settings change. */ +/* Registers 'member_' as subordinate to 'lacp'. This should be called at + * least once per member in a LACP managed bond. Should also be called + * whenever a member's settings change. */ void -lacp_slave_register(struct lacp *lacp, void *slave_, - const struct lacp_slave_settings *s) +lacp_member_register(struct lacp *lacp, void *member_, + const struct lacp_member_settings *s) OVS_EXCLUDED(mutex) { - struct slave *slave; + struct member *member; lacp_lock(); - slave = slave_lookup(lacp, slave_); - if (!slave) { - slave = xzalloc(sizeof *slave); - slave->lacp = lacp; - slave->aux = slave_; - hmap_insert(&lacp->slaves, &slave->node, hash_pointer(slave_, 0)); - slave_set_defaulted(slave); - - if (!lacp->key_slave) { - lacp->key_slave = slave; + member = member_lookup(lacp, member_); + if (!member) { + member = xzalloc(sizeof *member); + member->lacp = lacp; + member->aux = member_; + hmap_insert(&lacp->members, &member->node, hash_pointer(member_, 0)); + member_set_defaulted(member); + + if (!lacp->key_member) { + lacp->key_member = member; } } - if (!slave->name || strcmp(s->name, slave->name)) { - free(slave->name); - slave->name = xstrdup(s->name); + if (!member->name || strcmp(s->name, member->name)) { + free(member->name); + member->name = 
xstrdup(s->name); } - if (slave->port_id != s->id - || slave->port_priority != s->priority - || slave->key != s->key) { - slave->port_id = s->id; - slave->port_priority = s->priority; - slave->key = s->key; + if (member->port_id != s->id + || member->port_priority != s->priority + || member->key != s->key) { + member->port_id = s->id; + member->port_priority = s->priority; + member->key = s->key; lacp->update = true; if (lacp->active || lacp->negotiated) { - slave_set_expired(slave); + member_set_expired(member); } } lacp_unlock(); } -/* Unregisters 'slave_' with 'lacp'. */ +/* Unregisters 'member_' with 'lacp'. */ void -lacp_slave_unregister(struct lacp *lacp, const void *slave_) +lacp_member_unregister(struct lacp *lacp, const void *member_) OVS_EXCLUDED(mutex) { - struct slave *slave; + struct member *member; lacp_lock(); - slave = slave_lookup(lacp, slave_); - if (slave) { - slave_destroy(slave); + member = member_lookup(lacp, member_); + if (member) { + member_destroy(member); lacp->update = true; } lacp_unlock(); } -/* This function should be called whenever the carrier status of 'slave_' has +/* This function should be called whenever the carrier status of 'member_' has * changed. 
If 'lacp' is null, this function has no effect.*/ void -lacp_slave_carrier_changed(const struct lacp *lacp, const void *slave_, - bool carrier_up) +lacp_member_carrier_changed(const struct lacp *lacp, const void *member_, + bool carrier_up) OVS_EXCLUDED(mutex) { - struct slave *slave; + struct member *member; if (!lacp) { return; } lacp_lock(); - slave = slave_lookup(lacp, slave_); - if (!slave) { + member = member_lookup(lacp, member_); + if (!member) { goto out; } - if (slave->status == LACP_CURRENT || slave->lacp->active) { - slave_set_expired(slave); + if (member->status == LACP_CURRENT || member->lacp->active) { + member_set_expired(member); } - if (slave->carrier_up != carrier_up) { - slave->carrier_up = carrier_up; - slave->count_carrier_changed++; + if (member->carrier_up != carrier_up) { + member->carrier_up = carrier_up; + member->count_carrier_changed++; } out: @@ -519,35 +522,35 @@ out: } static bool -slave_may_enable__(struct slave *slave) OVS_REQUIRES(mutex) +member_may_enable__(struct member *member) OVS_REQUIRES(mutex) { - /* The slave may be enabled if it's attached to an aggregator and its + /* The member may be enabled if it's attached to an aggregator and its * partner is synchronized.*/ - return slave->attached && (slave->partner.state & LACP_STATE_SYNC - || (slave->lacp && slave->lacp->fallback_ab - && slave->status == LACP_DEFAULTED)); + return member->attached && (member->partner.state & LACP_STATE_SYNC + || (member->lacp && member->lacp->fallback_ab + && member->status == LACP_DEFAULTED)); } -/* This function should be called before enabling 'slave_' to send or receive - * traffic. If it returns false, 'slave_' should not enabled. As a +/* This function should be called before enabling 'member_' to send or receive + * traffic. If it returns false, 'member_' should not enabled. As a * convenience, returns true if 'lacp' is NULL. 
*/ bool -lacp_slave_may_enable(const struct lacp *lacp, const void *slave_) +lacp_member_may_enable(const struct lacp *lacp, const void *member_) OVS_EXCLUDED(mutex) { if (lacp) { - struct slave *slave; + struct member *member; bool ret = false; lacp_lock(); - slave = slave_lookup(lacp, slave_); - if (slave) { - /* It is only called when carrier is up. So, enable slave's + member = member_lookup(lacp, member_); + if (member) { + /* It is only called when carrier is up. So, enable member's * carrier state if it is currently down. */ - if (!slave->carrier_up) { - slave->carrier_up = true; + if (!member->carrier_up) { + member->carrier_up = true; } - ret = slave_may_enable__(slave); + ret = member_may_enable__(member); } lacp_unlock(); return ret; @@ -556,19 +559,19 @@ lacp_slave_may_enable(const struct lacp *lacp, const void *slave_) } } -/* Returns true if partner information on 'slave_' is up to date. 'slave_' +/* Returns true if partner information on 'member_' is up to date. 'member_' * not being current, generally indicates a connectivity problem, or a * misconfigured (or broken) partner. */ bool -lacp_slave_is_current(const struct lacp *lacp, const void *slave_) +lacp_member_is_current(const struct lacp *lacp, const void *member_) OVS_EXCLUDED(mutex) { - struct slave *slave; + struct member *member; bool ret; lacp_lock(); - slave = slave_lookup(lacp, slave_); - ret = slave ? slave->status != LACP_DEFAULTED : false; + member = member_lookup(lacp, member_); + ret = member ? 
member->status != LACP_DEFAULTED : false; lacp_unlock(); return ret; } @@ -577,21 +580,21 @@ lacp_slave_is_current(const struct lacp *lacp, const void *slave_) void lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex) { - struct slave *slave; + struct member *member; lacp_lock(); - HMAP_FOR_EACH (slave, node, &lacp->slaves) { - if (timer_expired(&slave->rx)) { - enum slave_status old_status = slave->status; - - if (slave->status == LACP_CURRENT) { - slave_set_expired(slave); - slave->count_link_expired++; - } else if (slave->status == LACP_EXPIRED) { - slave_set_defaulted(slave); - slave->count_link_defaulted++; + HMAP_FOR_EACH (member, node, &lacp->members) { + if (timer_expired(&member->rx)) { + enum member_status old_status = member->status; + + if (member->status == LACP_CURRENT) { + member_set_expired(member); + member->count_link_expired++; + } else if (member->status == LACP_EXPIRED) { + member_set_defaulted(member); + member->count_link_defaulted++; } - if (slave->status != old_status) { + if (member->status != old_status) { seq_change(connectivity_seq_get()); } } @@ -602,30 +605,30 @@ lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex) seq_change(connectivity_seq_get()); } - HMAP_FOR_EACH (slave, node, &lacp->slaves) { + HMAP_FOR_EACH (member, node, &lacp->members) { struct lacp_info actor; - if (!slave_may_tx(slave)) { + if (!member_may_tx(member)) { continue; } - slave_get_actor(slave, &actor); + member_get_actor(member, &actor); - if (timer_expired(&slave->tx) - || !info_tx_equal(&actor, &slave->ntt_actor)) { + if (timer_expired(&member->tx) + || !info_tx_equal(&actor, &member->ntt_actor)) { long long int duration; struct lacp_pdu pdu; - slave->ntt_actor = actor; - compose_lacp_pdu(&actor, &slave->partner, &pdu); - send_pdu(slave->aux, &pdu, sizeof pdu); - slave->count_tx_pdus++; + member->ntt_actor = actor; + compose_lacp_pdu(&actor, &member->partner, &pdu); + send_pdu(member->aux, &pdu, sizeof pdu); + 
member->count_tx_pdus++; - duration = (slave->partner.state & LACP_STATE_TIME + duration = (member->partner.state & LACP_STATE_TIME ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX); - timer_set_duration(&slave->tx, duration); + timer_set_duration(&member->tx, duration); seq_change(connectivity_seq_get()); } } @@ -636,16 +639,16 @@ lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex) void lacp_wait(struct lacp *lacp) OVS_EXCLUDED(mutex) { - struct slave *slave; + struct member *member; lacp_lock(); - HMAP_FOR_EACH (slave, node, &lacp->slaves) { - if (slave_may_tx(slave)) { - timer_wait(&slave->tx); + HMAP_FOR_EACH (member, node, &lacp->members) { + if (member_may_tx(member)) { + timer_wait(&member->tx); } - if (slave->status != LACP_DEFAULTED) { - timer_wait(&slave->rx); + if (member->status != LACP_DEFAULTED) { + timer_wait(&member->rx); } } lacp_unlock(); @@ -653,12 +656,12 @@ lacp_wait(struct lacp *lacp) OVS_EXCLUDED(mutex) /* Static Helpers. */ -/* Updates the attached status of all slaves controlled by 'lacp' and sets its - * negotiated parameter to true if any slaves are attachable. */ +/* Updates the attached status of all members controlled by 'lacp' and sets its + * negotiated parameter to true if any members are attachable. */ static void lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex) { - struct slave *lead, *lead_current, *slave; + struct member *lead, *lead_current, *member; struct lacp_info lead_pri; bool lead_enable; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10); @@ -671,12 +674,12 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex) /* Check if there is a working interface. * Store as lead_current, if there is one. 
*/ - HMAP_FOR_EACH (slave, node, &lacp->slaves) { - if (slave->status == LACP_CURRENT && slave->attached) { + HMAP_FOR_EACH (member, node, &lacp->members) { + if (member->status == LACP_CURRENT && member->attached) { struct lacp_info pri; - slave_get_priority(slave, &pri); + member_get_priority(member, &pri); if (!lead_current || memcmp(&pri, &lead_pri, sizeof pri) < 0) { - lead_current = slave; + lead_current = member; lead = lead_current; lead_pri = pri; lead_enable = true; @@ -685,43 +688,43 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex) } /* Find interface with highest priority. */ - HMAP_FOR_EACH (slave, node, &lacp->slaves) { + HMAP_FOR_EACH (member, node, &lacp->members) { struct lacp_info pri; - slave->attached = false; + member->attached = false; /* XXX: In the future allow users to configure the expected system ID. * For now just special case loopback. */ - if (eth_addr_equals(slave->partner.sys_id, slave->lacp->sys_id)) { - VLOG_WARN_RL(&rl, "slave %s: Loopback detected. Slave is " - "connected to its own bond", slave->name); + if (eth_addr_equals(member->partner.sys_id, member->lacp->sys_id)) { + VLOG_WARN_RL(&rl, "member %s: Loopback detected. Interface is " + "connected to its own bond", member->name); continue; } - if (slave->status == LACP_DEFAULTED) { + if (member->status == LACP_DEFAULTED) { if (lacp->fallback_ab) { - slave->attached = true; + member->attached = true; } continue; } - slave_get_priority(slave, &pri); - bool enable = slave_may_enable__(slave); + member_get_priority(member, &pri); + bool enable = member_may_enable__(member); /* Check if partner MAC address is the same as on the working - * interface. Activate slave only if the MAC is the same, or + * interface. Activate member only if the MAC is the same, or * there is no working interface. 
*/ if (!lead_current || (lead_current - && eth_addr_equals(slave->partner.sys_id, + && eth_addr_equals(member->partner.sys_id, lead_current->partner.sys_id))) { - slave->attached = true; + member->attached = true; } - if (slave->attached && + if (member->attached && (!lead || enable > lead_enable || (enable == lead_enable && memcmp(&pri, &lead_pri, sizeof pri) < 0))) { - lead = slave; + lead = member; lead_enable = enable; lead_pri = pri; } @@ -730,65 +733,66 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex) lacp->negotiated = lead != NULL; if (lead) { - HMAP_FOR_EACH (slave, node, &lacp->slaves) { - if ((lacp->fallback_ab && slave->status == LACP_DEFAULTED) - || lead->partner.key != slave->partner.key + HMAP_FOR_EACH (member, node, &lacp->members) { + if ((lacp->fallback_ab && member->status == LACP_DEFAULTED) + || lead->partner.key != member->partner.key || !eth_addr_equals(lead->partner.sys_id, - slave->partner.sys_id)) { - slave->attached = false; + member->partner.sys_id)) { + member->attached = false; } } } } static void -slave_destroy(struct slave *slave) OVS_REQUIRES(mutex) +member_destroy(struct member *member) OVS_REQUIRES(mutex) { - if (slave) { - struct lacp *lacp = slave->lacp; + if (member) { + struct lacp *lacp = member->lacp; lacp->update = true; - hmap_remove(&lacp->slaves, &slave->node); + hmap_remove(&lacp->members, &member->node); - if (lacp->key_slave == slave) { - struct hmap_node *slave_node = hmap_first(&lacp->slaves); + if (lacp->key_member == member) { + struct hmap_node *member_node = hmap_first(&lacp->members); - if (slave_node) { - lacp->key_slave = CONTAINER_OF(slave_node, struct slave, node); + if (member_node) { + lacp->key_member = CONTAINER_OF(member_node, struct member, + node); } else { - lacp->key_slave = NULL; + lacp->key_member = NULL; } } - free(slave->name); - free(slave); + free(member->name); + free(member); } } static void -slave_set_defaulted(struct slave *slave) OVS_REQUIRES(mutex) 
+member_set_defaulted(struct member *member) OVS_REQUIRES(mutex) { - memset(&slave->partner, 0, sizeof slave->partner); + memset(&member->partner, 0, sizeof member->partner); - slave->lacp->update = true; - slave->status = LACP_DEFAULTED; + member->lacp->update = true; + member->status = LACP_DEFAULTED; } static void -slave_set_expired(struct slave *slave) OVS_REQUIRES(mutex) +member_set_expired(struct member *member) OVS_REQUIRES(mutex) { - slave->status = LACP_EXPIRED; - slave->partner.state |= LACP_STATE_TIME; - slave->partner.state &= ~LACP_STATE_SYNC; + member->status = LACP_EXPIRED; + member->partner.state |= LACP_STATE_TIME; + member->partner.state &= ~LACP_STATE_SYNC; - timer_set_duration(&slave->rx, LACP_RX_MULTIPLIER * LACP_FAST_TIME_TX); + timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * LACP_FAST_TIME_TX); } static void -slave_get_actor(struct slave *slave, struct lacp_info *actor) +member_get_actor(struct member *member, struct lacp_info *actor) OVS_REQUIRES(mutex) { - struct lacp *lacp = slave->lacp; + struct lacp *lacp = member->lacp; uint16_t key; uint8_t state = 0; @@ -800,62 +804,62 @@ slave_get_actor(struct slave *slave, struct lacp_info *actor) state |= LACP_STATE_TIME; } - if (slave->attached) { + if (member->attached) { state |= LACP_STATE_SYNC; } - if (slave->status == LACP_DEFAULTED) { + if (member->status == LACP_DEFAULTED) { state |= LACP_STATE_DEF; } - if (slave->status == LACP_EXPIRED) { + if (member->status == LACP_EXPIRED) { state |= LACP_STATE_EXP; } - if (hmap_count(&lacp->slaves) > 1) { + if (hmap_count(&lacp->members) > 1) { state |= LACP_STATE_AGG; } - if (slave->attached || !lacp->negotiated) { + if (member->attached || !lacp->negotiated) { state |= LACP_STATE_COL | LACP_STATE_DIST; } - key = lacp->key_slave->key; + key = lacp->key_member->key; if (!key) { - key = lacp->key_slave->port_id; + key = lacp->key_member->port_id; } actor->state = state; actor->key = htons(key); - actor->port_priority = htons(slave->port_priority); 
- actor->port_id = htons(slave->port_id); + actor->port_priority = htons(member->port_priority); + actor->port_id = htons(member->port_id); actor->sys_priority = htons(lacp->sys_priority); actor->sys_id = lacp->sys_id; } -/* Given 'slave', populates 'priority' with data representing its LACP link +/* Given 'member', populates 'priority' with data representing its LACP link * priority. If two priority objects populated by this function are compared * using memcmp, the higher priority link will be less than the lower priority * link. */ static void -slave_get_priority(struct slave *slave, struct lacp_info *priority) +member_get_priority(struct member *member, struct lacp_info *priority) OVS_REQUIRES(mutex) { uint16_t partner_priority, actor_priority; /* Choose the lacp_info of the higher priority system by comparing their * system priorities and mac addresses. */ - actor_priority = slave->lacp->sys_priority; - partner_priority = ntohs(slave->partner.sys_priority); + actor_priority = member->lacp->sys_priority; + partner_priority = ntohs(member->partner.sys_priority); if (actor_priority < partner_priority) { - slave_get_actor(slave, priority); + member_get_actor(member, priority); } else if (partner_priority < actor_priority) { - *priority = slave->partner; - } else if (eth_addr_compare_3way(slave->lacp->sys_id, - slave->partner.sys_id) < 0) { - slave_get_actor(slave, priority); + *priority = member->partner; + } else if (eth_addr_compare_3way(member->lacp->sys_id, + member->partner.sys_id) < 0) { + member_get_actor(member, priority); } else { - *priority = slave->partner; + *priority = member->partner; } /* Key and state are not used in priority comparisons. */ @@ -864,22 +868,22 @@ slave_get_priority(struct slave *slave, struct lacp_info *priority) } static bool -slave_may_tx(const struct slave *slave) OVS_REQUIRES(mutex) +member_may_tx(const struct member *member) OVS_REQUIRES(mutex) { /* Check for L1 state as well as LACP state. 
*/ - return (slave->carrier_up) && ((slave->lacp->active) || - (slave->status != LACP_DEFAULTED)); + return (member->carrier_up) && ((member->lacp->active) || + (member->status != LACP_DEFAULTED)); } -static struct slave * -slave_lookup(const struct lacp *lacp, const void *slave_) OVS_REQUIRES(mutex) +static struct member * +member_lookup(const struct lacp *lacp, const void *member_) OVS_REQUIRES(mutex) { - struct slave *slave; + struct member *member; - HMAP_FOR_EACH_IN_BUCKET (slave, node, hash_pointer(slave_, 0), - &lacp->slaves) { - if (slave->aux == slave_) { - return slave; + HMAP_FOR_EACH_IN_BUCKET (member, node, hash_pointer(member_, 0), + &lacp->members) { + if (member->aux == member_) { + return member; } } @@ -961,10 +965,10 @@ ds_put_lacp_state(struct ds *ds, uint8_t state) static void lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex) { - struct shash slave_shash = SHASH_INITIALIZER(&slave_shash); - const struct shash_node **sorted_slaves = NULL; + struct shash member_shash = SHASH_INITIALIZER(&member_shash); + const struct shash_node **sorted_members = NULL; - struct slave *slave; + struct member *member; int i; ds_put_format(ds, "---- %s ----\n", lacp->name); @@ -977,10 +981,10 @@ lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex) ds_put_format(ds, " sys_id: " ETH_ADDR_FMT "\n", ETH_ADDR_ARGS(lacp->sys_id)); ds_put_format(ds, " sys_priority: %u\n", lacp->sys_priority); ds_put_cstr(ds, " aggregation key: "); - if (lacp->key_slave) { - ds_put_format(ds, "%u", lacp->key_slave->key - ? lacp->key_slave->key - : lacp->key_slave->port_id); + if (lacp->key_member) { + ds_put_format(ds, "%u", lacp->key_member->key + ? 
lacp->key_member->key + : lacp->key_member->port_id); } else { ds_put_cstr(ds, "none"); } @@ -993,18 +997,18 @@ lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex) ds_put_cstr(ds, "slow\n"); } - HMAP_FOR_EACH (slave, node, &lacp->slaves) { - shash_add(&slave_shash, slave->name, slave); + HMAP_FOR_EACH (member, node, &lacp->members) { + shash_add(&member_shash, member->name, member); } - sorted_slaves = shash_sort(&slave_shash); + sorted_members = shash_sort(&member_shash); - for (i = 0; i < shash_count(&slave_shash); i++) { + for (i = 0; i < shash_count(&member_shash); i++) { char *status; struct lacp_info actor; - slave = sorted_slaves[i]->data; - slave_get_actor(slave, &actor); - switch (slave->status) { + member = sorted_members[i]->data; + member_get_actor(member, &actor); + switch (member->status) { case LACP_CURRENT: status = "current"; break; @@ -1018,11 +1022,11 @@ lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex) OVS_NOT_REACHED(); } - ds_put_format(ds, "\nslave: %s: %s %s\n", slave->name, status, - slave->attached ? "attached" : "detached"); - ds_put_format(ds, " port_id: %u\n", slave->port_id); - ds_put_format(ds, " port_priority: %u\n", slave->port_priority); - ds_put_format(ds, " may_enable: %s\n", (slave_may_enable__(slave) + ds_put_format(ds, "\nmember: %s: %s %s\n", member->name, status, + member->attached ? "attached" : "detached"); + ds_put_format(ds, " port_id: %u\n", member->port_id); + ds_put_format(ds, " port_priority: %u\n", member->port_priority); + ds_put_format(ds, " may_enable: %s\n", (member_may_enable__(member) ? 
"true" : "false")); ds_put_format(ds, "\n actor sys_id: " ETH_ADDR_FMT "\n", @@ -1040,58 +1044,58 @@ lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex) ds_put_cstr(ds, "\n\n"); ds_put_format(ds, " partner sys_id: " ETH_ADDR_FMT "\n", - ETH_ADDR_ARGS(slave->partner.sys_id)); + ETH_ADDR_ARGS(member->partner.sys_id)); ds_put_format(ds, " partner sys_priority: %u\n", - ntohs(slave->partner.sys_priority)); + ntohs(member->partner.sys_priority)); ds_put_format(ds, " partner port_id: %u\n", - ntohs(slave->partner.port_id)); + ntohs(member->partner.port_id)); ds_put_format(ds, " partner port_priority: %u\n", - ntohs(slave->partner.port_priority)); + ntohs(member->partner.port_priority)); ds_put_format(ds, " partner key: %u\n", - ntohs(slave->partner.key)); + ntohs(member->partner.key)); ds_put_cstr(ds, " partner state:"); - ds_put_lacp_state(ds, slave->partner.state); + ds_put_lacp_state(ds, member->partner.state); ds_put_cstr(ds, "\n"); } - shash_destroy(&slave_shash); - free(sorted_slaves); + shash_destroy(&member_shash); + free(sorted_members); } static void lacp_print_stats(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex) { - struct shash slave_shash = SHASH_INITIALIZER(&slave_shash); - const struct shash_node **sorted_slaves = NULL; + struct shash member_shash = SHASH_INITIALIZER(&member_shash); + const struct shash_node **sorted_members = NULL; - struct slave *slave; + struct member *member; int i; ds_put_format(ds, "---- %s statistics ----\n", lacp->name); - HMAP_FOR_EACH (slave, node, &lacp->slaves) { - shash_add(&slave_shash, slave->name, slave); + HMAP_FOR_EACH (member, node, &lacp->members) { + shash_add(&member_shash, member->name, member); } - sorted_slaves = shash_sort(&slave_shash); - - for (i = 0; i < shash_count(&slave_shash); i++) { - slave = sorted_slaves[i]->data; - ds_put_format(ds, "\nslave: %s:\n", slave->name); - ds_put_format(ds, " TX PDUs: %u\n", slave->count_tx_pdus); - ds_put_format(ds, " RX PDUs: %u\n", 
slave->count_rx_pdus); - ds_put_format(ds, " RX Bad PDUs: %u\n", slave->count_rx_pdus_bad); + sorted_members = shash_sort(&member_shash); + + for (i = 0; i < shash_count(&member_shash); i++) { + member = sorted_members[i]->data; + ds_put_format(ds, "\nmember: %s:\n", member->name); + ds_put_format(ds, " TX PDUs: %u\n", member->count_tx_pdus); + ds_put_format(ds, " RX PDUs: %u\n", member->count_rx_pdus); + ds_put_format(ds, " RX Bad PDUs: %u\n", member->count_rx_pdus_bad); ds_put_format(ds, " RX Marker Request PDUs: %u\n", - slave->count_rx_pdus_marker); + member->count_rx_pdus_marker); ds_put_format(ds, " Link Expired: %u\n", - slave->count_link_expired); + member->count_link_expired); ds_put_format(ds, " Link Defaulted: %u\n", - slave->count_link_defaulted); + member->count_link_defaulted); ds_put_format(ds, " Carrier Status Changed: %u\n", - slave->count_carrier_changed); + member->count_carrier_changed); } - shash_destroy(&slave_shash); - free(sorted_slaves); + shash_destroy(&member_shash); + free(sorted_members); } static void @@ -1152,27 +1156,28 @@ out: lacp_unlock(); } -/* Extract a snapshot of the current state and counters for a slave port. - Return false if the slave is not active. */ +/* Extract a snapshot of the current state and counters for a member port. + Return false if the member is not active. 
*/ bool -lacp_get_slave_stats(const struct lacp *lacp, const void *slave_, struct lacp_slave_stats *stats) +lacp_get_member_stats(const struct lacp *lacp, const void *member_, + struct lacp_member_stats *stats) OVS_EXCLUDED(mutex) { - struct slave *slave; + struct member *member; struct lacp_info actor; bool ret; ovs_mutex_lock(&mutex); - slave = slave_lookup(lacp, slave_); - if (slave) { + member = member_lookup(lacp, member_); + if (member) { ret = true; - slave_get_actor(slave, &actor); + member_get_actor(member, &actor); stats->dot3adAggPortActorSystemID = actor.sys_id; - stats->dot3adAggPortPartnerOperSystemID = slave->partner.sys_id; - stats->dot3adAggPortAttachedAggID = (lacp->key_slave->key ? - lacp->key_slave->key : - lacp->key_slave->port_id); + stats->dot3adAggPortPartnerOperSystemID = member->partner.sys_id; + stats->dot3adAggPortAttachedAggID = (lacp->key_member->key ? + lacp->key_member->key : + lacp->key_member->port_id); /* Construct my admin-state. Assume aggregation is configured on. 
*/ stats->dot3adAggPortActorAdminState = LACP_STATE_AGG; @@ -1189,12 +1194,12 @@ lacp_get_slave_stats(const struct lacp *lacp, const void *slave_, struct lacp_sl stats->dot3adAggPortPartnerAdminState = 0; stats->dot3adAggPortActorOperState = actor.state; - stats->dot3adAggPortPartnerOperState = slave->partner.state; + stats->dot3adAggPortPartnerOperState = member->partner.state; /* Read out the latest counters */ - stats->dot3adAggPortStatsLACPDUsRx = slave->count_rx_pdus; - stats->dot3adAggPortStatsIllegalRx = slave->count_rx_pdus_bad; - stats->dot3adAggPortStatsLACPDUsTx = slave->count_tx_pdus; + stats->dot3adAggPortStatsLACPDUsRx = member->count_rx_pdus; + stats->dot3adAggPortStatsIllegalRx = member->count_rx_pdus_bad; + stats->dot3adAggPortStatsLACPDUsTx = member->count_tx_pdus; } else { ret = false; } diff --git a/lib/lacp.h b/lib/lacp.h index d731ae9a6..908ec201c 100644 --- a/lib/lacp.h +++ b/lib/lacp.h @@ -46,32 +46,32 @@ struct lacp *lacp_ref(const struct lacp *); void lacp_configure(struct lacp *, const struct lacp_settings *); bool lacp_is_active(const struct lacp *); -bool lacp_process_packet(struct lacp *, const void *slave, +bool lacp_process_packet(struct lacp *, const void *member, const struct dp_packet *packet); enum lacp_status lacp_status(const struct lacp *); -struct lacp_slave_settings { +struct lacp_member_settings { char *name; /* Name (for debugging). */ uint16_t id; /* Port ID. */ uint16_t priority; /* Port priority. */ uint16_t key; /* Aggregation key. 
*/ }; -void lacp_slave_register(struct lacp *, void *slave_, - const struct lacp_slave_settings *); -void lacp_slave_unregister(struct lacp *, const void *slave); -void lacp_slave_carrier_changed(const struct lacp *, const void *slave, - bool carrier_up); -bool lacp_slave_may_enable(const struct lacp *, const void *slave); -bool lacp_slave_is_current(const struct lacp *, const void *slave_); +void lacp_member_register(struct lacp *, void *member_, + const struct lacp_member_settings *); +void lacp_member_unregister(struct lacp *, const void *member); +void lacp_member_carrier_changed(const struct lacp *, const void *member, + bool carrier_up); +bool lacp_member_may_enable(const struct lacp *, const void *member); +bool lacp_member_is_current(const struct lacp *, const void *member_); /* Callback function for lacp_run() for sending a LACP PDU. */ -typedef void lacp_send_pdu(void *slave, const void *pdu, size_t pdu_size); +typedef void lacp_send_pdu(void *member, const void *pdu, size_t pdu_size); void lacp_run(struct lacp *, lacp_send_pdu *); void lacp_wait(struct lacp *); -struct lacp_slave_stats { +struct lacp_member_stats { /* id */ struct eth_addr dot3adAggPortActorSystemID; struct eth_addr dot3adAggPortPartnerOperSystemID; @@ -92,6 +92,7 @@ struct lacp_slave_stats { /* uint32_t dot3adAggPortStatsMarkerResponsePDUsTx; */ }; -bool lacp_get_slave_stats(const struct lacp *, const void *slave_, struct lacp_slave_stats *); +bool lacp_get_member_stats(const struct lacp *, const void *member_, + struct lacp_member_stats *); #endif /* lacp.h */ diff --git a/lib/lldp/lldp-const.h b/lib/lldp/lldp-const.h index eceb612d1..8c5c0733e 100644 --- a/lib/lldp/lldp-const.h +++ b/lib/lldp/lldp-const.h @@ -218,13 +218,13 @@ #define LLDPD_MODE_MAX LLDPD_MODE_FDP -/* Bond slave src mac type constants */ -#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_UNKNOWN 0 -#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_REAL 1 -#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_ZERO 2 -#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_FIXED 3 
-#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_LOCALLY_ADMINISTERED 4 -#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_MAX \ - LLDP_BOND_SLAVE_SRC_MAC_TYPE_LOCALLY_ADMINISTERED +/* Bond member src mac type constants */ +#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_UNKNOWN 0 +#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_REAL 1 +#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_ZERO 2 +#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_FIXED 3 +#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_LOCALLY_ADMINISTERED 4 +#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_MAX \ + LLDP_BOND_MEMBER_SRC_MAC_TYPE_LOCALLY_ADMINISTERED #endif /* _LLDP_H */ diff --git a/lib/lldp/lldpd-structs.h b/lib/lldp/lldpd-structs.h index 6a3ffb8d3..fe5d5f9f8 100644 --- a/lib/lldp/lldpd-structs.h +++ b/lib/lldp/lldpd-structs.h @@ -135,8 +135,8 @@ struct lldpd_config { int c_set_ifdescr; /* Set interface description */ int c_promisc; /* Interfaces should be in promiscuous mode */ int c_tx_hold; /* Transmit hold */ - int c_bond_slave_src_mac_type; /* Src mac type in lldp frames over bond - * slaves */ + int c_bond_member_src_mac_type; /* Src mac type in lldp frames over bond + * member interfaces */ int c_lldp_portid_type; /* The PortID type */ }; @@ -158,9 +158,9 @@ struct lldpd_ops { }; /* An interface is uniquely identified by h_ifindex, h_ifname and h_ops. This - * means if an interface becomes enslaved, it will be considered as a new - * interface. The same applies for renaming and we include the index in case of - * renaming to an existing interface. + * means if an interface becomes a bond member, it will be considered as a + * new interface. The same applies for renaming and we include the index in + * case of renaming to an existing interface. 
*/ struct lldpd_hardware { struct ovs_list h_entries; diff --git a/lib/mac-learning.c b/lib/mac-learning.c index f6183480d..9442858d9 100644 --- a/lib/mac-learning.c +++ b/lib/mac-learning.c @@ -384,7 +384,7 @@ is_mac_learning_update_needed(const struct mac_learning *ml, if (is_gratuitous_arp) { /* We don't want to learn from gratuitous ARP packets that are - * reflected back over bond slaves so we lock the learning table. For + * reflected back over bond members so we lock the learning table. For * more detail, see the bigger comment in update_learning_table__(). */ if (!is_bond) { return true; /* Need to set the gratuitous ARP lock. */ @@ -424,7 +424,7 @@ update_learning_table__(struct mac_learning *ml, struct eth_addr src, mac = mac_learning_insert(ml, src, vlan); if (is_gratuitous_arp) { /* Gratuitous ARP packets received over non-bond interfaces could be - * reflected back over bond slaves. We don't want to learn from these + * reflected back over bond members. We don't want to learn from these * reflected packets, so we lock each entry for which a gratuitous ARP * packet was received over a non-bond interface and refrain from * learning from gratuitous ARP packets that arrive over bond diff --git a/lib/mac-learning.h b/lib/mac-learning.h index ad2f1fe4e..0ddab06cb 100644 --- a/lib/mac-learning.h +++ b/lib/mac-learning.h @@ -95,7 +95,7 @@ struct mac_learning; #define MAC_ENTRY_DEFAULT_IDLE_TIME 300 /* Time, in seconds, to lock an entry updated by a gratuitous ARP to avoid - * relearning based on a reflection from a bond slave. */ + * relearning based on a reflection from a bond member. */ #define MAC_GRAT_ARP_LOCK_TIME 5 /* A MAC learning table entry. 
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index fe7fb9b29..6be23dbee 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -257,15 +257,15 @@ enum { IOV_AUXBUF = 1, }; -struct linux_lag_slave { +struct linux_lag_member { uint32_t block_id; struct shash_node *node; }; -/* Protects 'lag_shash' and the mutable members of struct linux_lag_slave. */ +/* Protects 'lag_shash' and the mutable members of struct linux_lag_member. */ static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER; -/* All slaves whose LAG masters are network devices in OvS. */ +/* All members whose LAG primary interfaces are OVS network devices. */ static struct shash lag_shash OVS_GUARDED_BY(lag_mutex) = SHASH_INITIALIZER(&lag_shash); @@ -661,9 +661,9 @@ static void netdev_linux_update_lag(struct rtnetlink_change *change) OVS_REQUIRES(lag_mutex) { - struct linux_lag_slave *lag; + struct linux_lag_member *lag; - if (change->slave && netdev_linux_kind_is_lag(change->slave)) { + if (change->sub && netdev_linux_kind_is_lag(change->sub)) { lag = shash_find_data(&lag_shash, change->ifname); if (!lag) { @@ -691,12 +691,12 @@ netdev_linux_update_lag(struct rtnetlink_change *change) /* delete ingress block in case it exists */ tc_add_del_qdisc(change->if_index, false, 0, TC_INGRESS); - /* LAG master is linux netdev so add slave to same block. */ + /* LAG master is linux netdev so add member to same block. */ error = tc_add_del_qdisc(change->if_index, true, block_id, TC_INGRESS); if (error) { - VLOG_WARN("failed to bind LAG slave %s to master's block", - change->ifname); + VLOG_WARN("failed to bind LAG member %s to " + "primary's block", change->ifname); shash_delete(&lag_shash, lag->node); free(lag); } @@ -705,7 +705,7 @@ netdev_linux_update_lag(struct rtnetlink_change *change) netdev_close(master_netdev); } } else if (change->master_ifindex == 0) { - /* Check if this was a lag slave that has been freed. */ + /* Check if this was a lag member that has been removed. 
*/ lag = shash_find_data(&lag_shash, change->ifname); if (lag) { @@ -860,7 +860,7 @@ netdev_linux_update__(struct netdev_linux *dev, rtnetlink_report_link(); } - if (change->master && netdev_linux_kind_is_lag(change->master)) { + if (change->primary && netdev_linux_kind_is_lag(change->primary)) { dev->is_lag_master = true; } @@ -6376,7 +6376,7 @@ netdev_linux_update_via_netlink(struct netdev_linux *netdev) netdev->get_ifindex_error = 0; changed = true; } - if (change->master && netdev_linux_kind_is_lag(change->master)) { + if (change->primary && netdev_linux_kind_is_lag(change->primary)) { netdev->is_lag_master = true; } if (changed) { diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index be08a53fd..e2e829772 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -1334,39 +1334,39 @@ check_OUTPUT_REG(const struct ofpact_output_reg *a, /* Action structure for NXAST_BUNDLE and NXAST_BUNDLE_LOAD. * - * The bundle actions choose a slave from a supplied list of options. + * The bundle actions choose a member from a supplied list of options. * NXAST_BUNDLE outputs to its selection. NXAST_BUNDLE_LOAD writes its * selection to a register. * - * The list of possible slaves follows the nx_action_bundle structure. The size - * of each slave is governed by its type as indicated by the 'slave_type' - * parameter. The list of slaves should be padded at its end with zeros to make - * the total length of the action a multiple of 8. + * The list of possible members follows the nx_action_bundle structure. The + * size of each member is governed by its type as indicated by the + * 'member_type' parameter. The list of members should be padded at its end + * with zeros to make the total length of the action a multiple of 8. * - * Switches infer from the 'slave_type' parameter the size of each slave. All - * implementations must support the NXM_OF_IN_PORT 'slave_type' which indicates - * that the slaves are OpenFlow port numbers with NXM_LENGTH(NXM_OF_IN_PORT) == - * 2 byte width. 
Switches should reject actions which indicate unknown or - * unsupported slave types. + * Switches infer from the 'member_type' parameter the size of each member. + * All implementations must support the NXM_OF_IN_PORT 'member_type' which + * indicates that the members are OpenFlow port numbers with + * NXM_LENGTH(NXM_OF_IN_PORT) == 2 byte width. Switches should reject actions + * which indicate unknown or unsupported member types. * * Switches use a strategy dictated by the 'algorithm' parameter to choose a - * slave. If the switch does not support the specified 'algorithm' parameter, + * member. If the switch does not support the specified 'algorithm' parameter, * it should reject the action. * - * Several algorithms take into account liveness when selecting slaves. The - * liveness of a slave is implementation defined (with one exception), but will - * generally take into account things like its carrier status and the results - * of any link monitoring protocols which happen to be running on it. In order - * to give controllers a place-holder value, the OFPP_NONE port is always - * considered live, that is, NXAST_BUNDLE_LOAD stores OFPP_NONE in the output - * register if no slave is live. - * - * Some slave selection strategies require the use of a hash function, in which - * case the 'fields' and 'basis' parameters should be populated. The 'fields' - * parameter (one of NX_HASH_FIELDS_*) designates which parts of the flow to - * hash. Refer to the definition of "enum nx_hash_fields" for details. The - * 'basis' parameter is used as a universal hash parameter. Different values - * of 'basis' yield different hash results. + * Several algorithms take into account liveness when selecting members. The + * liveness of a member is implementation defined (with one exception), but + * will generally take into account things like its carrier status and the + * results of any link monitoring protocols which happen to be running on it. 
+ * In order to give controllers a place-holder value, the OFPP_NONE port is + * always considered live, that is, NXAST_BUNDLE_LOAD stores OFPP_NONE in the + * output register if no member is live. + * + * Some member selection strategies require the use of a hash function, in + * which case the 'fields' and 'basis' parameters should be populated. The + * 'fields' parameter (one of NX_HASH_FIELDS_*) designates which parts of the + * flow to hash. Refer to the definition of "enum nx_hash_fields" for details. + * The 'basis' parameter is used as a universal hash parameter. Different + * values of 'basis' yield different hash results. * * The 'zero' parameter at the end of the action structure is reserved for * future use. Switches are required to reject actions which have nonzero @@ -1375,24 +1375,24 @@ check_OUTPUT_REG(const struct ofpact_output_reg *a, * NXAST_BUNDLE actions should have 'ofs_nbits' and 'dst' zeroed. Switches * should reject actions which have nonzero bytes in either of these fields. * - * NXAST_BUNDLE_LOAD stores the OpenFlow port number of the selected slave in + * NXAST_BUNDLE_LOAD stores the OpenFlow port number of the selected member in * dst[ofs:ofs+n_bits]. The format and semantics of 'dst' and 'ofs_nbits' are * similar to those for the NXAST_REG_LOAD action. */ struct nx_action_bundle { ovs_be16 type; /* OFPAT_VENDOR. */ - ovs_be16 len; /* Length including slaves. */ + ovs_be16 len; /* Length including members. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_BUNDLE or NXAST_BUNDLE_LOAD. */ - /* Slave choice algorithm to apply to hash value. */ + /* Member choice algorithm to apply to hash value. */ ovs_be16 algorithm; /* One of NX_BD_ALG_*. */ /* What fields to hash and how. */ ovs_be16 fields; /* One of NX_HASH_FIELDS_*. */ ovs_be16 basis; /* Universal hash parameter. */ - ovs_be32 slave_type; /* NXM_OF_IN_PORT. */ - ovs_be16 n_slaves; /* Number of slaves. */ + ovs_be32 member_type; /* NXM_OF_IN_PORT. 
*/ + ovs_be16 n_members; /* Number of members. */ ovs_be16 ofs_nbits; /* (ofs << 6) | (n_bits - 1). */ ovs_be32 dst; /* Destination. */ @@ -1408,29 +1408,29 @@ decode_bundle(bool load, const struct nx_action_bundle *nab, { static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5); struct ofpact_bundle *bundle; - uint32_t slave_type; - size_t slaves_size, i; + uint32_t member_type; + size_t members_size, i; enum ofperr error; bundle = ofpact_put_BUNDLE(ofpacts); - bundle->n_slaves = ntohs(nab->n_slaves); + bundle->n_members = ntohs(nab->n_members); bundle->basis = ntohs(nab->basis); bundle->fields = ntohs(nab->fields); bundle->algorithm = ntohs(nab->algorithm); - slave_type = ntohl(nab->slave_type); - slaves_size = ntohs(nab->len) - sizeof *nab; + member_type = ntohl(nab->member_type); + members_size = ntohs(nab->len) - sizeof *nab; error = OFPERR_OFPBAC_BAD_ARGUMENT; if (!flow_hash_fields_valid(bundle->fields)) { VLOG_WARN_RL(&rll, "unsupported fields %d", (int) bundle->fields); - } else if (bundle->n_slaves > BUNDLE_MAX_SLAVES) { - VLOG_WARN_RL(&rll, "too many slaves"); + } else if (bundle->n_members > BUNDLE_MAX_MEMBERS) { + VLOG_WARN_RL(&rll, "too many members"); } else if (bundle->algorithm != NX_BD_ALG_HRW && bundle->algorithm != NX_BD_ALG_ACTIVE_BACKUP) { VLOG_WARN_RL(&rll, "unsupported algorithm %d", (int) bundle->algorithm); - } else if (slave_type != mf_nxm_header(MFF_IN_PORT)) { - VLOG_WARN_RL(&rll, "unsupported slave type %"PRIu32, slave_type); + } else if (member_type != mf_nxm_header(MFF_IN_PORT)) { + VLOG_WARN_RL(&rll, "unsupported member type %"PRIu32, member_type); } else { error = 0; } @@ -1461,15 +1461,15 @@ decode_bundle(bool load, const struct nx_action_bundle *nab, } } - if (slaves_size < bundle->n_slaves * sizeof(ovs_be16)) { + if (members_size < bundle->n_members * sizeof(ovs_be16)) { VLOG_WARN_RL(&rll, "Nicira action %s only has %"PRIuSIZE" bytes " - "allocated for slaves. %"PRIuSIZE" bytes are required " - "for %u slaves.", - load ? 
"bundle_load" : "bundle", slaves_size, - bundle->n_slaves * sizeof(ovs_be16), bundle->n_slaves); + "allocated for members. %"PRIuSIZE" bytes are " + "required for %u members.", + load ? "bundle_load" : "bundle", members_size, + bundle->n_members * sizeof(ovs_be16), bundle->n_members); error = OFPERR_OFPBAC_BAD_LEN; } else { - for (i = 0; i < bundle->n_slaves; i++) { + for (i = 0; i < bundle->n_members; i++) { ofp_port_t ofp_port = u16_to_ofp(ntohs(((ovs_be16 *)(nab + 1))[i])); ofpbuf_put(ofpacts, &ofp_port, sizeof ofp_port); @@ -1506,29 +1506,29 @@ encode_BUNDLE(const struct ofpact_bundle *bundle, enum ofp_version ofp_version OVS_UNUSED, struct ofpbuf *out) { - int slaves_len = ROUND_UP(2 * bundle->n_slaves, OFP_ACTION_ALIGN); + int members_len = ROUND_UP(2 * bundle->n_members, OFP_ACTION_ALIGN); struct nx_action_bundle *nab; - ovs_be16 *slaves; + ovs_be16 *members; size_t i; nab = (bundle->dst.field ? put_NXAST_BUNDLE_LOAD(out) : put_NXAST_BUNDLE(out)); - nab->len = htons(ntohs(nab->len) + slaves_len); + nab->len = htons(ntohs(nab->len) + members_len); nab->algorithm = htons(bundle->algorithm); nab->fields = htons(bundle->fields); nab->basis = htons(bundle->basis); - nab->slave_type = htonl(mf_nxm_header(MFF_IN_PORT)); - nab->n_slaves = htons(bundle->n_slaves); + nab->member_type = htonl(mf_nxm_header(MFF_IN_PORT)); + nab->n_members = htons(bundle->n_members); if (bundle->dst.field) { nab->ofs_nbits = nxm_encode_ofs_nbits(bundle->dst.ofs, bundle->dst.n_bits); nab->dst = htonl(nxm_header_from_mff(bundle->dst.field)); } - slaves = ofpbuf_put_zeros(out, slaves_len); - for (i = 0; i < bundle->n_slaves; i++) { - slaves[i] = htons(ofp_to_u16(bundle->slaves[i])); + members = ofpbuf_put_zeros(out, members_len); + for (i = 0; i < bundle->n_members; i++) { + members[i] = htons(ofp_to_u16(bundle->members[i])); } } @@ -3585,7 +3585,7 @@ check_STACK_POP(const struct ofpact_stack *a, */ struct nx_action_cnt_ids { ovs_be16 type; /* OFPAT_VENDOR. 
*/ - ovs_be16 len; /* Length including slaves. */ + ovs_be16 len; /* Length including cnt_ids. */ ovs_be32 vendor; /* NX_VENDOR_ID. */ ovs_be16 subtype; /* NXAST_DEC_TTL_CNT_IDS. */ diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml index 7169b15c0..a2778de4b 100644 --- a/lib/ovs-actions.xml +++ b/lib/ovs-actions.xml @@ -789,15 +789,16 @@ $ ovs-ofctl -O OpenFlow10 add-flow br0 actions=mod_nw_src:1.2.3.4

    The bundle and bundle_load actions

    - bundle(fields, basis, algorithm, ofport, slaves:port...) - bundle_load(fields, basis, algorithm, ofport, dst, slaves:port...) + bundle(fields, basis, algorithm, ofport, members:port...) + bundle_load(fields, basis, algorithm, ofport, dst, members:port...)

    - These actions choose a port (``slave'') from a comma-separated OpenFlow - port list. After selecting the port, bundle - outputs to it, whereas bundle_load writes its port number - to dst, which must be a 16-bit or wider field or subfield in - the syntax described under ``Field Specifications'' above. + These actions choose a port (a ``member'') from a + comma-separated OpenFlow port list. After selecting the + port, bundle outputs to it, whereas + bundle_load writes its port number to dst, + which must be a 16-bit or wider field or subfield in the syntax + described under ``Field Specifications'' above.

    @@ -854,20 +855,20 @@ $ ovs-ofctl -O OpenFlow10 add-flow br0 actions=mod_nw_src:1.2.3.4

    active_backup
    - Chooses the first live port listed in slaves. + Chooses the first live port listed in members.
    hrw (Highest Random Weight)

    Computes the following, considering only the live ports in - slaves: + members:

    -for i in [1,n_slaves]:
    +for i in [1,n_members]:
         weights[i] = hash(flow, i)
    -slave = { i such that weights[i] >= weights[j] for all j != i }
    +member = { i such that weights[i] >= weights[j] for all j != i }
               

    @@ -877,17 +878,17 @@ for i in [1,n_slaves]:

    - The algorithms take port liveness into account when selecting slaves. - The definition of whether a port is live is subject to change. It - currently takes into account carrier status and link monitoring - protocols such as BFD and CFM. If none of the slaves is live, - bundle does not output the packet and + The algorithms take port liveness into account when selecting + members. The definition of whether a port is live is subject to + change. It currently takes into account carrier status and link + monitoring protocols such as BFD and CFM. If none of the members is + live, bundle does not output the packet and bundle_load stores OFPP_NONE (65535) in the output field.

    - Example: bundle(eth_src,0,hrw,ofport,slaves:4,8) uses an + Example: bundle(eth_src,0,hrw,ofport,members:4,8) uses an Ethernet source hash with basis 0, to select between OpenFlow ports 4 and 8 using the Highest Random Weight algorithm.

    diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c index f822dffc7..125802925 100644 --- a/lib/rtnetlink.c +++ b/lib/rtnetlink.c @@ -68,12 +68,12 @@ rtnetlink_parse_link_info(const struct nlattr *nla, ARRAY_SIZE(linkinfo_policy)); if (parsed) { - change->master = (linkinfo[IFLA_INFO_KIND] - ? nl_attr_get_string(linkinfo[IFLA_INFO_KIND]) - : NULL); - change->slave = (linkinfo[IFLA_INFO_SLAVE_KIND] - ? nl_attr_get_string(linkinfo[IFLA_INFO_SLAVE_KIND]) - : NULL); + change->primary = (linkinfo[IFLA_INFO_KIND] + ? nl_attr_get_string(linkinfo[IFLA_INFO_KIND]) + : NULL); + change->sub = (linkinfo[IFLA_INFO_SLAVE_KIND] + ? nl_attr_get_string(linkinfo[IFLA_INFO_SLAVE_KIND]) + : NULL); } return parsed; @@ -134,8 +134,8 @@ rtnetlink_parse(struct ofpbuf *buf, struct rtnetlink_change *change) parsed = rtnetlink_parse_link_info(attrs[IFLA_LINKINFO], change); } else { - change->master = NULL; - change->slave = NULL; + change->primary = NULL; + change->sub = NULL; } } } else if (rtnetlink_type_is_rtnlgrp_addr(nlmsg->nlmsg_type)) { diff --git a/lib/rtnetlink.h b/lib/rtnetlink.h index 422d1db11..b6ddb4bd1 100644 --- a/lib/rtnetlink.h +++ b/lib/rtnetlink.h @@ -49,9 +49,9 @@ struct rtnetlink_change { /* Network device address status. */ /* xxx To be added when needed. */ - /* Link info. */ - const char *master; /* Kind of master (NULL if not master). */ - const char *slave; /* Kind of slave (NULL if not slave). */ + /* Link bonding info. */ + const char *primary; /* Kind of primary (NULL if not primary). */ + const char *sub; /* Kind of subordinate (NULL if not sub). */ }; /* Function called to report that a netdev has changed. 
'change' describes the diff --git a/lib/unixctl.c b/lib/unixctl.c index c216de3d0..69aed6722 100644 --- a/lib/unixctl.c +++ b/lib/unixctl.c @@ -77,7 +77,9 @@ unixctl_list_commands(struct unixctl_conn *conn, int argc OVS_UNUSED, const struct shash_node *node = nodes[i]; const struct unixctl_command *command = node->data; - ds_put_format(&ds, " %-23s %s\n", node->name, command->usage); + if (command->usage) { + ds_put_format(&ds, " %-23s %s\n", node->name, command->usage); + } } free(nodes); @@ -94,7 +96,7 @@ unixctl_version(struct unixctl_conn *conn, int argc OVS_UNUSED, /* Registers a unixctl command with the given 'name'. 'usage' describes the * arguments to the command; it is used only for presentation to the user in - * "list-commands" output. + * "list-commands" output. (If 'usage' is NULL, then the command is hidden.) * * 'cb' is called when the command is received. It is passed an array * containing the command name and arguments, plus a copy of 'aux'. Normally diff --git a/ofproto/bond.c b/ofproto/bond.c index 40c9408bc..35b9caac0 100644 --- a/ofproto/bond.c +++ b/ofproto/bond.c @@ -57,13 +57,13 @@ static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__; /* Priority for internal rules created to handle recirculation */ #define RECIRC_RULE_PRIORITY 20 -/* A hash bucket for mapping a flow to a slave. +/* A hash bucket for mapping a flow to a member interface. * "struct bond" has an array of BOND_BUCKETS of these. */ struct bond_entry { - struct bond_slave *slave; /* Assigned slave, NULL if unassigned. */ + struct bond_member *member; /* Assigned member, NULL if unassigned. */ uint64_t tx_bytes /* Count of bytes recently transmitted. */ OVS_GUARDED_BY(rwlock); - struct ovs_list list_node; /* In bond_slave's 'entries' list. */ + struct ovs_list list_node; /* In bond_member's 'entries' list. */ /* Recirculation. 
* @@ -74,12 +74,12 @@ struct bond_entry { uint64_t pr_tx_bytes OVS_GUARDED_BY(rwlock); }; -/* A bond slave, that is, one of the links comprising a bond. */ -struct bond_slave { - struct hmap_node hmap_node; /* In struct bond's slaves hmap. */ - struct ovs_list list_node; /* In struct bond's enabled_slaves list. */ - struct bond *bond; /* The bond that contains this slave. */ - void *aux; /* Client-provided handle for this slave. */ +/* A bond member interface, that is, one of the links comprising a bond. */ +struct bond_member { + struct hmap_node hmap_node; /* In struct bond's members hmap. */ + struct ovs_list list_node; /* In struct bond's enabled_members list. */ + struct bond *bond; /* The bond that contains this member. */ + void *aux; /* Client-provided handle for this member. */ struct netdev *netdev; /* Network device, owned by the client. */ uint64_t change_seq; /* Tracks changes in 'netdev'. */ @@ -88,8 +88,8 @@ struct bond_slave { /* Link status. */ bool enabled; /* May be chosen for flows? */ - bool may_enable; /* Client considers this slave bondable. */ - bool is_primary; /* This slave is preferred over others. */ + bool may_enable; /* Client considers this member bondable. */ + bool is_primary; /* This member is preferred over others. */ long long delay_expires; /* Time after which 'enabled' may change. */ /* Rebalancing info. Used only by bond_rebalance(). */ @@ -105,27 +105,27 @@ struct bond { char *name; /* Name provided by client. */ struct ofproto_dpif *ofproto; /* The bridge this bond belongs to. */ - /* Slaves. */ - struct hmap slaves; + /* Members. */ + struct hmap members; - /* Enabled slaves. + /* Enabled members. * - * Any reader or writer of 'enabled_slaves' must hold 'mutex'. - * (To prevent the bond_slave from disappearing they must also hold + * Any reader or writer of 'enabled_members' must hold 'mutex'. + * (To prevent the bond_member from disappearing they must also hold * 'rwlock'.) 
*/ struct ovs_mutex mutex OVS_ACQ_AFTER(rwlock); - struct ovs_list enabled_slaves OVS_GUARDED; /* Contains struct bond_slaves. */ + struct ovs_list enabled_members OVS_GUARDED; /* Of struct bond_members. */ /* Bonding info. */ enum bond_mode balance; /* Balancing mode, one of BM_*. */ - struct bond_slave *active_slave; - int updelay, downdelay; /* Delay before slave goes up/down, in ms. */ + struct bond_member *active_member; + int updelay, downdelay; /* Delay before member goes up/down, in ms. */ enum lacp_status lacp_status; /* Status of LACP negotiations. */ bool bond_revalidate; /* True if flows need revalidation. */ uint32_t basis; /* Basis for flow hash function. */ bool use_lb_output_action; /* Use lb_output action to avoid recirculation. Applicable only for Balance TCP mode. */ - char *primary; /* Name of the primary slave interface. */ + char *primary; /* Name of the primary member. */ /* SLB specific bonding info. */ struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */ @@ -135,15 +135,14 @@ struct bond { uint32_t recirc_id; /* Non zero if recirculation can be used.*/ struct hmap pr_rule_ops; /* Helps to maintain post recirculation rules.*/ - /* Store active slave to OVSDB. */ - bool active_slave_changed; /* Set to true whenever the bond changes - active slave. It will be reset to false - after it is stored into OVSDB */ + /* Store active member to OVSDB. */ + bool active_member_changed; /* Set to true whenever the bond changes active + * member. It will be reset to false after + * it is stored into OVSDB */ /* Interface name may not be persistent across an OS reboot, use - * MAC address for identifing the active slave */ - struct eth_addr active_slave_mac; - /* The MAC address of the active interface. */ + * MAC address for identifing the active member. */ + struct eth_addr active_member_mac; /* MAC address of the active member. */ /* Legacy compatibility. */ bool lacp_fallback_ab; /* Fallback to active-backup on LACP failure. 
*/ @@ -166,24 +165,24 @@ struct bond_pr_rule_op { }; static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock); -static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_) +static struct bond_member *bond_member_lookup(struct bond *, const void *member_) OVS_REQ_RDLOCK(rwlock); -static void bond_enable_slave(struct bond_slave *, bool enable) +static void bond_enable_member(struct bond_member *, bool enable) OVS_REQ_WRLOCK(rwlock); -static void bond_link_status_update(struct bond_slave *) +static void bond_link_status_update(struct bond_member *) OVS_REQ_WRLOCK(rwlock); -static void bond_choose_active_slave(struct bond *) +static void bond_choose_active_member(struct bond *) OVS_REQ_WRLOCK(rwlock); static struct bond_entry *lookup_bond_entry(const struct bond *, const struct flow *, uint16_t vlan) OVS_REQ_RDLOCK(rwlock); -static struct bond_slave *get_enabled_slave(struct bond *) +static struct bond_member *get_enabled_member(struct bond *) OVS_REQ_RDLOCK(rwlock); -static struct bond_slave *choose_output_slave(const struct bond *, - const struct flow *, - struct flow_wildcards *, - uint16_t vlan) +static struct bond_member *choose_output_member(const struct bond *, + const struct flow *, + struct flow_wildcards *, + uint16_t vlan) OVS_REQ_RDLOCK(rwlock); static void update_recirc_rules__(struct bond *); static bool bond_is_falling_back_to_ab(const struct bond *); @@ -226,8 +225,8 @@ bond_mode_to_string(enum bond_mode balance) { /* Creates and returns a new bond whose configuration is initially taken from * 's'. * - * The caller should register each slave on the new bond by calling - * bond_slave_register(). */ + * The caller should register each member on the new bond by calling + * bond_member_register(). 
*/ struct bond * bond_create(const struct bond_settings *s, struct ofproto_dpif *ofproto) { @@ -235,14 +234,14 @@ bond_create(const struct bond_settings *s, struct ofproto_dpif *ofproto) bond = xzalloc(sizeof *bond); bond->ofproto = ofproto; - hmap_init(&bond->slaves); - ovs_list_init(&bond->enabled_slaves); + hmap_init(&bond->members); + ovs_list_init(&bond->enabled_members); ovs_mutex_init(&bond->mutex); ovs_refcount_init(&bond->ref_cnt); hmap_init(&bond->pr_rule_ops); - bond->active_slave_mac = eth_addr_zero; - bond->active_slave_changed = false; + bond->active_member_mac = eth_addr_zero; + bond->active_member_changed = false; bond->primary = NULL; bond_reconfigure(bond, s); @@ -264,7 +263,7 @@ bond_ref(const struct bond *bond_) void bond_unref(struct bond *bond) { - struct bond_slave *slave; + struct bond_member *member; if (!bond || ovs_refcount_unref_relaxed(&bond->ref_cnt) != 1) { return; @@ -274,12 +273,12 @@ bond_unref(struct bond *bond) hmap_remove(all_bonds, &bond->hmap_node); ovs_rwlock_unlock(&rwlock); - HMAP_FOR_EACH_POP (slave, hmap_node, &bond->slaves) { - /* Client owns 'slave->netdev'. */ - free(slave->name); - free(slave); + HMAP_FOR_EACH_POP (member, hmap_node, &bond->members) { + /* Client owns 'member->netdev'. */ + free(member->name); + free(member); } - hmap_destroy(&bond->slaves); + hmap_destroy(&bond->members); ovs_mutex_destroy(&bond->mutex); @@ -357,14 +356,14 @@ update_recirc_rules__(struct bond *bond) return; } else { for (i = 0; i < BOND_BUCKETS; i++) { - struct bond_slave *slave = bond->hash[i].slave; + struct bond_member *member = bond->hash[i].member; - if (slave) { + if (member) { match_init_catchall(&match); match_set_recirc_id(&match, bond->recirc_id); match_set_dp_hash_masked(&match, i, BOND_MASK); - add_pr_rule(bond, &match, slave->ofp_port, + add_pr_rule(bond, &match, member->ofp_port, &bond->hash[i].pr_rule); } } @@ -425,8 +424,8 @@ update_recirc_rules(struct bond *bond) /* Updates 'bond''s overall configuration to 's'. 
* - * The caller should register each slave on 'bond' by calling - * bond_slave_register(). This is optional if none of the slaves' + * The caller should register each member on 'bond' by calling + * bond_member_register(). This is optional if none of the members' * configuration has changed. In any case it can't hurt. * * Returns true if the configuration has changed in such a way that requires @@ -515,21 +514,21 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s) return revalidate; } -static struct bond_slave * -bond_find_slave_by_mac(const struct bond *bond, const struct eth_addr mac) +static struct bond_member * +bond_find_member_by_mac(const struct bond *bond, const struct eth_addr mac) { - struct bond_slave *slave; + struct bond_member *member; - /* Find the last active slave */ - HMAP_FOR_EACH(slave, hmap_node, &bond->slaves) { - struct eth_addr slave_mac; + /* Find the last active member */ + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + struct eth_addr member_mac; - if (netdev_get_etheraddr(slave->netdev, &slave_mac)) { + if (netdev_get_etheraddr(member->netdev, &member_mac)) { continue; } - if (eth_addr_equals(slave_mac, mac)) { - return slave; + if (eth_addr_equals(member_mac, mac)) { + return member; } } @@ -537,144 +536,144 @@ bond_find_slave_by_mac(const struct bond *bond, const struct eth_addr mac) } static void -bond_active_slave_changed(struct bond *bond) +bond_active_member_changed(struct bond *bond) { - if (bond->active_slave) { + if (bond->active_member) { struct eth_addr mac; - netdev_get_etheraddr(bond->active_slave->netdev, &mac); - bond->active_slave_mac = mac; + netdev_get_etheraddr(bond->active_member->netdev, &mac); + bond->active_member_mac = mac; } else { - bond->active_slave_mac = eth_addr_zero; + bond->active_member_mac = eth_addr_zero; } - bond->active_slave_changed = true; + bond->active_member_changed = true; seq_change(connectivity_seq_get()); } static void -bond_slave_set_netdev__(struct bond_slave 
*slave, struct netdev *netdev) +bond_member_set_netdev__(struct bond_member *member, struct netdev *netdev) OVS_REQ_WRLOCK(rwlock) { - if (slave->netdev != netdev) { - slave->netdev = netdev; - slave->change_seq = 0; + if (member->netdev != netdev) { + member->netdev = netdev; + member->change_seq = 0; } } -/* Registers 'slave_' as a slave of 'bond'. The 'slave_' pointer is an - * arbitrary client-provided pointer that uniquely identifies a slave within a - * bond. If 'slave_' already exists within 'bond' then this function - * reconfigures the existing slave. +/* Registers 'member_' as a member interface of 'bond'. The 'member_' pointer + * is an arbitrary client-provided pointer that uniquely identifies a member + * within a bond. If 'member_' already exists within 'bond' then this function + * reconfigures the existing member. * - * 'netdev' must be the network device that 'slave_' represents. It is owned + * 'netdev' must be the network device that 'member_' represents. It is owned * by the client, so the client must not close it before either unregistering - * 'slave_' or destroying 'bond'. + * 'member_' or destroying 'bond'. 
*/ void -bond_slave_register(struct bond *bond, void *slave_, - ofp_port_t ofport, struct netdev *netdev) +bond_member_register(struct bond *bond, void *member_, + ofp_port_t ofport, struct netdev *netdev) { - struct bond_slave *slave; + struct bond_member *member; ovs_rwlock_wrlock(&rwlock); - slave = bond_slave_lookup(bond, slave_); - if (!slave) { - slave = xzalloc(sizeof *slave); - - hmap_insert(&bond->slaves, &slave->hmap_node, hash_pointer(slave_, 0)); - slave->bond = bond; - slave->aux = slave_; - slave->ofp_port = ofport; - slave->delay_expires = LLONG_MAX; - slave->name = xstrdup(netdev_get_name(netdev)); + member = bond_member_lookup(bond, member_); + if (!member) { + member = xzalloc(sizeof *member); + + hmap_insert(&bond->members, &member->hmap_node, hash_pointer(member_, 0)); + member->bond = bond; + member->aux = member_; + member->ofp_port = ofport; + member->delay_expires = LLONG_MAX; + member->name = xstrdup(netdev_get_name(netdev)); bond->bond_revalidate = true; - slave->enabled = false; - bond_enable_slave(slave, netdev_get_carrier(netdev)); + member->enabled = false; + bond_enable_member(member, netdev_get_carrier(netdev)); } - bond_slave_set_netdev__(slave, netdev); + bond_member_set_netdev__(member, netdev); - free(slave->name); - slave->name = xstrdup(netdev_get_name(netdev)); - if (bond->primary && !strcmp(bond->primary, slave->name)) { - slave->is_primary = true; + free(member->name); + member->name = xstrdup(netdev_get_name(netdev)); + if (bond->primary && !strcmp(bond->primary, member->name)) { + member->is_primary = true; } else { - slave->is_primary = false; + member->is_primary = false; } ovs_rwlock_unlock(&rwlock); } -/* Updates the network device to be used with 'slave_' to 'netdev'. +/* Updates the network device to be used with 'member_' to 'netdev'. 
* * This is useful if the caller closes and re-opens the network device - * registered with bond_slave_register() but doesn't need to change anything + * registered with bond_member_register() but doesn't need to change anything * else. */ void -bond_slave_set_netdev(struct bond *bond, void *slave_, struct netdev *netdev) +bond_member_set_netdev(struct bond *bond, void *member_, struct netdev *netdev) { - struct bond_slave *slave; + struct bond_member *member; ovs_rwlock_wrlock(&rwlock); - slave = bond_slave_lookup(bond, slave_); - if (slave) { - bond_slave_set_netdev__(slave, netdev); + member = bond_member_lookup(bond, member_); + if (member) { + bond_member_set_netdev__(member, netdev); } ovs_rwlock_unlock(&rwlock); } -/* Unregisters 'slave_' from 'bond'. If 'bond' does not contain such a slave - * then this function has no effect. +/* Unregisters 'member_' from 'bond'. If 'bond' does not contain such a + * member then this function has no effect. * - * Unregistering a slave invalidates all flows. */ + * Unregistering a member invalidates all flows. */ void -bond_slave_unregister(struct bond *bond, const void *slave_) +bond_member_unregister(struct bond *bond, const void *member_) { - struct bond_slave *slave; + struct bond_member *member; bool del_active; ovs_rwlock_wrlock(&rwlock); - slave = bond_slave_lookup(bond, slave_); - if (!slave) { + member = bond_member_lookup(bond, member_); + if (!member) { goto out; } bond->bond_revalidate = true; - bond_enable_slave(slave, false); + bond_enable_member(member, false); - del_active = bond->active_slave == slave; + del_active = bond->active_member == member; if (bond->hash) { struct bond_entry *e; for (e = bond->hash; e <= &bond->hash[BOND_MASK]; e++) { - if (e->slave == slave) { - e->slave = NULL; + if (e->member == member) { + e->member = NULL; } } } - free(slave->name); + free(member->name); - hmap_remove(&bond->slaves, &slave->hmap_node); - /* Client owns 'slave->netdev'. 
*/ - free(slave); + hmap_remove(&bond->members, &member->hmap_node); + /* Client owns 'member->netdev'. */ + free(member); if (del_active) { - bond_choose_active_slave(bond); + bond_choose_active_member(bond); bond->send_learning_packets = true; } out: ovs_rwlock_unlock(&rwlock); } -/* Should be called on each slave in 'bond' before bond_run() to indicate - * whether or not 'slave_' may be enabled. This function is intended to allow +/* Should be called on each member in 'bond' before bond_run() to indicate + * whether or not 'member_' may be enabled. This function is intended to allow * other protocols to have some impact on bonding decisions. For example LACP - * or high level link monitoring protocols may decide that a given slave should - * not be able to send traffic. */ + * or high level link monitoring protocols may decide that a given member + * should not be able to send traffic. */ void -bond_slave_set_may_enable(struct bond *bond, void *slave_, bool may_enable) +bond_member_set_may_enable(struct bond *bond, void *member_, bool may_enable) { ovs_rwlock_wrlock(&rwlock); - bond_slave_lookup(bond, slave_)->may_enable = may_enable; + bond_member_lookup(bond, member_)->may_enable = may_enable; ovs_rwlock_unlock(&rwlock); } @@ -686,7 +685,7 @@ bond_slave_set_may_enable(struct bond *bond, void *slave_, bool may_enable) bool bond_run(struct bond *bond, enum lacp_status lacp_status) { - struct bond_slave *slave, *primary; + struct bond_member *member, *primary; bool revalidate; ovs_rwlock_wrlock(&rwlock); @@ -702,21 +701,21 @@ bond_run(struct bond *bond, enum lacp_status lacp_status) } } - /* Enable slaves based on link status and LACP feedback. */ + /* Enable members based on link status and LACP feedback. 
*/ primary = NULL; - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - bond_link_status_update(slave); - slave->change_seq = seq_read(connectivity_seq_get()); + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + bond_link_status_update(member); + member->change_seq = seq_read(connectivity_seq_get()); - /* Discover if there is an active slave marked 'primary'. */ - if (bond->balance == BM_AB && slave->is_primary && slave->enabled) { - primary = slave; + /* Discover if there is an active member marked 'primary'. */ + if (bond->balance == BM_AB && member->is_primary && member->enabled) { + primary = member; } } - if (!bond->active_slave || !bond->active_slave->enabled || - (primary && bond->active_slave != primary)) { - bond_choose_active_slave(bond); + if (!bond->active_member || !bond->active_member->enabled || + (primary && bond->active_member != primary)) { + bond_choose_active_member(bond); } revalidate = bond->bond_revalidate; @@ -730,15 +729,15 @@ bond_run(struct bond *bond, enum lacp_status lacp_status) void bond_wait(struct bond *bond) { - struct bond_slave *slave; + struct bond_member *member; ovs_rwlock_rdlock(&rwlock); - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - if (slave->delay_expires != LLONG_MAX) { - poll_timer_wait_until(slave->delay_expires); + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + if (member->delay_expires != LLONG_MAX) { + poll_timer_wait_until(member->delay_expires); } - seq_wait(connectivity_seq_get(), slave->change_seq); + seq_wait(connectivity_seq_get(), member->change_seq); } if (bond->bond_revalidate) { @@ -760,7 +759,7 @@ may_send_learning_packets(const struct bond *bond) return ((bond->lacp_status == LACP_DISABLED && (bond->balance == BM_SLB || bond->balance == BM_AB)) || (bond->lacp_fallback_ab && bond->lacp_status == LACP_CONFIGURED)) - && bond->active_slave; + && bond->active_member; } /* Returns true if 'bond' needs the client to send out packets to assist with @@ -795,7 +794,7 @@ struct dp_packet * 
bond_compose_learning_packet(struct bond *bond, const struct eth_addr eth_src, uint16_t vlan, void **port_aux) { - struct bond_slave *slave; + struct bond_member *member; struct dp_packet *packet; struct flow flow; @@ -803,7 +802,7 @@ bond_compose_learning_packet(struct bond *bond, const struct eth_addr eth_src, ovs_assert(may_send_learning_packets(bond)); memset(&flow, 0, sizeof flow); flow.dl_src = eth_src; - slave = choose_output_slave(bond, &flow, NULL, vlan); + member = choose_output_member(bond, &flow, NULL, vlan); packet = dp_packet_new(0); compose_rarp(packet, eth_src); @@ -811,7 +810,7 @@ bond_compose_learning_packet(struct bond *bond, const struct eth_addr eth_src, eth_push_vlan(packet, htons(ETH_TYPE_VLAN), htons(vlan)); } - *port_aux = slave->aux; + *port_aux = member->aux; ovs_rwlock_unlock(&rwlock); return packet; } @@ -825,7 +824,7 @@ bond_is_falling_back_to_ab(const struct bond *bond) && bond->lacp_status == LACP_CONFIGURED); } -/* Checks whether a packet that arrived on 'slave_' within 'bond', with an +/* Checks whether a packet that arrived on 'member_' within 'bond', with an * Ethernet destination address of 'eth_dst', should be admitted. * * The return value is one of the following: @@ -841,22 +840,22 @@ bond_is_falling_back_to_ab(const struct bond *bond) * learning). */ enum bond_verdict -bond_check_admissibility(struct bond *bond, const void *slave_, +bond_check_admissibility(struct bond *bond, const void *member_, const struct eth_addr eth_dst) { enum bond_verdict verdict = BV_DROP; - struct bond_slave *slave; + struct bond_member *member; static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); ovs_rwlock_rdlock(&rwlock); - slave = bond_slave_lookup(bond, slave_); - if (!slave) { + member = bond_member_lookup(bond, member_); + if (!member) { goto out; } /* LACP bonds have very loose admissibility restrictions because we can * assume the remote switch is aware of the bond and will "do the right - * thing". 
However, as a precaution we drop packets on disabled slaves + * thing". However, as a precaution we drop packets on disabled members * because no correctly implemented partner switch should be sending * packets to them. * @@ -864,14 +863,15 @@ bond_check_admissibility(struct bond *bond, const void *slave_, * drop all incoming traffic except if lacp_fallback_ab is enabled. */ switch (bond->lacp_status) { case LACP_NEGOTIATED: - /* To reduce packet-drops due to delay in enabling of slave (post + /* To reduce packet-drops due to delay in enabling of member (post * LACP-SYNC), from main thread, check for may_enable as well. * When may_enable is TRUE, it means LACP is UP and waiting for the - * main thread to run LACP state machine and enable the slave. */ - verdict = (slave->enabled || slave->may_enable) ? BV_ACCEPT : BV_DROP; - if (!slave->enabled && slave->may_enable) { - VLOG_DBG_RL(&rl, "bond %s: slave %s: main thread not yet enabled slave", - bond->name, bond->active_slave->name); + * main thread to run LACP state machine and enable the member. */ + verdict = (member->enabled || member->may_enable) ? BV_ACCEPT : BV_DROP; + if (!member->enabled && member->may_enable) { + VLOG_DBG_RL(&rl, "bond %s: member %s: " + "main thread has not yet enabled member", + bond->name, bond->active_member->name); } goto out; case LACP_CONFIGURED: @@ -886,9 +886,9 @@ bond_check_admissibility(struct bond *bond, const void *slave_, break; } - /* Drop all multicast packets on inactive slaves. */ + /* Drop all multicast packets on inactive members. */ if (eth_addr_is_multicast(eth_dst)) { - if (bond->active_slave != slave) { + if (bond->active_member != member) { goto out; } } @@ -905,12 +905,12 @@ bond_check_admissibility(struct bond *bond, const void *slave_, /* fall through */ case BM_AB: - /* Drop all packets which arrive on backup slaves. This is similar to + /* Drop all packets which arrive on backup members. This is similar to * how Linux bonding handles active-backup bonds. 
*/ - if (bond->active_slave != slave) { + if (bond->active_member != member) { VLOG_DBG_RL(&rl, "active-backup bond received packet on backup" - " slave (%s) destined for " ETH_ADDR_FMT, - slave->name, ETH_ADDR_ARGS(eth_dst)); + " member (%s) destined for " ETH_ADDR_FMT, + member->name, ETH_ADDR_ARGS(eth_dst)); goto out; } verdict = BV_ACCEPT; @@ -918,27 +918,28 @@ bond_check_admissibility(struct bond *bond, const void *slave_, case BM_SLB: /* Drop all packets for which we have learned a different input port, - * because we probably sent the packet on one slave and got it back on + * because we probably sent the packet on one member and got it back on * the other. Gratuitous ARP packets are an exception to this rule: * the host has moved to another switch. The exception to the * exception is if we locked the learning table to avoid reflections on - * bond slaves. */ + * bond members. */ verdict = BV_DROP_IF_MOVED; goto out; } OVS_NOT_REACHED(); out: - if (slave && (verdict != BV_ACCEPT)) { - VLOG_DBG_RL(&rl, "slave (%s): Admissibility verdict is to drop pkt %s." - "active slave: %s, may_enable: %s enable: %s " + if (member && (verdict != BV_ACCEPT)) { + VLOG_DBG_RL(&rl, "member (%s): " + "Admissibility verdict is to drop pkt %s." + "active member: %s, may_enable: %s enable: %s " "LACP status:%d", - slave->name, + member->name, (verdict == BV_DROP_IF_MOVED) ? "as different port is learned" : "", - (bond->active_slave == slave) ? "true" : "false", - slave->may_enable ? "true" : "false", - slave->enabled ? "true" : "false", + (bond->active_member == member) ? "true" : "false", + member->may_enable ? "true" : "false", + member->enabled ? "true" : "false", bond->lacp_status); } @@ -947,9 +948,9 @@ out: } -/* Returns the slave (registered on 'bond' by bond_slave_register()) to which - * a packet with the given 'flow' and 'vlan' should be forwarded. Returns - * NULL if the packet should be dropped because no slaves are enabled. 
+/* Returns the member (registered on 'bond' by bond_member_register()) to which + * a packet with the given 'flow' and 'vlan' should be forwarded. Returns NULL + * if the packet should be dropped because no members are enabled. * * 'vlan' is not necessarily the same as 'flow->vlan_tci'. First, 'vlan' * should be a VID only (i.e. excluding the PCP bits). Second, @@ -962,15 +963,15 @@ out: * have been initialized (e.g., by flow_wildcards_init_catchall()). */ void * -bond_choose_output_slave(struct bond *bond, const struct flow *flow, - struct flow_wildcards *wc, uint16_t vlan) +bond_choose_output_member(struct bond *bond, const struct flow *flow, + struct flow_wildcards *wc, uint16_t vlan) { - struct bond_slave *slave; + struct bond_member *member; void *aux; ovs_rwlock_rdlock(&rwlock); - slave = choose_output_slave(bond, flow, wc, vlan); - aux = slave ? slave->aux : NULL; + member = choose_output_member(bond, flow, wc, vlan); + aux = member ? member->aux : NULL; ovs_rwlock_unlock(&rwlock); return aux; @@ -981,7 +982,7 @@ static void bond_entry_account(struct bond_entry *entry, uint64_t rule_tx_bytes) OVS_REQ_WRLOCK(rwlock) { - if (entry->slave) { + if (entry->member) { uint64_t delta; delta = rule_tx_bytes - entry->pr_tx_bytes; @@ -1040,12 +1041,12 @@ bond_update_post_recirc_rules__(struct bond* bond, const bool force) /* Make sure all bond entries are populated */ for (e = bond->hash; e <= &bond->hash[BOND_MASK]; e++) { - if (!e->slave || !e->slave->enabled) { + if (!e->member || !e->member->enabled) { update_rules = true; - e->slave = CONTAINER_OF(hmap_random_node(&bond->slaves), - struct bond_slave, hmap_node); - if (!e->slave->enabled) { - e->slave = bond->active_slave; + e->member = CONTAINER_OF(hmap_random_node(&bond->members), + struct bond_member, hmap_node); + if (!e->member->enabled) { + e->member = bond->active_member; } } } @@ -1103,10 +1104,10 @@ bond_account(struct bond *bond, const struct flow *flow, uint16_t vlan, ovs_rwlock_unlock(&rwlock); } 
-static struct bond_slave * -bond_slave_from_bal_node(struct ovs_list *bal) OVS_REQ_RDLOCK(rwlock) +static struct bond_member * +bond_member_from_bal_node(struct ovs_list *bal) OVS_REQ_RDLOCK(rwlock) { - return CONTAINER_OF(bal, struct bond_slave, bal_node); + return CONTAINER_OF(bal, struct bond_member, bal_node); } static void @@ -1115,24 +1116,24 @@ log_bals(struct bond *bond, const struct ovs_list *bals) { if (VLOG_IS_DBG_ENABLED()) { struct ds ds = DS_EMPTY_INITIALIZER; - const struct bond_slave *slave; + const struct bond_member *member; - LIST_FOR_EACH (slave, bal_node, bals) { + LIST_FOR_EACH (member, bal_node, bals) { if (ds.length) { ds_put_char(&ds, ','); } ds_put_format(&ds, " %s %"PRIu64"kB", - slave->name, slave->tx_bytes / 1024); + member->name, member->tx_bytes / 1024); - if (!slave->enabled) { + if (!member->enabled) { ds_put_cstr(&ds, " (disabled)"); } - if (!ovs_list_is_empty(&slave->entries)) { + if (!ovs_list_is_empty(&member->entries)) { struct bond_entry *e; ds_put_cstr(&ds, " ("); - LIST_FOR_EACH (e, list_node, &slave->entries) { - if (&e->list_node != ovs_list_front(&slave->entries)) { + LIST_FOR_EACH (e, list_node, &member->entries) { + if (&e->list_node != ovs_list_front(&member->entries)) { ds_put_cstr(&ds, " + "); } ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB", @@ -1146,12 +1147,12 @@ log_bals(struct bond *bond, const struct ovs_list *bals) } } -/* Shifts 'hash' from its current slave to 'to'. */ +/* Shifts 'hash' from its current member to 'to'. */ static void -bond_shift_load(struct bond_entry *hash, struct bond_slave *to) +bond_shift_load(struct bond_entry *hash, struct bond_member *to) OVS_REQ_WRLOCK(rwlock) { - struct bond_slave *from = hash->slave; + struct bond_member *from = hash->member; struct bond *bond = from->bond; uint64_t delta = hash->tx_bytes; @@ -1168,19 +1169,19 @@ bond_shift_load(struct bond_entry *hash, struct bond_slave *to) to->tx_bytes += delta; /* Arrange for flows to be revalidated. 
*/ - hash->slave = to; + hash->member = to; bond->bond_revalidate = true; } /* Picks and returns a bond_entry to migrate from 'from' (the most heavily - * loaded bond slave) to a bond slave that has 'to_tx_bytes' bytes of load, - * given that doing so must decrease the ratio of the load on the two slaves by - * at least 0.1. Returns NULL if there is no appropriate entry. + * loaded bond member) to a bond member that has 'to_tx_bytes' bytes of load, + * given that doing so must decrease the ratio of the load on the two members + * by at least 0.1. Returns NULL if there is no appropriate entry. * * The list of entries isn't sorted. I don't know of a reason to prefer to * shift away small hashes or large hashes. */ static struct bond_entry * -choose_entry_to_migrate(const struct bond_slave *from, uint64_t to_tx_bytes) +choose_entry_to_migrate(const struct bond_member *from, uint64_t to_tx_bytes) OVS_REQ_WRLOCK(rwlock) { struct bond_entry *e; @@ -1217,28 +1218,28 @@ choose_entry_to_migrate(const struct bond_slave *from, uint64_t to_tx_bytes) return NULL; } -/* Inserts 'slave' into 'bals' so that descending order of 'tx_bytes' is +/* Inserts 'member' into 'bals' so that descending order of 'tx_bytes' is * maintained. */ static void -insert_bal(struct ovs_list *bals, struct bond_slave *slave) +insert_bal(struct ovs_list *bals, struct bond_member *member) { - struct bond_slave *pos; + struct bond_member *pos; LIST_FOR_EACH (pos, bal_node, bals) { - if (slave->tx_bytes > pos->tx_bytes) { + if (member->tx_bytes > pos->tx_bytes) { break; } } - ovs_list_insert(&pos->bal_node, &slave->bal_node); + ovs_list_insert(&pos->bal_node, &member->bal_node); } -/* Removes 'slave' from its current list and then inserts it into 'bals' so +/* Removes 'member' from its current list and then inserts it into 'bals' so * that descending order of 'tx_bytes' is maintained. 
*/ static void -reinsert_bal(struct ovs_list *bals, struct bond_slave *slave) +reinsert_bal(struct ovs_list *bals, struct bond_member *member) { - ovs_list_remove(&slave->bal_node); - insert_bal(bals, slave); + ovs_list_remove(&member->bal_node); + insert_bal(bals, member); } /* If 'bond' needs rebalancing, does so. @@ -1250,7 +1251,7 @@ reinsert_bal(struct ovs_list *bals, struct bond_slave *slave) void bond_rebalance(struct bond *bond) { - struct bond_slave *slave; + struct bond_member *member; struct bond_entry *e; struct ovs_list bals; bool rebalanced = false; @@ -1269,41 +1270,43 @@ bond_rebalance(struct bond *bond) bond_recirculation_account(bond); } - /* Add each bond_entry to its slave's 'entries' list. - * Compute each slave's tx_bytes as the sum of its entries' tx_bytes. */ - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - slave->tx_bytes = 0; - ovs_list_init(&slave->entries); + /* Add each bond_entry to its member's 'entries' list. + * Compute each member's tx_bytes as the sum of its entries' tx_bytes. */ + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + member->tx_bytes = 0; + ovs_list_init(&member->entries); } for (e = &bond->hash[0]; e <= &bond->hash[BOND_MASK]; e++) { - if (e->slave && e->tx_bytes) { - e->slave->tx_bytes += e->tx_bytes; - ovs_list_push_back(&e->slave->entries, &e->list_node); + if (e->member && e->tx_bytes) { + e->member->tx_bytes += e->tx_bytes; + ovs_list_push_back(&e->member->entries, &e->list_node); } } - /* Add enabled slaves to 'bals' in descending order of tx_bytes. + /* Add enabled members to 'bals' in descending order of tx_bytes. * - * XXX This is O(n**2) in the number of slaves but it could be O(n lg n) + * XXX This is O(n**2) in the number of members but it could be O(n lg n) * with a proper list sort algorithm. 
*/ ovs_list_init(&bals); - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - if (slave->enabled) { - insert_bal(&bals, slave); + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + if (member->enabled) { + insert_bal(&bals, member); } } log_bals(bond, &bals); - /* Shift load from the most-loaded slaves to the least-loaded slaves. */ + /* Shift load from the most-loaded members to the least-loaded members. */ while (!ovs_list_is_short(&bals)) { - struct bond_slave *from = bond_slave_from_bal_node(ovs_list_front(&bals)); - struct bond_slave *to = bond_slave_from_bal_node(ovs_list_back(&bals)); + struct bond_member *from + = bond_member_from_bal_node(ovs_list_front(&bals)); + struct bond_member *to + = bond_member_from_bal_node(ovs_list_back(&bals)); uint64_t overload; overload = from->tx_bytes - to->tx_bytes; if (overload < to->tx_bytes >> 5 || overload < 100000) { - /* The extra load on 'from' (and all less-loaded slaves), compared - * to that of 'to' (the least-loaded slave), is less than ~3%, or + /* The extra load on 'from' (and all less-loaded members), compared + * to that of 'to' (the least-loaded member), is less than ~3%, or * it is less than ~1Mbps. No point in rebalancing. */ break; } @@ -1317,7 +1320,7 @@ bond_rebalance(struct bond *bond) /* Delete element from from->entries. * * We don't add the element to to->hashes. That would only allow - * 'e' to be migrated to another slave in this rebalancing run, and + * 'e' to be migrated to another member in this rebalancing run, and * there is no point in doing that. 
*/ ovs_list_remove(&e->list_node); @@ -1363,14 +1366,14 @@ bond_find(const char *name) OVS_REQ_RDLOCK(rwlock) return NULL; } -static struct bond_slave * -bond_lookup_slave(struct bond *bond, const char *slave_name) +static struct bond_member * +bond_lookup_member(struct bond *bond, const char *member_name) { - struct bond_slave *slave; + struct bond_member *member; - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - if (!strcmp(slave->name, slave_name)) { - return slave; + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + if (!strcmp(member->name, member_name)) { + return member; } } return NULL; @@ -1384,22 +1387,22 @@ bond_unixctl_list(struct unixctl_conn *conn, struct ds ds = DS_EMPTY_INITIALIZER; const struct bond *bond; - ds_put_cstr(&ds, "bond\ttype\trecircID\tslaves\n"); + ds_put_cstr(&ds, "bond\ttype\trecircID\tmembers\n"); ovs_rwlock_rdlock(&rwlock); HMAP_FOR_EACH (bond, hmap_node, all_bonds) { - const struct bond_slave *slave; + const struct bond_member *member; size_t i; ds_put_format(&ds, "%s\t%s\t%d\t", bond->name, bond_mode_to_string(bond->balance), bond->recirc_id); i = 0; - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { + HMAP_FOR_EACH (member, hmap_node, &bond->members) { if (i++ > 0) { ds_put_cstr(&ds, ", "); } - ds_put_cstr(&ds, slave->name); + ds_put_cstr(&ds, member->name); } ds_put_char(&ds, '\n'); } @@ -1412,9 +1415,9 @@ static void bond_print_details(struct ds *ds, const struct bond *bond) OVS_REQ_RDLOCK(rwlock) { - struct shash slave_shash = SHASH_INITIALIZER(&slave_shash); - const struct shash_node **sorted_slaves = NULL; - const struct bond_slave *slave; + struct shash member_shash = SHASH_INITIALIZER(&member_shash); + const struct shash_node **sorted_members = NULL; + const struct bond_member *member; bool use_lb_output_action; bool may_recirc; uint32_t recirc_id; @@ -1464,43 +1467,43 @@ bond_print_details(struct ds *ds, const struct bond *bond) bond->lacp_fallback_ab ? 
"true" : "false"); bool found_primary = false; - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - if (slave->is_primary) { + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + if (member->is_primary) { found_primary = true; } - shash_add(&slave_shash, slave->name, slave); + shash_add(&member_shash, member->name, member); } ds_put_format(ds, "active-backup primary: %s%s\n", bond->primary ? bond->primary : "", (!found_primary && bond->primary) - ? " (no such slave)" : ""); + ? " (no such member)" : ""); - slave = bond_find_slave_by_mac(bond, bond->active_slave_mac); - ds_put_cstr(ds, "active slave mac: "); - ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(bond->active_slave_mac)); - ds_put_format(ds,"(%s)\n", slave ? slave->name : "none"); + member = bond_find_member_by_mac(bond, bond->active_member_mac); + ds_put_cstr(ds, "active member mac: "); + ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(bond->active_member_mac)); + ds_put_format(ds, "(%s)\n", member ? member->name : "none"); - sorted_slaves = shash_sort(&slave_shash); - for (i = 0; i < shash_count(&slave_shash); i++) { + sorted_members = shash_sort(&member_shash); + for (i = 0; i < shash_count(&member_shash); i++) { struct bond_entry *be; - slave = sorted_slaves[i]->data; + member = sorted_members[i]->data; /* Basic info. */ - ds_put_format(ds, "\nslave %s: %s\n", - slave->name, slave->enabled ? "enabled" : "disabled"); - if (slave == bond->active_slave) { - ds_put_cstr(ds, " active slave\n"); + ds_put_format(ds, "\nmember %s: %s\n", + member->name, member->enabled ? "enabled" : "disabled"); + if (member == bond->active_member) { + ds_put_cstr(ds, " active member\n"); } - if (slave->delay_expires != LLONG_MAX) { + if (member->delay_expires != LLONG_MAX) { ds_put_format(ds, " %s expires in %lld ms\n", - slave->enabled ? "downdelay" : "updelay", - slave->delay_expires - time_msec()); + member->enabled ? 
"downdelay" : "updelay", + member->delay_expires - time_msec()); } ds_put_format(ds, " may_enable: %s\n", - slave->may_enable ? "true" : "false"); + member->may_enable ? "true" : "false"); if (!bond_is_balanced(bond)) { continue; @@ -1511,7 +1514,7 @@ bond_print_details(struct ds *ds, const struct bond *bond) int hash = be - bond->hash; uint64_t be_tx_k; - if (be->slave != slave) { + if (be->member != member) { continue; } @@ -1524,8 +1527,8 @@ bond_print_details(struct ds *ds, const struct bond *bond) /* XXX How can we list the MACs assigned to hashes of SLB bonds? */ } } - shash_destroy(&slave_shash); - free(sorted_slaves); + shash_destroy(&member_shash); + free(sorted_members); ds_put_cstr(ds, "\n"); } @@ -1567,9 +1570,9 @@ bond_unixctl_migrate(struct unixctl_conn *conn, { const char *bond_s = argv[1]; const char *hash_s = argv[2]; - const char *slave_s = argv[3]; + const char *member_s = argv[3]; struct bond *bond; - struct bond_slave *slave; + struct bond_member *member; struct bond_entry *entry; int hash; @@ -1592,20 +1595,21 @@ bond_unixctl_migrate(struct unixctl_conn *conn, goto out; } - slave = bond_lookup_slave(bond, slave_s); - if (!slave) { - unixctl_command_reply_error(conn, "no such slave"); + member = bond_lookup_member(bond, member_s); + if (!member) { + unixctl_command_reply_error(conn, "no such member"); goto out; } - if (!slave->enabled) { - unixctl_command_reply_error(conn, "cannot migrate to disabled slave"); + if (!member->enabled) { + unixctl_command_reply_error(conn, + "cannot migrate to disabled member"); goto out; } entry = &bond->hash[hash]; bond->bond_revalidate = true; - entry->slave = slave; + entry->member = member; unixctl_command_reply(conn, "migrated"); out: @@ -1613,14 +1617,14 @@ out: } static void -bond_unixctl_set_active_slave(struct unixctl_conn *conn, - int argc OVS_UNUSED, const char *argv[], - void *aux OVS_UNUSED) +bond_unixctl_set_active_member(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], + void 
*aux OVS_UNUSED) { const char *bond_s = argv[1]; - const char *slave_s = argv[2]; + const char *member_s = argv[2]; struct bond *bond; - struct bond_slave *slave; + struct bond_member *member; ovs_rwlock_wrlock(&rwlock); bond = bond_find(bond_s); @@ -1629,25 +1633,26 @@ bond_unixctl_set_active_slave(struct unixctl_conn *conn, goto out; } - slave = bond_lookup_slave(bond, slave_s); - if (!slave) { - unixctl_command_reply_error(conn, "no such slave"); + member = bond_lookup_member(bond, member_s); + if (!member) { + unixctl_command_reply_error(conn, "no such member"); goto out; } - if (!slave->enabled) { - unixctl_command_reply_error(conn, "cannot make disabled slave active"); + if (!member->enabled) { + unixctl_command_reply_error(conn, + "cannot make disabled member active"); goto out; } - if (bond->active_slave != slave) { + if (bond->active_member != member) { bond->bond_revalidate = true; - bond->active_slave = slave; - VLOG_INFO("bond %s: active interface is now %s", - bond->name, slave->name); + bond->active_member = member; + VLOG_INFO("bond %s: active member is now %s", + bond->name, member->name); bond->send_learning_packets = true; unixctl_command_reply(conn, "done"); - bond_active_slave_changed(bond); + bond_active_member_changed(bond); } else { unixctl_command_reply(conn, "no change"); } @@ -1656,12 +1661,12 @@ out: } static void -enable_slave(struct unixctl_conn *conn, const char *argv[], bool enable) +enable_member(struct unixctl_conn *conn, const char *argv[], bool enable) { const char *bond_s = argv[1]; - const char *slave_s = argv[2]; + const char *member_s = argv[2]; struct bond *bond; - struct bond_slave *slave; + struct bond_member *member; ovs_rwlock_wrlock(&rwlock); bond = bond_find(bond_s); @@ -1670,13 +1675,13 @@ enable_slave(struct unixctl_conn *conn, const char *argv[], bool enable) goto out; } - slave = bond_lookup_slave(bond, slave_s); - if (!slave) { - unixctl_command_reply_error(conn, "no such slave"); + member = 
bond_lookup_member(bond, member_s); + if (!member) { + unixctl_command_reply_error(conn, "no such member"); goto out; } - bond_enable_slave(slave, enable); + bond_enable_member(member, enable); unixctl_command_reply(conn, enable ? "enabled" : "disabled"); out: @@ -1684,19 +1689,19 @@ out: } static void -bond_unixctl_enable_slave(struct unixctl_conn *conn, - int argc OVS_UNUSED, const char *argv[], - void *aux OVS_UNUSED) +bond_unixctl_enable_member(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], + void *aux OVS_UNUSED) { - enable_slave(conn, argv, true); + enable_member(conn, argv, true); } static void -bond_unixctl_disable_slave(struct unixctl_conn *conn, - int argc OVS_UNUSED, const char *argv[], - void *aux OVS_UNUSED) +bond_unixctl_disable_member(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], + void *aux OVS_UNUSED) { - enable_slave(conn, argv, false); + enable_member(conn, argv, false); } static void @@ -1747,16 +1752,24 @@ bond_init(void) unixctl_command_register("bond/list", "", 0, 0, bond_unixctl_list, NULL); unixctl_command_register("bond/show", "[port]", 0, 1, bond_unixctl_show, NULL); - unixctl_command_register("bond/migrate", "port hash slave", 3, 3, + unixctl_command_register("bond/migrate", "port hash member", 3, 3, bond_unixctl_migrate, NULL); - unixctl_command_register("bond/set-active-slave", "port slave", 2, 2, - bond_unixctl_set_active_slave, NULL); - unixctl_command_register("bond/enable-slave", "port slave", 2, 2, - bond_unixctl_enable_slave, NULL); - unixctl_command_register("bond/disable-slave", "port slave", 2, 2, - bond_unixctl_disable_slave, NULL); + unixctl_command_register("bond/set-active-member", "port member", 2, 2, + bond_unixctl_set_active_member, NULL); + unixctl_command_register("bond/enable-member", "port member", 2, 2, + bond_unixctl_enable_member, NULL); + unixctl_command_register("bond/disable-member", "port member", 2, 2, + bond_unixctl_disable_member, NULL); 
unixctl_command_register("bond/hash", "mac [vlan] [basis]", 1, 3, bond_unixctl_hash, NULL); + + /* Backward-compatibility command names. */ + unixctl_command_register("bond/set-active-slave", NULL, 2, 2, + bond_unixctl_set_active_member, NULL); + unixctl_command_register("bond/enable-slave", NULL, 2, 2, + bond_unixctl_enable_member, NULL); + unixctl_command_register("bond/disable-slave", NULL, 2, 2, + bond_unixctl_disable_member, NULL); } static void @@ -1779,15 +1792,15 @@ bond_entry_reset(struct bond *bond) } } -static struct bond_slave * -bond_slave_lookup(struct bond *bond, const void *slave_) +static struct bond_member * +bond_member_lookup(struct bond *bond, const void *member_) { - struct bond_slave *slave; + struct bond_member *member; - HMAP_FOR_EACH_IN_BUCKET (slave, hmap_node, hash_pointer(slave_, 0), - &bond->slaves) { - if (slave->aux == slave_) { - return slave; + HMAP_FOR_EACH_IN_BUCKET (member, hmap_node, hash_pointer(member_, 0), + &bond->members) { + if (member->aux == member_) { + return member; } } @@ -1795,51 +1808,51 @@ bond_slave_lookup(struct bond *bond, const void *slave_) } static void -bond_enable_slave(struct bond_slave *slave, bool enable) +bond_enable_member(struct bond_member *member, bool enable) { - struct bond *bond = slave->bond; + struct bond *bond = member->bond; - slave->delay_expires = LLONG_MAX; - if (enable != slave->enabled) { - slave->bond->bond_revalidate = true; - slave->enabled = enable; + member->delay_expires = LLONG_MAX; + if (enable != member->enabled) { + member->bond->bond_revalidate = true; + member->enabled = enable; - ovs_mutex_lock(&slave->bond->mutex); + ovs_mutex_lock(&member->bond->mutex); if (enable) { - ovs_list_insert(&slave->bond->enabled_slaves, &slave->list_node); + ovs_list_insert(&member->bond->enabled_members, &member->list_node); } else { bond->send_learning_packets = true; - ovs_list_remove(&slave->list_node); + ovs_list_remove(&member->list_node); } - ovs_mutex_unlock(&slave->bond->mutex); + 
ovs_mutex_unlock(&member->bond->mutex); - VLOG_INFO("interface %s: %s", slave->name, - slave->enabled ? "enabled" : "disabled"); + VLOG_INFO("member %s: %s", member->name, + member->enabled ? "enabled" : "disabled"); } } static void -bond_link_status_update(struct bond_slave *slave) +bond_link_status_update(struct bond_member *member) { - struct bond *bond = slave->bond; + struct bond *bond = member->bond; bool up; - up = netdev_get_carrier(slave->netdev) && slave->may_enable; - if ((up == slave->enabled) != (slave->delay_expires == LLONG_MAX)) { + up = netdev_get_carrier(member->netdev) && member->may_enable; + if ((up == member->enabled) != (member->delay_expires == LLONG_MAX)) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); - VLOG_INFO_RL(&rl, "interface %s: link state %s", - slave->name, up ? "up" : "down"); - if (up == slave->enabled) { - slave->delay_expires = LLONG_MAX; - VLOG_INFO_RL(&rl, "interface %s: will not be %s", - slave->name, up ? "disabled" : "enabled"); + VLOG_INFO_RL(&rl, "member %s: link state %s", + member->name, up ? "up" : "down"); + if (up == member->enabled) { + member->delay_expires = LLONG_MAX; + VLOG_INFO_RL(&rl, "member %s: will not be %s", + member->name, up ? "disabled" : "enabled"); } else { int delay = up ? bond->updelay : bond->downdelay; - slave->delay_expires = time_msec() + delay; + member->delay_expires = time_msec() + delay; if (delay) { - VLOG_INFO_RL(&rl, "interface %s: will be %s if it stays %s " + VLOG_INFO_RL(&rl, "member %s: will be %s if it stays %s " "for %d ms", - slave->name, + member->name, up ? "enabled" : "disabled", up ? 
"up" : "down", delay); @@ -1847,8 +1860,8 @@ bond_link_status_update(struct bond_slave *slave) } } - if (time_msec() >= slave->delay_expires) { - bond_enable_slave(slave, up); + if (time_msec() >= member->delay_expires) { + bond_enable_member(member, up); } } @@ -1869,29 +1882,29 @@ lookup_bond_entry(const struct bond *bond, const struct flow *flow, return &bond->hash[bond_hash(bond, flow, vlan) & BOND_MASK]; } -/* Selects and returns an enabled slave from the 'enabled_slaves' list - * in a round-robin fashion. If the 'enabled_slaves' list is empty, +/* Selects and returns an enabled member from the 'enabled_members' list + * in a round-robin fashion. If the 'enabled_members' list is empty, * returns NULL. */ -static struct bond_slave * -get_enabled_slave(struct bond *bond) +static struct bond_member * +get_enabled_member(struct bond *bond) { struct ovs_list *node; ovs_mutex_lock(&bond->mutex); - if (ovs_list_is_empty(&bond->enabled_slaves)) { + if (ovs_list_is_empty(&bond->enabled_members)) { ovs_mutex_unlock(&bond->mutex); return NULL; } - node = ovs_list_pop_front(&bond->enabled_slaves); - ovs_list_push_back(&bond->enabled_slaves, node); + node = ovs_list_pop_front(&bond->enabled_members); + ovs_list_push_back(&bond->enabled_members, node); ovs_mutex_unlock(&bond->mutex); - return CONTAINER_OF(node, struct bond_slave, list_node); + return CONTAINER_OF(node, struct bond_member, list_node); } -static struct bond_slave * -choose_output_slave(const struct bond *bond, const struct flow *flow, +static struct bond_member * +choose_output_member(const struct bond *bond, const struct flow *flow, struct flow_wildcards *wc, uint16_t vlan) { struct bond_entry *e; @@ -1910,7 +1923,7 @@ choose_output_slave(const struct bond *bond, const struct flow *flow, switch (balance) { case BM_AB: - return bond->active_slave; + return bond->active_member; case BM_TCP: if (bond->lacp_status != LACP_NEGOTIATED) { @@ -1926,90 +1939,90 @@ choose_output_slave(const struct bond *bond, const 
struct flow *flow, flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_ETH_SRC); } e = lookup_bond_entry(bond, flow, vlan); - if (!e->slave || !e->slave->enabled) { - e->slave = get_enabled_slave(CONST_CAST(struct bond*, bond)); + if (!e->member || !e->member->enabled) { + e->member = get_enabled_member(CONST_CAST(struct bond *, bond)); } - return e->slave; + return e->member; default: OVS_NOT_REACHED(); } } -static struct bond_slave * -bond_choose_slave(const struct bond *bond) +static struct bond_member * +bond_choose_member(const struct bond *bond) { - struct bond_slave *slave, *best; + struct bond_member *member, *best; /* If there's a primary and it's active, return that. */ - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - if (slave->is_primary && slave->enabled) { - return slave; + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + if (member->is_primary && member->enabled) { + return member; } } - /* Find the last active slave. */ - slave = bond_find_slave_by_mac(bond, bond->active_slave_mac); - if (slave && slave->enabled) { - return slave; + /* Find the last active member. */ + member = bond_find_member_by_mac(bond, bond->active_member_mac); + if (member && member->enabled) { + return member; } - /* Find an enabled slave. */ - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - if (slave->enabled) { - return slave; + /* Find an enabled member. */ + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + if (member->enabled) { + return member; } } - /* All interfaces are disabled. Find an interface that will be enabled + /* All members are disabled. Find an member that will be enabled * after its updelay expires. 
*/ best = NULL; - HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) { - if (slave->delay_expires != LLONG_MAX - && slave->may_enable - && (!best || slave->delay_expires < best->delay_expires)) { - best = slave; + HMAP_FOR_EACH (member, hmap_node, &bond->members) { + if (member->delay_expires != LLONG_MAX + && member->may_enable + && (!best || member->delay_expires < best->delay_expires)) { + best = member; } } return best; } static void -bond_choose_active_slave(struct bond *bond) +bond_choose_active_member(struct bond *bond) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); - struct bond_slave *old_active_slave = bond->active_slave; + struct bond_member *old_active_member = bond->active_member; - bond->active_slave = bond_choose_slave(bond); - if (bond->active_slave) { - if (bond->active_slave->enabled) { - VLOG_INFO_RL(&rl, "bond %s: active interface is now %s", - bond->name, bond->active_slave->name); + bond->active_member = bond_choose_member(bond); + if (bond->active_member) { + if (bond->active_member->enabled) { + VLOG_INFO_RL(&rl, "bond %s: active member is now %s", + bond->name, bond->active_member->name); } else { - VLOG_INFO_RL(&rl, "bond %s: active interface is now %s, skipping " - "remaining %lld ms updelay (since no interface was " - "enabled)", bond->name, bond->active_slave->name, - bond->active_slave->delay_expires - time_msec()); - bond_enable_slave(bond->active_slave, true); + VLOG_INFO_RL(&rl, "bond %s: active member is now %s, skipping " + "remaining %lld ms updelay (since no member was " + "enabled)", bond->name, bond->active_member->name, + bond->active_member->delay_expires - time_msec()); + bond_enable_member(bond->active_member, true); } bond->send_learning_packets = true; - if (bond->active_slave != old_active_slave) { - bond_active_slave_changed(bond); + if (bond->active_member != old_active_member) { + bond_active_member_changed(bond); } - } else if (old_active_slave) { - bond_active_slave_changed(bond); - 
VLOG_INFO_RL(&rl, "bond %s: all interfaces disabled", bond->name); + } else if (old_active_member) { + bond_active_member_changed(bond); + VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name); } } /* - * Return true if bond has unstored active slave change. - * If return true, 'mac' will store the bond's current active slave's + * Return true if bond has unstored active member change. + * If return true, 'mac' will store the bond's current active member's * MAC address. */ bool -bond_get_changed_active_slave(const char *name, struct eth_addr *mac, +bond_get_changed_active_member(const char *name, struct eth_addr *mac, bool force) { struct bond *bond; @@ -2017,9 +2030,9 @@ bond_get_changed_active_slave(const char *name, struct eth_addr *mac, ovs_rwlock_wrlock(&rwlock); bond = bond_find(name); if (bond) { - if (bond->active_slave_changed || force) { - *mac = bond->active_slave_mac; - bond->active_slave_changed = false; + if (bond->active_member_changed || force) { + *mac = bond->active_member_mac; + bond->active_member_changed = false; ovs_rwlock_unlock(&rwlock); return true; } @@ -2038,19 +2051,19 @@ bond_use_lb_output_action(const struct bond *bond) static void bond_add_lb_output_buckets(const struct bond *bond) { - ofp_port_t slave_map[BOND_BUCKETS]; + ofp_port_t member_map[BOND_BUCKETS]; for (int i = 0; i < BOND_BUCKETS; i++) { - struct bond_slave *slave = bond->hash[i].slave; + struct bond_member *member = bond->hash[i].member; - if (slave) { - slave_map[i] = slave->ofp_port; + if (member) { + member_map[i] = member->ofp_port; } else { - slave_map[i] = OFPP_NONE; + member_map[i] = OFPP_NONE; } } ofproto_dpif_add_lb_output_buckets(bond->ofproto, bond->recirc_id, - slave_map); + member_map); } static void diff --git a/ofproto/bond.h b/ofproto/bond.h index ecb90919c..1683ec878 100644 --- a/ofproto/bond.h +++ b/ofproto/bond.h @@ -28,7 +28,7 @@ struct ofpbuf; struct ofproto_dpif; enum lacp_status; -/* How flows are balanced among bond slaves. 
*/ +/* How flows are balanced among bond member interfaces. */ enum bond_mode { BM_TCP, /* Transport Layer Load Balance. */ BM_SLB, /* Source Load Balance. */ @@ -51,12 +51,12 @@ struct bond_settings { const char *primary; /* For AB mode, primary interface name. */ /* Link status detection. */ - int up_delay; /* ms before enabling an up slave. */ - int down_delay; /* ms before disabling a down slave. */ + int up_delay; /* ms before enabling an up member. */ + int down_delay; /* ms before disabling a down member. */ bool lacp_fallback_ab_cfg; /* Fallback to active-backup on LACP failure. */ - struct eth_addr active_slave_mac; + struct eth_addr active_member_mac; /* The MAC address of the interface that was active during the last ovs run. */ @@ -74,22 +74,23 @@ void bond_unref(struct bond *); struct bond *bond_ref(const struct bond *); bool bond_reconfigure(struct bond *, const struct bond_settings *); -void bond_slave_register(struct bond *, void *slave_, ofp_port_t ofport, struct netdev *); -void bond_slave_set_netdev(struct bond *, void *slave_, struct netdev *); -void bond_slave_unregister(struct bond *, const void *slave); +void bond_member_register(struct bond *, void *member_, ofp_port_t ofport, + struct netdev *); +void bond_member_set_netdev(struct bond *, void *member_, struct netdev *); +void bond_member_unregister(struct bond *, const void *member); bool bond_run(struct bond *, enum lacp_status); void bond_wait(struct bond *); -void bond_slave_set_may_enable(struct bond *, void *slave_, bool may_enable); +void bond_member_set_may_enable(struct bond *, void *member_, bool may_enable); /* Special MAC learning support for SLB bonding. 
*/ bool bond_should_send_learning_packets(struct bond *); struct dp_packet *bond_compose_learning_packet(struct bond *, const struct eth_addr eth_src, uint16_t vlan, void **port_aux); -bool bond_get_changed_active_slave(const char *name, struct eth_addr *mac, - bool force); +bool bond_get_changed_active_member(const char *name, struct eth_addr *mac, + bool force); /* Packet processing. */ enum bond_verdict { @@ -97,10 +98,10 @@ enum bond_verdict { BV_DROP, /* Drop this packet. */ BV_DROP_IF_MOVED /* Drop if we've learned a different port. */ }; -enum bond_verdict bond_check_admissibility(struct bond *, const void *slave_, +enum bond_verdict bond_check_admissibility(struct bond *, const void *member_, const struct eth_addr dst); -void *bond_choose_output_slave(struct bond *, const struct flow *, - struct flow_wildcards *, uint16_t vlan); +void *bond_choose_output_member(struct bond *, const struct flow *, + struct flow_wildcards *, uint16_t vlan); /* Rebalancing. */ void bond_account(struct bond *, const struct flow *, uint16_t vlan, @@ -119,7 +120,7 @@ void bond_rebalance(struct bond *); * * On handling first output packet, 256 post recirculation flows are installed: * - * recirc_id=, dp_hash=<[0..255]>/0xff, actions: output + * recirc_id=, dp_hash=<[0..255]>/0xff, actions: output * * Bond module pulls stats from those post recirculation rules. If rebalancing * is needed, those rules are updated with new output actions. diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h index 30cd5275f..4df630c62 100644 --- a/ofproto/ofproto-dpif-rid.h +++ b/ofproto/ofproto-dpif-rid.h @@ -41,8 +41,8 @@ struct rule; * * Recirculation is the use of freezing to allow a frame to re-enter the * datapath packet processing path to achieve more flexible packet processing, - * such as modifying header fields after MPLS POP action and selecting a slave - * port for bond ports. 
+ * such as modifying header fields after MPLS POP action and selecting a + * member interface for bond ports. * * * Data path and user space interface diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c index f616fb2bb..fdcb9eabb 100644 --- a/ofproto/ofproto-dpif-sflow.c +++ b/ofproto/ofproto-dpif-sflow.c @@ -305,7 +305,7 @@ sflow_agent_get_counters(void *ds_, SFLPoller *poller, SFLEthernet_counters* eth_counters; struct netdev_stats stats; enum netdev_flags flags; - struct lacp_slave_stats lacp_stats; + struct lacp_member_stats lacp_stats; const char *ifName; dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort)); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index e0ede2cab..11aa20754 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -2431,7 +2431,7 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle, } vid = out_xvlan.v[0].vid; if (ovs_list_is_empty(&out_xbundle->xports)) { - /* Partially configured bundle with no slaves. Drop the packet. */ + /* Partially configured bundle with no members. Drop the packet. */ return; } else if (!out_xbundle->bond) { xport = CONTAINER_OF(ovs_list_front(&out_xbundle->xports), struct xport, @@ -2456,12 +2456,12 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle, } } - ofport = bond_choose_output_slave(out_xbundle->bond, - &ctx->xin->flow, wc, vid); + ofport = bond_choose_output_member(out_xbundle->bond, + &ctx->xin->flow, wc, vid); xport = xport_lookup(ctx->xcfg, ofport); if (!xport) { - /* No slaves enabled, so drop packet. */ + /* No member interfaces enabled, so drop packet. */ return; } @@ -3379,11 +3379,11 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport) if (packet) { lacp_may_enable = lacp_process_packet(xport->xbundle->lacp, xport->ofport, packet); - /* Update LACP status in bond-slave to avoid packet-drops until - * LACP state machine is run by the main thread. 
*/ + /* Update LACP status in bond-member to avoid packet-drops + * until LACP state machine is run by the main thread. */ if (xport->xbundle->bond && lacp_may_enable) { - bond_slave_set_may_enable(xport->xbundle->bond, xport->ofport, - lacp_may_enable); + bond_member_set_may_enable(xport->xbundle->bond, xport->ofport, + lacp_may_enable); } } slow = SLOW_LACP; @@ -4210,7 +4210,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, if (xr && bond_use_lb_output_action(xport->xbundle->bond)) { /* * If bond mode is balance-tcp and optimize balance tcp is enabled - * then use the hash directly for slave selection and avoid + * then use the hash directly for member selection and avoid * recirculation. * * Currently support for netdev datapath only. @@ -5391,7 +5391,7 @@ xlate_set_queue_action(struct xlate_ctx *ctx, uint32_t queue_id) } static bool -slave_enabled_cb(ofp_port_t ofp_port, void *xbridge_) +member_enabled_cb(ofp_port_t ofp_port, void *xbridge_) { const struct xbridge *xbridge = xbridge_; struct xport *port; @@ -5420,7 +5420,7 @@ xlate_bundle_action(struct xlate_ctx *ctx, { ofp_port_t port; - port = bundle_execute(bundle, &ctx->xin->flow, ctx->wc, slave_enabled_cb, + port = bundle_execute(bundle, &ctx->xin->flow, ctx->wc, member_enabled_cb, CONST_CAST(struct xbridge *, ctx->xbridge)); if (bundle->dst.field) { nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow, ctx->wc); diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 4f0638f23..fd0b2fdea 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -2199,7 +2199,7 @@ port_modified(struct ofport *port_) struct netdev *netdev = port->up.netdev; if (port->bundle && port->bundle->bond) { - bond_slave_set_netdev(port->bundle->bond, port, netdev); + bond_member_set_netdev(port->bundle->bond, port, netdev); } if (port->cfm) { @@ -3140,10 +3140,10 @@ bundle_del_port(struct ofport_dpif *port) port->bundle = NULL; if (bundle->lacp) { - 
lacp_slave_unregister(bundle->lacp, port); + lacp_member_unregister(bundle->lacp, port); } if (bundle->bond) { - bond_slave_unregister(bundle->bond, port); + bond_member_unregister(bundle->bond, port); } bundle_update(bundle); @@ -3151,7 +3151,7 @@ bundle_del_port(struct ofport_dpif *port) static bool bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port, - struct lacp_slave_settings *lacp) + struct lacp_member_settings *lacp) { struct ofport_dpif *port; @@ -3177,7 +3177,7 @@ bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port, } if (lacp) { bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE; - lacp_slave_register(bundle->lacp, port, lacp); + lacp_member_register(bundle->lacp, port, lacp); } return true; @@ -3236,8 +3236,8 @@ bundle_set(struct ofproto *ofproto_, void *aux, return 0; } - ovs_assert(s->n_slaves == 1 || s->bond != NULL); - ovs_assert((s->lacp != NULL) == (s->lacp_slaves != NULL)); + ovs_assert(s->n_members == 1 || s->bond != NULL); + ovs_assert((s->lacp != NULL) == (s->lacp_members != NULL)); if (!bundle) { bundle = xmalloc(sizeof *bundle); @@ -3283,18 +3283,18 @@ bundle_set(struct ofproto *ofproto_, void *aux, /* Update set of ports. */ ok = true; - for (i = 0; i < s->n_slaves; i++) { - if (!bundle_add_port(bundle, s->slaves[i], - s->lacp ? &s->lacp_slaves[i] : NULL)) { + for (i = 0; i < s->n_members; i++) { + if (!bundle_add_port(bundle, s->members[i], + s->lacp ? 
&s->lacp_members[i] : NULL)) { ok = false; } } - if (!ok || ovs_list_size(&bundle->ports) != s->n_slaves) { + if (!ok || ovs_list_size(&bundle->ports) != s->n_members) { struct ofport_dpif *next_port; LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) { - for (i = 0; i < s->n_slaves; i++) { - if (s->slaves[i] == port->up.ofp_port) { + for (i = 0; i < s->n_members; i++) { + if (s->members[i] == port->up.ofp_port) { goto found; } } @@ -3303,7 +3303,7 @@ bundle_set(struct ofproto *ofproto_, void *aux, found: ; } } - ovs_assert(ovs_list_size(&bundle->ports) <= s->n_slaves); + ovs_assert(ovs_list_size(&bundle->ports) <= s->n_members); if (ovs_list_is_empty(&bundle->ports)) { bundle_destroy(bundle); @@ -3408,8 +3408,8 @@ bundle_set(struct ofproto *ofproto_, void *aux, } LIST_FOR_EACH (port, bundle_node, &bundle->ports) { - bond_slave_register(bundle->bond, port, - port->up.ofp_port, port->up.netdev); + bond_member_register(bundle->bond, port, + port->up.ofp_port, port->up.netdev); } } else { bond_unref(bundle->bond); @@ -3562,7 +3562,7 @@ bundle_run(struct ofbundle *bundle) struct ofport_dpif *port; LIST_FOR_EACH (port, bundle_node, &bundle->ports) { - bond_slave_set_may_enable(bundle->bond, port, port->up.may_enable); + bond_member_set_may_enable(bundle->bond, port, port->up.may_enable); } if (bond_run(bundle->bond, lacp_status(bundle->lacp))) { @@ -3808,7 +3808,7 @@ may_enable_port(struct ofport_dpif *ofport) /* If LACP is enabled, it must report that the link is enabled. 
*/ if (ofport->bundle - && !lacp_slave_may_enable(ofport->bundle->lacp, ofport)) { + && !lacp_member_may_enable(ofport->bundle->lacp, ofport)) { return false; } @@ -3824,7 +3824,7 @@ port_run(struct ofport_dpif *ofport) ofport->carrier_seq = carrier_seq; if (carrier_changed && ofport->bundle) { - lacp_slave_carrier_changed(ofport->bundle->lacp, ofport, enable); + lacp_member_carrier_changed(ofport->bundle->lacp, ofport, enable); } if (enable) { @@ -3936,7 +3936,7 @@ port_del(struct ofproto *ofproto_, ofp_port_t ofp_port) /* The caller is going to close ofport->up.netdev. If this is a * bonded port, then the bond is using that netdev, so remove it * from the bond. The client will need to reconfigure everything - * after deleting ports, so then the slave will get re-added. */ + * after deleting ports, so then the member will get re-added. */ bundle_remove(&ofport->up); } } @@ -4020,11 +4020,12 @@ vport_get_status(const struct ofport *ofport_, char **errp) } static int -port_get_lacp_stats(const struct ofport *ofport_, struct lacp_slave_stats *stats) +port_get_lacp_stats(const struct ofport *ofport_, + struct lacp_member_stats *stats) { struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); if (ofport->bundle && ofport->bundle->lacp) { - if (lacp_get_slave_stats(ofport->bundle->lacp, ofport, stats)) { + if (lacp_get_member_stats(ofport->bundle->lacp, ofport, stats)) { return 0; } } @@ -4125,7 +4126,7 @@ port_is_lacp_current(const struct ofport *ofport_) { const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_); return (ofport->bundle && ofport->bundle->lacp - ? lacp_slave_is_current(ofport->bundle->lacp, ofport) + ? 
lacp_member_is_current(ofport->bundle->lacp, ofport) : -1); } diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h index 1f5794f03..b41c3d82a 100644 --- a/ofproto/ofproto-dpif.h +++ b/ofproto/ofproto-dpif.h @@ -385,7 +385,7 @@ int ofproto_dpif_add_internal_flow(struct ofproto_dpif *, int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *, int priority); int ofproto_dpif_add_lb_output_buckets(struct ofproto_dpif *, uint32_t bond_id, - const ofp_port_t *slave_map); + const ofp_port_t *member_map); int ofproto_dpif_delete_lb_output_buckets(struct ofproto_dpif *, uint32_t bond_id); bool ovs_lb_output_action_supported(struct ofproto_dpif *); diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h index afecb24cb..9ad2b71d2 100644 --- a/ofproto/ofproto-provider.h +++ b/ofproto/ofproto-provider.h @@ -1225,7 +1225,7 @@ struct ofproto_class { * not support LACP. */ int (*port_get_lacp_stats)(const struct ofport *port, - struct lacp_slave_stats *stats); + struct lacp_member_stats *stats); /* ## ----------------------- ## */ /* ## OpenFlow Rule Functions ## */ @@ -1707,11 +1707,11 @@ struct ofproto_class { /* If 's' is nonnull, this function registers a "bundle" associated with * client data pointer 'aux' in 'ofproto'. A bundle is the same concept as - * a Port in OVSDB, that is, it consists of one or more "slave" devices - * (Interfaces, in OVSDB) along with VLAN and LACP configuration and, if - * there is more than one slave, a bonding configuration. If 'aux' is - * already registered then this function updates its configuration to 's'. - * Otherwise, this function registers a new bundle. + * a Port in OVSDB, that is, it consists of one or more "member" + * devices (Interfaces, in OVSDB) along with VLAN and LACP configuration + * and, if there is more than one member, a bonding configuration. If 'aux' + * is already registered then this function updates its configuration to + * 's'. Otherwise, this function registers a new bundle. 
* * If 's' is NULL, this function unregisters the bundle registered on * 'ofproto' associated with client data pointer 'aux'. If no such bundle diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index 4a78fb575..b91517cd2 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -1391,7 +1391,8 @@ ofproto_port_is_lacp_current(struct ofproto *ofproto, ofp_port_t ofp_port) } int -ofproto_port_get_lacp_stats(const struct ofport *port, struct lacp_slave_stats *stats) +ofproto_port_get_lacp_stats(const struct ofport *port, + struct lacp_member_stats *stats) { struct ofproto *ofproto = port->ofproto; int error; @@ -1409,8 +1410,8 @@ ofproto_port_get_lacp_stats(const struct ofport *port, struct lacp_slave_stats * /* Registers a "bundle" associated with client data pointer 'aux' in 'ofproto'. * A bundle is the same concept as a Port in OVSDB, that is, it consists of one - * or more "slave" devices (Interfaces, in OVSDB) along with a VLAN - * configuration plus, if there is more than one slave, a bonding + * or more "member" devices (Interfaces, in OVSDB) along with a VLAN + * configuration plus, if there is more than one member, a bonding * configuration. 
* * If 'aux' is already registered then this function updates its configuration diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h index 2dd253167..b0262da2d 100644 --- a/ofproto/ofproto.h +++ b/ofproto/ofproto.h @@ -388,7 +388,8 @@ bool ofproto_port_bfd_status_changed(struct ofproto *, ofp_port_t ofp_port); int ofproto_port_get_bfd_status(struct ofproto *, ofp_port_t ofp_port, struct smap *); int ofproto_port_is_lacp_current(struct ofproto *, ofp_port_t ofp_port); -int ofproto_port_get_lacp_stats(const struct ofport *, struct lacp_slave_stats *); +int ofproto_port_get_lacp_stats(const struct ofport *, + struct lacp_member_stats *); int ofproto_port_set_stp(struct ofproto *, ofp_port_t ofp_port, const struct ofproto_port_stp_settings *); int ofproto_port_get_stp_status(struct ofproto *, ofp_port_t ofp_port, @@ -441,8 +442,8 @@ enum port_priority_tags_mode { struct ofproto_bundle_settings { char *name; /* For use in log messages. */ - ofp_port_t *slaves; /* OpenFlow port numbers for slaves. */ - size_t n_slaves; + ofp_port_t *members; /* OpenFlow port numbers for members. */ + size_t n_members; enum port_vlan_mode vlan_mode; /* Selects mode for vlan and trunks */ uint16_t qinq_ethtype; @@ -452,10 +453,10 @@ struct ofproto_bundle_settings { enum port_priority_tags_mode use_priority_tags; /* Use 802.1p tag for frames in VLAN 0? */ - struct bond_settings *bond; /* Must be nonnull iff if n_slaves > 1. */ + struct bond_settings *bond; /* Must be nonnull iff n_members > 1. */ struct lacp_settings *lacp; /* Nonnull to enable LACP. */ - struct lacp_slave_settings *lacp_slaves; /* Array of n_slaves elements. */ + struct lacp_member_settings *lacp_members; /* Array of n_members elements.
*/ bool protected; /* Protected port mode */ }; diff --git a/tests/bundle.at b/tests/bundle.at index 0a4eadc1e..2c2396cb8 100644 --- a/tests/bundle.at +++ b/tests/bundle.at @@ -9,7 +9,7 @@ AT_BANNER([bundle link selection]) AT_SETUP([hrw bundle link selection]) AT_KEYWORDS([bundle_action]) -AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5']], +AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],members:1,2,3,4,5']], [0], [ignore]) # 100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 # 110000: disruption=0.50 (perfect=0.50) 0.50 0.50 0.00 0.00 0.00 0.00 @@ -80,7 +80,7 @@ AT_CLEANUP AT_SETUP([active_backup bundle link selection]) AT_KEYWORDS([bundle_action]) -AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,active_backup,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5,6']], +AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,active_backup,ofport,NXM_NX_REG0[],members:1,2,3,4,5,6']], [0], [100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 110000: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 @@ -152,7 +152,7 @@ AT_CLEANUP AT_SETUP([hrw bundle single link selection]) AT_KEYWORDS([bundle_action]) -AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],slaves:1']], +AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],members:1']], [0], [ignore]) # 1: disruption=1.00 (perfect=1.00) 1.00 # 0: disruption=1.00 (perfect=1.00) 0.00 @@ -161,7 +161,7 @@ AT_CLEANUP AT_SETUP([hrw bundle no link selection]) AT_KEYWORDS([bundle_action]) -AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],slaves:']], +AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],members:']], [0], [ignore]) AT_CLEANUP #: disruption=0.00 (perfect=0.00) @@ -176,29 +176,29 @@ AT_CLEANUP AT_SETUP([bundle action bad fields]) AT_KEYWORDS([bundle_action]) -AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(xyzzy,60,hrw,ofport,slaves:1,2))'], 
[1], [], - [ovs-ofctl: xyzzy,60,hrw,ofport,slaves:1,2: unknown fields `xyzzy' +AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(xyzzy,60,hrw,ofport,members:1,2))'], [1], [], + [ovs-ofctl: xyzzy,60,hrw,ofport,members:1,2: unknown fields `xyzzy' ]) AT_CLEANUP AT_SETUP([bundle action bad algorithm]) AT_KEYWORDS([bundle_action]) -AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,fubar,ofport,slaves:1,2))'], [1], [], - [ovs-ofctl: symmetric_l4,60,fubar,ofport,slaves:1,2: unknown algorithm `fubar' +AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,fubar,ofport,members:1,2))'], [1], [], + [ovs-ofctl: symmetric_l4,60,fubar,ofport,members:1,2: unknown algorithm `fubar' ]) AT_CLEANUP -AT_SETUP([bundle action bad slave type]) +AT_SETUP([bundle action bad member type]) AT_KEYWORDS([bundle_action]) -AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,hrw,robot,slaves:1,2))'], [1], [], - [ovs-ofctl: symmetric_l4,60,hrw,robot,slaves:1,2: unknown slave_type `robot' +AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,hrw,robot,members:1,2))'], [1], [], + [ovs-ofctl: symmetric_l4,60,hrw,robot,members:1,2: unknown member_type `robot' ]) AT_CLEANUP -AT_SETUP([bundle action bad slave delimiter]) +AT_SETUP([bundle action bad member delimiter]) AT_KEYWORDS([bundle_action]) AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,hrw,ofport,robot:1,2))'], [1], [], - [ovs-ofctl: symmetric_l4,60,hrw,ofport,robot:1,2: missing slave delimiter, expected `slaves' got `robot' + [ovs-ofctl: symmetric_l4,60,hrw,ofport,robot:1,2: missing member delimiter, expected `members', got `robot' ]) AT_CLEANUP @@ -211,9 +211,9 @@ dnl Valgrind warnings for use-after-free bugs. 
AT_SETUP([bundle action with many ports]) AT_KEYWORDS([bundle_action]) OVS_VSWITCHD_START -AT_CHECK([ovs-ofctl add-flow br0 'actions=set_field:0x1->metadata,set_field:0x2->metadata,set_field:0x3->metadata,set_field:0x4->metadata,bundle(symmetric_l4,0,hrw,ofport,slaves:[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]])']) +AT_CHECK([ovs-ofctl add-flow br0 'actions=set_field:0x1->metadata,set_field:0x2->metadata,set_field:0x3->metadata,set_field:0x4->metadata,bundle(symmetric_l4,0,hrw,ofport,members:[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]])']) AT_CHECK([ovs-ofctl dump-flows br0 --no-stats], [0], [dnl - actions=load:0x1->OXM_OF_METADATA[[]],load:0x2->OXM_OF_METADATA[[]],load:0x3->OXM_OF_METADATA[[]],load:0x4->OXM_OF_METADATA[[]],bundle(symmetric_l4,0,hrw,ofport,slaves:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) + actions=load:0x1->OXM_OF_METADATA[[]],load:0x2->OXM_OF_METADATA[[]],load:0x3->OXM_OF_METADATA[[]],load:0x4->OXM_OF_METADATA[[]],bundle(symmetric_l4,0,hrw,ofport,members:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) ]) OVS_VSWITCHD_STOP AT_CLEANUP @@ -226,7 +226,7 @@ OVS_VSWITCHD_START([dnl add-port br0 p2 -- set Interface p2 type=dummy -- \ set Interface p2 ofport_request=2 ]) -AT_CHECK([ovs-ofctl add-flow br0 'actions=bundle(eth_src,50,active_backup,ofport,slaves:1,2)']) +AT_CHECK([ovs-ofctl add-flow br0 'actions=bundle(eth_src,50,active_backup,ofport,members:1,2)']) AT_CHECK([ovs-ofctl mod-port br0 p1 up]) AT_CHECK([ovs-ofctl mod-port br0 p2 up]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=LOCAL,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06'], [0], [stdout]) @@ -264,7 +264,7 @@ OVS_VSWITCHD_START([dnl add-port br0 p2 -- set Interface p2 type=dummy -- \ set Interface p2 
ofport_request=2 ]) -AT_CHECK([ovs-ofctl add-flow br0 'actions=bundle_load(eth_src,50,hrw,ofport,OXM_OF_ETH_SRC[[0..15]],slaves:1,2)']) +AT_CHECK([ovs-ofctl add-flow br0 'actions=bundle_load(eth_src,50,hrw,ofport,OXM_OF_ETH_SRC[[0..15]],members:1,2)']) AT_CHECK([ovs-ofctl mod-port br0 p1 down]) AT_CHECK([ovs-ofctl mod-port br0 p2 down]) AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=LOCAL,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06'], [0], [stdout]) @@ -276,7 +276,7 @@ AT_CLEANUP AT_SETUP([hrw bundle symmetric_l3 link selection]) AT_KEYWORDS([bundle_action]) -AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5']], +AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],members:1,2,3,4,5']], [0], [ignore]) # 100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 # 110000: disruption=0.50 (perfect=0.50) 0.50 0.50 0.00 0.00 0.00 0.00 @@ -347,7 +347,7 @@ AT_CLEANUP AT_SETUP([active_backup bundle symmetric_l3 link selection]) AT_KEYWORDS([bundle_action]) -AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,active_backup,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5,6']], +AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,active_backup,ofport,NXM_NX_REG0[],members:1,2,3,4,5,6']], [0], [100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00 110000: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00 @@ -419,7 +419,7 @@ AT_CLEANUP AT_SETUP([hrw bundle symmetric_l3 single link selection]) AT_KEYWORDS([bundle_action]) -AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],slaves:1']], +AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],members:1']], [0], [ignore]) # 1: disruption=1.00 (perfect=1.00) 1.00 # 0: disruption=1.00 (perfect=1.00) 0.00 @@ -428,7 +428,7 @@ AT_CLEANUP AT_SETUP([hrw bundle symmetric_l3 single link selection]) AT_KEYWORDS([bundle_action]) -AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],slaves:1']], 
+AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],members:1']], [0], [ignore]) # 1: disruption=1.00 (perfect=1.00) 1.00 # 0: disruption=1.00 (perfect=1.00) 0.00 @@ -437,7 +437,7 @@ AT_CLEANUP AT_SETUP([hrw bundle symmetric_l3 no link selection]) AT_KEYWORDS([bundle_action]) -AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],slaves:']], +AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],members:']], [0], [ignore]) AT_CLEANUP #: disruption=0.00 (perfect=0.00) @@ -446,9 +446,9 @@ AT_CLEANUP AT_SETUP([bundle symmetric_l3 action with many ports]) AT_KEYWORDS([bundle_action]) OVS_VSWITCHD_START -AT_CHECK([ovs-ofctl add-flow br0 'actions=set_field:0x1->metadata,set_field:0x2->metadata,set_field:0x3->metadata,set_field:0x4->metadata,bundle(symmetric_l3,0,hrw,ofport,slaves:[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]])']) +AT_CHECK([ovs-ofctl add-flow br0 'actions=set_field:0x1->metadata,set_field:0x2->metadata,set_field:0x3->metadata,set_field:0x4->metadata,bundle(symmetric_l3,0,hrw,ofport,members:[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]])']) AT_CHECK([ovs-ofctl dump-flows br0 --no-stats], [0], [dnl - actions=load:0x1->OXM_OF_METADATA[[]],load:0x2->OXM_OF_METADATA[[]],load:0x3->OXM_OF_METADATA[[]],load:0x4->OXM_OF_METADATA[[]],bundle(symmetric_l3,0,hrw,ofport,slaves:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) + actions=load:0x1->OXM_OF_METADATA[[]],load:0x2->OXM_OF_METADATA[[]],load:0x3->OXM_OF_METADATA[[]],load:0x4->OXM_OF_METADATA[[]],bundle(symmetric_l3,0,hrw,ofport,members:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40) ]) OVS_VSWITCHD_STOP AT_CLEANUP diff --git a/tests/lacp.at b/tests/lacp.at index 5257f0cce..f44331e85 100644 --- 
a/tests/lacp.at +++ b/tests/lacp.at @@ -5,9 +5,9 @@ m4_define([STRIP_RECIRC_ID], [[sed ' s/Recirc-ID.*$// ' ]]) -# Strips out active slave mac address since it may change over time. -m4_define([STRIP_ACTIVE_SLAVE_MAC], [[sed ' - s/active slave mac.*$// +# Strips out active member mac address since it may change over time. +m4_define([STRIP_ACTIVE_MEMBER_MAC], [[sed ' + s/active member mac.*$// ' ]]) AT_SETUP([lacp - config]) @@ -27,7 +27,7 @@ AT_CHECK([ovs-appctl lacp/show], [0], [dnl aggregation key: 1 lacp_time: slow -slave: p1: expired attached +member: p1: expired attached port_id: 1 port_priority: 65535 may_enable: false @@ -78,7 +78,7 @@ AT_CHECK([sed -e 's/aggregation key:.*/aggregation key: /' < stdout], [ aggregation key: lacp_time: fast -slave: p1: expired attached +member: p1: expired attached port_id: 11 port_priority: 111 may_enable: false @@ -97,7 +97,7 @@ slave: p1: expired attached partner key: 0 partner state: timeout -slave: p2: expired attached +member: p2: expired attached port_id: 22 port_priority: 222 may_enable: false @@ -127,12 +127,12 @@ downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false active-backup primary: -active slave mac: 00:00:00:00:00:00(none) +active member mac: 00:00:00:00:00:00(none) -slave p1: disabled +member p1: disabled may_enable: false -slave p2: disabled +member p2: disabled may_enable: false ]) @@ -140,8 +140,8 @@ OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([lacp - negotiation]) -# Create bond0 on br0 with interfaces p0 and p1 -# and bond1 on br1 with interfaces p2 and p3 +# Create bond0 on br0 with members p0 and p1 +# and bond1 on br1 with members p2 and p3 # with p0 patched to p2 and p1 patched to p3. 
OVS_VSWITCHD_START( [add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \ @@ -193,9 +193,9 @@ done AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show bond1 -ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC -ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [stdout]) -AT_CHECK([sed '/active slave/d' stdout], [0], [dnl +ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC +ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC ], [0], [stdout]) +AT_CHECK([sed '/active member/d' stdout], [0], [dnl ---- bond0 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 @@ -203,7 +203,7 @@ AT_CHECK([sed '/active slave/d' stdout], [0], [dnl aggregation key: 2 lacp_time: fast -slave: p0: current attached +member: p0: current attached port_id: 1 port_priority: 65535 may_enable: true @@ -222,7 +222,7 @@ slave: p0: current attached partner key: 4 partner state: activity timeout aggregation synchronized collecting distributing -slave: p1: current attached +member: p1: current attached port_id: 2 port_priority: 65535 may_enable: true @@ -247,7 +247,7 @@ slave: p1: current attached aggregation key: 4 lacp_time: fast -slave: p2: current attached +member: p2: current attached port_id: 3 port_priority: 65535 may_enable: true @@ -266,7 +266,7 @@ slave: p2: current attached partner key: 2 partner state: activity timeout aggregation synchronized collecting distributing -slave: p3: current attached +member: p3: current attached port_id: 4 port_priority: 65535 may_enable: true @@ -295,10 +295,10 @@ lacp_status: negotiated lacp_fallback_ab: false active-backup primary: -slave p0: enabled +member p0: enabled may_enable: true -slave p1: enabled +member p1: enabled may_enable: true ---- bond1 ---- @@ -312,16 +312,16 @@ lacp_status: negotiated lacp_fallback_ab: false active-backup primary: -slave p2: enabled +member p2: enabled may_enable: true -slave p3: enabled +member p3: enabled may_enable: 
true ]) -AT_CHECK([grep 'active slave$' stdout], [0], [dnl - active slave - active slave +AT_CHECK([grep 'active member$' stdout], [0], [dnl + active member + active member ]) # Redirect the patch link between p0 and p2 so that no packets get @@ -335,8 +335,8 @@ ovs-appctl time/warp 4100 100 AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show bond1 -ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC -ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [dnl +ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC +ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC ], [0], [dnl ---- bond0 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 @@ -344,7 +344,7 @@ ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [d aggregation key: 2 lacp_time: fast -slave: p0: expired attached +member: p0: expired attached port_id: 1 port_priority: 65535 may_enable: false @@ -363,7 +363,7 @@ slave: p0: expired attached partner key: 4 partner state: activity timeout aggregation collecting distributing -slave: p1: current attached +member: p1: current attached port_id: 2 port_priority: 65535 may_enable: true @@ -388,7 +388,7 @@ slave: p1: current attached aggregation key: 4 lacp_time: fast -slave: p2: expired attached +member: p2: expired attached port_id: 3 port_priority: 65535 may_enable: false @@ -407,7 +407,7 @@ slave: p2: expired attached partner key: 2 partner state: activity timeout aggregation collecting distributing -slave: p3: current attached +member: p3: current attached port_id: 4 port_priority: 65535 may_enable: true @@ -435,13 +435,13 @@ downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false active-backup primary: - + -slave p0: disabled +member p0: disabled may_enable: false -slave p1: enabled - active slave +member p1: enabled + active member may_enable: true ---- bond1 ---- @@ -454,13 +454,13 @@ downdelay: 0 ms lacp_status: negotiated 
lacp_fallback_ab: false active-backup primary: - + -slave p2: disabled +member p2: disabled may_enable: false -slave p3: enabled - active slave +member p3: enabled + active member may_enable: true ]) @@ -471,8 +471,8 @@ ovs-appctl time/warp 4100 100 AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show bond1 -ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC -ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [dnl +ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC +ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC ], [0], [dnl ---- bond0 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 @@ -480,7 +480,7 @@ ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [d aggregation key: 2 lacp_time: fast -slave: p0: defaulted detached +member: p0: defaulted detached port_id: 1 port_priority: 65535 may_enable: false @@ -499,7 +499,7 @@ slave: p0: defaulted detached partner key: 0 partner state: -slave: p1: current attached +member: p1: current attached port_id: 2 port_priority: 65535 may_enable: true @@ -524,7 +524,7 @@ slave: p1: current attached aggregation key: 4 lacp_time: fast -slave: p2: defaulted detached +member: p2: defaulted detached port_id: 3 port_priority: 65535 may_enable: false @@ -543,7 +543,7 @@ slave: p2: defaulted detached partner key: 0 partner state: -slave: p3: current attached +member: p3: current attached port_id: 4 port_priority: 65535 may_enable: true @@ -571,13 +571,13 @@ downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false active-backup primary: - + -slave p0: disabled +member p0: disabled may_enable: false -slave p1: enabled - active slave +member p1: enabled + active member may_enable: true ---- bond1 ---- @@ -590,13 +590,13 @@ downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false active-backup primary: - + -slave p2: disabled +member p2: disabled may_enable: false -slave p3: enabled - active slave 
+member p3: enabled + active member may_enable: true ]) @@ -612,8 +612,8 @@ ovs-appctl time/warp 30100 100 AT_CHECK( [ovs-appctl lacp/show bond0 ovs-appctl lacp/show bond1 -ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC -ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [dnl +ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC +ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC ], [0], [dnl ---- bond0 ---- status: active negotiated sys_id: aa:55:aa:55:00:00 @@ -621,7 +621,7 @@ ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [d aggregation key: 2 lacp_time: fast -slave: p0: current attached +member: p0: current attached port_id: 1 port_priority: 65535 may_enable: true @@ -640,7 +640,7 @@ slave: p0: current attached partner key: 4 partner state: activity timeout aggregation synchronized collecting distributing -slave: p1: current attached +member: p1: current attached port_id: 2 port_priority: 65535 may_enable: true @@ -665,7 +665,7 @@ slave: p1: current attached aggregation key: 4 lacp_time: fast -slave: p2: current attached +member: p2: current attached port_id: 3 port_priority: 65535 may_enable: true @@ -684,7 +684,7 @@ slave: p2: current attached partner key: 2 partner state: activity timeout aggregation synchronized collecting distributing -slave: p3: current attached +member: p3: current attached port_id: 4 port_priority: 65535 may_enable: true @@ -712,13 +712,13 @@ downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false active-backup primary: - + -slave p0: enabled +member p0: enabled may_enable: true -slave p1: enabled - active slave +member p1: enabled + active member may_enable: true ---- bond1 ---- @@ -731,13 +731,13 @@ downdelay: 0 ms lacp_status: negotiated lacp_fallback_ab: false active-backup primary: - + -slave p2: enabled +member p2: enabled may_enable: true -slave p3: enabled - active slave +member p3: enabled + active 
member may_enable: true ]) @@ -771,8 +771,8 @@ ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log # Set miss_send_len to 128, enabling port_status messages to our service connection. ovs-appctl -t ovs-ofctl ofctl/send 0409000c0123456700000080 -# Create bond0 on br0 with interfaces p0 and p1 -# and bond1 on br1 with interfaces p2 and p3 +# Create bond0 on br0 with members p0 and p1 +# and bond1 on br1 with members p2 and p3 # with p0 patched to p2 and p1 patched to p3. AT_CHECK([ovs-vsctl add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \ other-config:lacp-time=fast \ @@ -866,8 +866,8 @@ ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log # Set miss_send_len to 128, enabling port_status messages to our service connection. ovs-appctl -t ovs-ofctl ofctl/send 0509000c0123456700000080 -# Create bond0 on br0 with interfaces p0 and p1 -# and bond1 on br1 with interfaces p2 and p3 +# Create bond0 on br0 with members p0 and p1 +# and bond1 on br1 with members p2 and p3 # with p0 patched to p2 and p1 patched to p3. AT_CHECK([ovs-vsctl add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \ other-config:lacp-time=fast \ @@ -961,8 +961,8 @@ ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log # Set miss_send_len to 128, enabling port_status messages to our service connection. ovs-appctl -t ovs-ofctl ofctl/send 0609000c0123456700000080 -# Create bond0 on br0 with interfaces p0 and p1 -# and bond1 on br1 with interfaces p2 and p3 +# Create bond0 on br0 with members p0 and p1 +# and bond1 on br1 with members p2 and p3 # with p0 patched to p2 and p1 patched to p3. 
AT_CHECK([ovs-vsctl add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \ other-config:lacp-time=fast \ diff --git a/tests/ofp-actions.at b/tests/ofp-actions.at index c79d7d0e2..199db8ed0 100644 --- a/tests/ofp-actions.at +++ b/tests/ofp-actions.at @@ -80,11 +80,11 @@ ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffff0000ffff0000 # actions=multipath(eth_src,50,modulo_n,1,0,NXM_NX_REG0[]) ffff 0020 00002320 000a 0000 0032 0000 0000 0000 0000 0000 0000 001f 00010004 -# actions=bundle(eth_src,0,hrw,ofport,slaves:4,8) +# actions=bundle(eth_src,0,hrw,ofport,members:4,8) ffff 0028 00002320 000c 0001 0000 0000 00000002 0002 0000 00000000 00000000 dnl 0004 0008 00000000 -# actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],slaves:4,8) +# actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],members:4,8) ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl 0004 0008 00000000 @@ -444,11 +444,11 @@ ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffffffffffffffff # actions=multipath(eth_src,50,modulo_n,1,0,NXM_NX_REG0[]) ffff 0020 00002320 000a 0000 0032 0000 0000 0000 0000 0000 0000 001f 00010004 -# actions=bundle(eth_src,0,hrw,ofport,slaves:4,8) +# actions=bundle(eth_src,0,hrw,ofport,members:4,8) ffff 0028 00002320 000c 0001 0000 0000 00000002 0002 0000 00000000 00000000 dnl 0004 0008 00000000 -# actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],slaves:4,8) +# actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],members:4,8) ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl 0004 0008 00000000 @@ -958,17 +958,17 @@ bad_action 'enqueue:asdf:123' 'asdf: enqueue to unknown port' # bundle bad_action 'bundle:123' '123: not enough arguments to bundle action' bad_action 'bundle(symmetric_l4,60,hrw,ofport,ports:1,2,3,4,5)' \ - "symmetric_l4,60,hrw,ofport,ports:1,2,3,4,5: missing slave delimiter, expected \`slaves' got \`ports'" -bad_action 
'bundle(symmetric_l4,60,hrw,ofport,slaves:xyzzy,2,3,4,5)' \ + "symmetric_l4,60,hrw,ofport,ports:1,2,3,4,5: missing member delimiter, expected \`members', got \`ports'" +bad_action 'bundle(symmetric_l4,60,hrw,ofport,members:xyzzy,2,3,4,5)' \ 'xyzzy: bad port number' -bad_action 'bundle(asymmetric_l4,60,hrw,ofport,slaves:1,2,3,4,5)' \ - "asymmetric_l4,60,hrw,ofport,slaves:1,2,3,4,5: unknown fields \`asymmetric_l4'" -bad_action 'bundle(symmetric_l4,60,hrt,ofport,slaves:1,2,3,4,5)' \ - "symmetric_l4,60,hrt,ofport,slaves:1,2,3,4,5: unknown algorithm \`hrt'" -bad_action 'bundle(symmetric_l4,60,hrw,odpport,slaves:1,2,3,4,5)' \ - "symmetric_l4,60,hrw,odpport,slaves:1,2,3,4,5: unknown slave_type \`odpport'" -bad_action 'bundle_load(symmetric_l4,60,hrw,ofport,actset_output,slaves:1,2,3,4,5)' \ - "symmetric_l4,60,hrw,ofport,actset_output,slaves:1,2,3,4,5: experimenter OXM field 'actset_output' not supported" +bad_action 'bundle(asymmetric_l4,60,hrw,ofport,members:1,2,3,4,5)' \ + "asymmetric_l4,60,hrw,ofport,members:1,2,3,4,5: unknown fields \`asymmetric_l4'" +bad_action 'bundle(symmetric_l4,60,hrt,ofport,members:1,2,3,4,5)' \ + "symmetric_l4,60,hrt,ofport,members:1,2,3,4,5: unknown algorithm \`hrt'" +bad_action 'bundle(symmetric_l4,60,hrw,odpport,members:1,2,3,4,5)' \ + "symmetric_l4,60,hrw,odpport,members:1,2,3,4,5: unknown member_type \`odpport'" +bad_action 'bundle_load(symmetric_l4,60,hrw,ofport,actset_output,members:1,2,3,4,5)' \ + "symmetric_l4,60,hrw,ofport,actset_output,members:1,2,3,4,5: experimenter OXM field 'actset_output' not supported" # mod_vlan_vid bad_action 'mod_vlan_vid:6000' '6000: not a valid VLAN VID' diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 88dd434e4..31064ed95 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -31,8 +31,8 @@ AT_CLEANUP AT_SETUP([ofproto-dpif - active-backup bonding (with primary)]) -dnl Create br0 with interfaces p1, p2 and p7, creating bond0 with p1 and -dnl p2 (p1 as primary) and br1 with 
interfaces p3, p4 and p8. +dnl Create br0 with members p1, p2 and p7, creating bond0 with p1 and +dnl p2 (p1 as primary) and br1 with members p3, p4 and p8. dnl toggle p1,p2 of bond0 up and down to test bonding in active-backup mode. dnl With p1 down and p2 up/active, bring p1 back up. Since p1 is the primary, dnl it should become active. @@ -81,7 +81,7 @@ recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff: ovs-appctl netdev-dummy/set-admin-state p1 up ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, @@ -91,13 +91,13 @@ downdelay: 0 ms lacp_status: off lacp_fallback_ab: false active-backup primary: p1 - + -slave p1: enabled - active slave +member p1: enabled + active member may_enable: true -slave p2: enabled +member p2: enabled may_enable: true ]) @@ -118,18 +118,18 @@ OVS_VSWITCHD_START( add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy --]) AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg]) -dnl Make sure the initial primary interface is set +dnl Make sure the initial primary member is set OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | grep 'active-backup primary: p1'`"]) -dnl Down the primary interface and verify that we switched. Then +dnl Down the primary member and verify that we switched. Then dnl bring the primary back and verify that we switched back to the dnl primary. 
ovs-appctl netdev-dummy/set-admin-state p1 down ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'slave p1: disabled'`"]) +OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'member p1: disabled'`"]) ovs-appctl netdev-dummy/set-admin-state p1 up ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, @@ -139,39 +139,39 @@ downdelay: 0 ms lacp_status: off lacp_fallback_ab: false active-backup primary: p1 - + -slave p1: enabled - active slave +member p1: enabled + active member may_enable: true -slave p2: enabled +member p2: enabled may_enable: true -slave p3: enabled +member p3: enabled may_enable: true ]) dnl Now delete the primary and verify that the output shows that the -dnl primary is no longer enslaved +dnl primary is no longer a member ovs-vsctl --id=@p1 get Interface p1 -- remove Port bond0 interfaces @p1 ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'active-backup primary: p1 (no such slave)'`"]) +OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'active-backup primary: p1 (no such member)'`"]) dnl Now re-add the primary and verify that the output shows that the dnl primary is available again. dnl -dnl First, get the UUIDs of the interfaces that exist on bond0. +dnl First, get the UUIDs of the members that exist on bond0. dnl Strip the trailing ] so that we can add a new UUID to the end. uuids=`ovs-vsctl get Port bond0 interfaces | sed -e 's/]//'` -dnl Create a new port "p1" and add its UUID to the set of interfaces +dnl Create a new port "p1" and add its UUID to the set of members dnl on bond0.
ovs-vsctl \ --id=@p1 create Interface name=p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \ set Port bond0 interfaces="$uuids, @p1]" ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, @@ -181,16 +181,16 @@ downdelay: 0 ms lacp_status: off lacp_fallback_ab: false active-backup primary: p1 - + -slave p1: enabled - active slave +member p1: enabled + active member may_enable: true -slave p2: enabled +member p2: enabled may_enable: true -slave p3: enabled +member p3: enabled may_enable: true ]) @@ -198,7 +198,7 @@ slave p3: enabled dnl Switch to another primary ovs-vsctl set port bond0 other_config:bond-primary=p2 ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, @@ -208,16 +208,16 @@ downdelay: 0 ms lacp_status: off lacp_fallback_ab: false active-backup primary: p2 - + -slave p1: enabled - active slave +member p1: enabled + active member may_enable: true -slave p2: enabled +member p2: enabled may_enable: true -slave p3: enabled +member p3: enabled may_enable: true ]) @@ -225,7 +225,7 @@ slave p3: enabled dnl Remove the "bond-primary" config directive from the bond. 
AT_CHECK([ovs-vsctl remove Port bond0 other_config bond-primary]) ovs-appctl time/warp 100 -OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC], [0], [dnl +OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl ---- bond0 ---- bond_mode: active-backup bond may use recirculation: no, @@ -235,16 +235,16 @@ downdelay: 0 ms lacp_status: off lacp_fallback_ab: false active-backup primary: - + -slave p1: enabled - active slave +member p1: enabled + active member may_enable: true -slave p2: enabled +member p2: enabled may_enable: true -slave p3: enabled +member p3: enabled may_enable: true ]) @@ -253,8 +253,8 @@ OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - active-backup bonding (without primary)]) -dnl Create br0 with interfaces p1, p2 and p7, creating bond0 with p1 and p2 -dnl and br1 with interfaces p3, p4 and p8. +dnl Create br0 with members p1, p2 and p7, creating bond0 with p1 and p2 +dnl and br1 with members p3, p4 and p8. dnl toggle p1,p2 of bond0 up and down to test bonding in active-backup mode. OVS_VSWITCHD_START( [add-bond br0 bond0 p1 p2 bond_mode=active-backup --\ @@ -300,8 +300,8 @@ OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - balance-slb bonding]) -# Create br0 with interfaces bond0(p1, p2, p3) and p7, -# and br1 with interfaces p4, p5, p6 and p8. +# Create br0 with members bond0(p1, p2, p3) and p7, +# and br1 with members p4, p5, p6 and p8. # p1 <-> p4, p2 <-> p5, p3 <-> p6 # Send some traffic, make sure the traffic are spread based on source mac. OVS_VSWITCHD_START( @@ -343,8 +343,8 @@ OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif - balance-tcp bonding]) -# Create br0 with interfaces bond0(p1, p2, p3) and p7, -# and br1 with interfaces bond1(p4, p5, p6) and p8. +# Create br0 with members bond0(p1, p2, p3) and p7, +# and br1 with members bond1(p4, p5, p6) and p8. # bond0 <-> bond1 # Send some traffic, make sure the traffic are spread based on L4 headers. 
OVS_VSWITCHD_START( @@ -2185,7 +2185,7 @@ cookie=0xd dl_src=60:66:66:66:00:02 actions=pop_mpls:0x0800,load:0xa000001->OXM_ cookie=0xd dl_src=60:66:66:66:00:03 actions=pop_mpls:0x0800,move:OXM_OF_IPV4_DST[[]]->OXM_OF_IPV4_SRC[[]],controller cookie=0xd dl_src=60:66:66:66:00:04 actions=pop_mpls:0x0800,push:OXM_OF_IPV4_DST[[]],pop:OXM_OF_IPV4_SRC[[]],controller cookie=0xd dl_src=60:66:66:66:00:05 actions=pop_mpls:0x0800,multipath(eth_src,50,modulo_n,1,0,OXM_OF_IPV4_SRC[[0..7]]),controller -cookie=0xd dl_src=60:66:66:66:00:06 actions=pop_mpls:0x0800,bundle_load(eth_src,50,hrw,ofport,OXM_OF_IPV4_SRC[[0..15]],slaves:1,2),controller +cookie=0xd dl_src=60:66:66:66:00:06 actions=pop_mpls:0x0800,bundle_load(eth_src,50,hrw,ofport,OXM_OF_IPV4_SRC[[0..15]],members:1,2),controller cookie=0xd dl_src=60:66:66:66:00:07 actions=pop_mpls:0x0800,learn(table=1,hard_timeout=60,eth_type=0x800,nw_proto=6,OXM_OF_IPV4_SRC[[]]=OXM_OF_IPV4_DST[[]]),controller cookie=0xd dl_src=60:66:66:66:00:08 actions=pop_mpls:0x0806,resubmit(1,1) @@ -3183,7 +3183,7 @@ AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:03 actions=pop_mpls:0x0800,move:NXM_OF_IP_DST[[]]->NXM_OF_IP_SRC[[]],CONTROLLER:65535 cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:04 actions=pop_mpls:0x0800,push:NXM_OF_IP_DST[[]],pop:NXM_OF_IP_SRC[[]],CONTROLLER:65535 cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:05 actions=pop_mpls:0x0800,multipath(eth_src,50,modulo_n,1,0,NXM_OF_IP_SRC[[0..7]]),CONTROLLER:65535 - cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:06 actions=pop_mpls:0x0800,bundle_load(eth_src,50,hrw,ofport,NXM_OF_IP_SRC[[0..15]],slaves:1,2),CONTROLLER:65535 + cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:06 actions=pop_mpls:0x0800,bundle_load(eth_src,50,hrw,ofport,NXM_OF_IP_SRC[[0..15]],members:1,2),CONTROLLER:65535 cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:07 
actions=pop_mpls:0x0800,learn(table=1,hard_timeout=60,eth_type=0x800,nw_proto=6,NXM_OF_IP_SRC[[]]=NXM_OF_IP_DST[[]]),CONTROLLER:65535 cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:09 actions=resubmit(,2),CONTROLLER:65535 cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:0a actions=pop_mpls:0x0800,mod_nw_dst:10.0.0.1,CONTROLLER:65535 @@ -8634,8 +8634,8 @@ OVS_VSWITCHD_STOP AT_CLEANUP AT_SETUP([ofproto-dpif megaflow - normal, balance-tcp bonding]) -# Create bond0 on br0 with interfaces p0 and p1 -# and bond1 on br1 with interfaces p2 and p3 +# Create bond0 on br0 with members p0 and p1 +# and bond1 on br1 with members p2 and p3 # with p0 patched to p2 and p1 patched to p3. OVS_VSWITCHD_START( [add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \ @@ -9023,7 +9023,7 @@ AT_CHECK([ovs-appctl bfd/show | sed -n '/^.*Session State:.*/p'], [0], [dnl Local Session State: up Remote Session State: up ]) -# bond/show should show 'may-enable: true' for all slaves. +# bond/show should show 'may-enable: true' for all members. AT_CHECK([ovs-appctl bond/show | sed -n '/^.*may_enable:.*/p'], [0], [dnl may_enable: true may_enable: true @@ -11027,7 +11027,7 @@ AT_CHECK([ovs-vsctl add-port br0 p2 -- set int p2 type=dummy mtu_request=1600]) AT_CHECK([ovs-vsctl wait-until Interface p2 mtu=1600]) AT_CHECK([ovs-vsctl wait-until Interface br0 mtu=1600]) -# Explicitly set mtu_request on the internal interface. This should prevent +# Explicitly set mtu_request on the internal member. This should prevent # the MTU from being overriden. 
AT_CHECK([ovs-vsctl set int br0 mtu_request=1700]) AT_CHECK([ovs-vsctl wait-until Interface br0 mtu=1700]) diff --git a/tests/ofproto.at b/tests/ofproto.at index f56673625..08c0a20b6 100644 --- a/tests/ofproto.at +++ b/tests/ofproto.at @@ -6322,12 +6322,12 @@ AT_CHECK([strip_xids < stderr | sed '/FLOW_MOD/,$d'], [0], [dnl OFPT_ERROR: OFPBAC_BAD_SET_LEN ]) -AT_CHECK([ovs-ofctl add-flow br0 "in_port=2 actions=bundle_load(eth_src,50,hrw,ofport,tun_metadata1[[0..31]], slaves:4,8)"], [1], [], [stderr]) +AT_CHECK([ovs-ofctl add-flow br0 "in_port=2 actions=bundle_load(eth_src,50,hrw,ofport,tun_metadata1[[0..31]], members:4,8)"], [1], [], [stderr]) AT_CHECK([strip_xids < stderr | sed '/FLOW_MOD/,$d'], [0], [dnl OFPT_ERROR: NXFMFC_INVALID_TLV_FIELD ]) -AT_CHECK([ovs-ofctl add-flow br0 "in_port=2 actions=bundle_load(eth_src,50,hrw,ofport,tun_metadata0[[32..63]], slaves:4,8)"], [1], [], [stderr]) +AT_CHECK([ovs-ofctl add-flow br0 "in_port=2 actions=bundle_load(eth_src,50,hrw,ofport,tun_metadata0[[32..63]], members:4,8)"], [1], [], [stderr]) AT_CHECK([strip_xids < stderr | sed '/FLOW_MOD/,$d'], [0], [dnl OFPT_ERROR: OFPBAC_BAD_SET_LEN ]) diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at index c8062c8ac..b6951f404 100644 --- a/tests/ovs-ofctl.at +++ b/tests/ovs-ofctl.at @@ -413,20 +413,20 @@ actions=multipath(eth_src, 50, hrw, 12, 0, NXM_NX_REG0[0..3]),multipath(symmetri table=1,actions=drop tun_id=0x1234000056780000/0xffff0000ffff0000,actions=drop metadata=0x1234ffff5678ffff/0xffff0000ffff0000,actions=drop -actions=bundle(eth_src,50,active_backup,ofport,slaves:1) -actions=bundle(symmetric_l4,60,hrw,ofport,slaves:2,3) -actions=bundle(symmetric_l4,60,hrw,ofport,slaves:) -actions=bundle(symmetric_l3,60,hrw,ofport,slaves:2,3) -actions=bundle(symmetric_l3,60,hrw,ofport,slaves:) -actions=output:1,bundle(eth_src,0,hrw,ofport,slaves:1),output:2 -actions=bundle_load(eth_src,50,active_backup,ofport,reg0,slaves:1) 
-actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3) -actions=bundle_load(symmetric_l4,60,hrw,ofport,reg0[0..15],slaves:[2,3]) -actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:) -actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3) -actions=bundle_load(symmetric_l3,60,hrw,ofport,reg0[0..15],slaves:[2,3]) -actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:) -actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],slaves:1),output:2 +actions=bundle(eth_src,50,active_backup,ofport,members:1) +actions=bundle(symmetric_l4,60,hrw,ofport,members:2,3) +actions=bundle(symmetric_l4,60,hrw,ofport,members:) +actions=bundle(symmetric_l3,60,hrw,ofport,members:2,3) +actions=bundle(symmetric_l3,60,hrw,ofport,members:) +actions=output:1,bundle(eth_src,0,hrw,ofport,members:1),output:2 +actions=bundle_load(eth_src,50,active_backup,ofport,reg0,members:1) +actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3) +actions=bundle_load(symmetric_l4,60,hrw,ofport,reg0[0..15],members:[2,3]) +actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],members:) +actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3) +actions=bundle_load(symmetric_l3,60,hrw,ofport,reg0[0..15],members:[2,3]) +actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..30],members:) +actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],members:1),output:2 actions=resubmit:1,resubmit(2),resubmit(,3),resubmit(2,3) send_flow_rem,actions=output:1,output:NXM_NX_REG0,output:2,output:reg1[16..31],output:3 check_overlap,actions=output:1,exit,output:2 @@ -469,20 +469,20 @@ NXT_FLOW_MOD: ADD table:255 actions=multipath(eth_src,50,hrw,12,0,NXM_NX_REG0[0. 
NXT_FLOW_MOD: ADD table:1 actions=drop NXT_FLOW_MOD: ADD table:255 tun_id=0x1234000056780000/0xffff0000ffff0000 actions=drop NXT_FLOW_MOD: ADD table:255 metadata=0x1234000056780000/0xffff0000ffff0000 actions=drop -NXT_FLOW_MOD: ADD table:255 actions=bundle(eth_src,50,active_backup,ofport,slaves:1) -NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,slaves:2,3) -NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,slaves:) -NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l3,60,hrw,ofport,slaves:2,3) -NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l3,60,hrw,ofport,slaves:) -NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle(eth_src,0,hrw,ofport,slaves:1),output:2 -NXT_FLOW_MOD: ADD table:255 actions=bundle_load(eth_src,50,active_backup,ofport,NXM_NX_REG0[],slaves:1) -NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3) -NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3) -NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:) -NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3) -NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3) -NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:) -NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],slaves:1),output:2 +NXT_FLOW_MOD: ADD table:255 actions=bundle(eth_src,50,active_backup,ofport,members:1) +NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,members:2,3) +NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,members:) +NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l3,60,hrw,ofport,members:2,3) +NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l3,60,hrw,ofport,members:) 
+NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle(eth_src,0,hrw,ofport,members:1),output:2 +NXT_FLOW_MOD: ADD table:255 actions=bundle_load(eth_src,50,active_backup,ofport,NXM_NX_REG0[],members:1) +NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3) +NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3) +NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],members:) +NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3) +NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3) +NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..30],members:) +NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],members:1),output:2 NXT_FLOW_MOD: ADD table:255 actions=resubmit:1,resubmit:2,resubmit(,3),resubmit(2,3) NXT_FLOW_MOD: ADD table:255 send_flow_rem actions=output:1,output:NXM_NX_REG0[],output:2,output:NXM_NX_REG1[16..31],output:3 NXT_FLOW_MOD: ADD table:255 check_overlap actions=output:1,exit,output:2 diff --git a/tests/test-bundle.c b/tests/test-bundle.c index 124ad5b43..53f78e86f 100644 --- a/tests/test-bundle.c +++ b/tests/test-bundle.c @@ -25,28 +25,28 @@ #include "util.h" #define N_FLOWS 50000 -#define MAX_SLAVES 8 /* Maximum supported by this test framework. */ +#define MAX_MEMBERS 8 /* Maximum supported by this test framework. 
*/ -struct slave { - ofp_port_t slave_id; +struct member { + ofp_port_t member_id; bool enabled; size_t flow_count; }; -struct slave_group { - size_t n_slaves; - struct slave slaves[MAX_SLAVES]; +struct member_group { + size_t n_members; + struct member members[MAX_MEMBERS]; }; -static struct slave * -slave_lookup(struct slave_group *sg, ofp_port_t slave_id) +static struct member * +member_lookup(struct member_group *sg, ofp_port_t member_id) { size_t i; - for (i = 0; i < sg->n_slaves; i++) { - if (sg->slaves[i].slave_id == slave_id) { - return &sg->slaves[i]; + for (i = 0; i < sg->n_members; i++) { + if (sg->members[i].member_id == member_id) { + return &sg->members[i]; } } @@ -54,12 +54,12 @@ slave_lookup(struct slave_group *sg, ofp_port_t slave_id) } static bool -slave_enabled_cb(ofp_port_t slave_id, void *aux) +member_enabled_cb(ofp_port_t member_id, void *aux) { - struct slave *slave; + struct member *member; - slave = slave_lookup(aux, slave_id); - return slave ? slave->enabled : false; + member = member_lookup(aux, member_id); + return member ? member->enabled : false; } static struct ofpact_bundle * @@ -80,8 +80,8 @@ parse_bundle_actions(char *actions) bundle = ofpact_get_BUNDLE(xmemdup(action, action->len)); ofpbuf_uninit(&ofpacts); - if (bundle->n_slaves > MAX_SLAVES) { - ovs_fatal(0, "At most %u slaves are supported", MAX_SLAVES); + if (bundle->n_members > MAX_MEMBERS) { + ovs_fatal(0, "At most %u members are supported", MAX_MEMBERS); } return bundle; @@ -109,7 +109,7 @@ test_bundle_main(int argc, char *argv[]) struct ofpact_bundle *bundle; struct flow *flows; size_t i, n_permute, old_n_enabled; - struct slave_group sg; + struct member_group sg; int old_active; set_program_name(argv[0]); @@ -120,17 +120,17 @@ test_bundle_main(int argc, char *argv[]) bundle = parse_bundle_actions(argv[1]); - /* Generate 'slaves' array. */ - sg.n_slaves = 0; - for (i = 0; i < bundle->n_slaves; i++) { - ofp_port_t slave_id = bundle->slaves[i]; + /* Generate 'members' array. 
*/ + sg.n_members = 0; + for (i = 0; i < bundle->n_members; i++) { + ofp_port_t member_id = bundle->members[i]; - if (slave_lookup(&sg, slave_id)) { - ovs_fatal(0, "Redundant slaves are not supported. "); + if (member_lookup(&sg, member_id)) { + ovs_fatal(0, "Redundant members are not supported. "); } - sg.slaves[sg.n_slaves].slave_id = slave_id; - sg.n_slaves++; + sg.members[sg.n_members].member_id = member_id; + sg.n_members++; } /* Generate flows. */ @@ -141,14 +141,14 @@ test_bundle_main(int argc, char *argv[]) } /* Cycles through each possible liveness permutation for the given - * n_slaves. The initial state is equivalent to all slaves down, so we + * n_members. The initial state is equivalent to all members down, so we * skip it by starting at i = 1. We do one extra iteration to cover * transitioning from the final state back to the initial state. */ old_n_enabled = 0; old_active = -1; - n_permute = 1 << sg.n_slaves; + n_permute = 1 << sg.n_members; for (i = 1; i <= n_permute + 1; i++) { - struct slave *slave; + struct member *member; size_t j, n_enabled, changed; double disruption, perfect; uint8_t mask; @@ -156,27 +156,27 @@ test_bundle_main(int argc, char *argv[]) mask = i % n_permute; - /* Gray coding ensures that in each iteration exactly one slave + /* Gray coding ensures that in each iteration exactly one member * changes its liveness. This makes the expected disruption a bit * easier to calculate, and is likely similar to how failures will be * experienced in the wild. */ mask = mask ^ (mask >> 1); - /* Initialize slaves. */ + /* Initialize members. 
*/ n_enabled = 0; - for (j = 0; j < sg.n_slaves; j++) { - slave = &sg.slaves[j]; - slave->flow_count = 0; - slave->enabled = ((1 << j) & mask) != 0; + for (j = 0; j < sg.n_members; j++) { + member = &sg.members[j]; + member->flow_count = 0; + member->enabled = ((1 << j) & mask) != 0; - if (slave->enabled) { + if (member->enabled) { n_enabled++; } } active = -1; - for (j = 0; j < sg.n_slaves; j++) { - if (sg.slaves[j].enabled) { + for (j = 0; j < sg.n_members; j++) { + if (sg.members[j].enabled) { active = j; break; } @@ -185,19 +185,19 @@ test_bundle_main(int argc, char *argv[]) changed = 0; for (j = 0; j < N_FLOWS; j++) { struct flow *flow = &flows[j]; - ofp_port_t old_slave_id, ofp_port; + ofp_port_t old_member_id, ofp_port; struct flow_wildcards wc; - old_slave_id = u16_to_ofp(flow->regs[0]); - ofp_port = bundle_execute(bundle, flow, &wc, slave_enabled_cb, + old_member_id = u16_to_ofp(flow->regs[0]); + ofp_port = bundle_execute(bundle, flow, &wc, member_enabled_cb, &sg); flow->regs[0] = ofp_to_u16(ofp_port); if (ofp_port != OFPP_NONE) { - slave_lookup(&sg, ofp_port)->flow_count++; + member_lookup(&sg, ofp_port)->flow_count++; } - if (old_slave_id != ofp_port) { + if (old_member_id != ofp_port) { changed++; } } @@ -208,23 +208,23 @@ test_bundle_main(int argc, char *argv[]) if (old_n_enabled || n_enabled) { perfect = 1.0 / MAX(old_n_enabled, n_enabled); } else { - /* This will happen when 'sg.n_slaves' is 0. */ + /* This will happen when 'sg.n_members' is 0. 
*/ perfect = 0; } } disruption = changed / (double)N_FLOWS; printf("%s: disruption=%.2f (perfect=%.2f)", - mask_str(mask, sg.n_slaves), disruption, perfect); + mask_str(mask, sg.n_members), disruption, perfect); - for (j = 0 ; j < sg.n_slaves; j++) { - slave = &sg.slaves[j]; + for (j = 0 ; j < sg.n_members; j++) { + member = &sg.members[j]; double flow_percent; - flow_percent = slave->flow_count / (double)N_FLOWS; + flow_percent = member->flow_count / (double)N_FLOWS; printf( " %.2f", flow_percent); - if (slave->enabled) { + if (member->enabled) { double perfect_fp; if (bundle->algorithm == NX_BD_ALG_ACTIVE_BACKUP) { @@ -234,16 +234,16 @@ test_bundle_main(int argc, char *argv[]) } if (fabs(flow_percent - perfect_fp) >= .01) { - fprintf(stderr, "%s: slave %d: flow_percentage=%.5f for" + fprintf(stderr, "%s: member %d: flow_percentage=%.5f for" " differs from perfect=%.5f by more than .01\n", - mask_str(mask, sg.n_slaves), slave->slave_id, + mask_str(mask, sg.n_members), member->member_id, flow_percent, perfect_fp); ok = false; } - } else if (slave->flow_count) { - fprintf(stderr, "%s: slave %d: disabled slave received" - " flows.\n", mask_str(mask, sg.n_slaves), - slave->slave_id); + } else if (member->flow_count) { + fprintf(stderr, "%s: member %d: disabled member received" + " flows.\n", mask_str(mask, sg.n_members), + member->member_id); ok = false; } } @@ -251,7 +251,7 @@ test_bundle_main(int argc, char *argv[]) if (fabs(disruption - perfect) >= .01) { fprintf(stderr, "%s: disruption=%.5f differs from perfect=%.5f by" - " more than .01\n", mask_str(mask, sg.n_slaves), + " more than .01\n", mask_str(mask, sg.n_members), disruption, perfect); ok = false; } diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c index 103b821bb..5ed7e8234 100644 --- a/vswitchd/bridge.c +++ b/vswitchd/bridge.c @@ -330,7 +330,8 @@ static void mirror_destroy(struct mirror *); static bool mirror_configure(struct mirror *); static void mirror_refresh_stats(struct mirror *); -static void 
iface_configure_lacp(struct iface *, struct lacp_slave_settings *); +static void iface_configure_lacp(struct iface *, + struct lacp_member_settings *); static bool iface_create(struct bridge *, const struct ovsrec_interface *, const struct ovsrec_port *); static bool iface_is_internal(const struct ovsrec_interface *iface, @@ -1197,11 +1198,11 @@ port_configure(struct port *port) /* Get name. */ s.name = port->name; - /* Get slaves. */ - s.n_slaves = 0; - s.slaves = xmalloc(ovs_list_size(&port->ifaces) * sizeof *s.slaves); + /* Get members. */ + s.n_members = 0; + s.members = xmalloc(ovs_list_size(&port->ifaces) * sizeof *s.members); LIST_FOR_EACH (iface, port_elem, &port->ifaces) { - s.slaves[s.n_slaves++] = iface->ofp_port; + s.members[s.n_members++] = iface->ofp_port; } /* Get VLAN tag. */ @@ -1270,16 +1271,16 @@ port_configure(struct port *port) if (s.lacp) { size_t i = 0; - s.lacp_slaves = xmalloc(s.n_slaves * sizeof *s.lacp_slaves); + s.lacp_members = xmalloc(s.n_members * sizeof *s.lacp_members); LIST_FOR_EACH (iface, port_elem, &port->ifaces) { - iface_configure_lacp(iface, &s.lacp_slaves[i++]); + iface_configure_lacp(iface, &s.lacp_members[i++]); } } else { - s.lacp_slaves = NULL; + s.lacp_members = NULL; } /* Get bond settings. */ - if (s.n_slaves > 1) { + if (s.n_members > 1) { s.bond = &bond_settings; port_configure_bond(port, &bond_settings); } else { @@ -1297,9 +1298,9 @@ port_configure(struct port *port) /* Clean up. */ free(s.cvlans); - free(s.slaves); + free(s.members); free(s.trunks); - free(s.lacp_slaves); + free(s.lacp_members); } /* Pick local port hardware address and datapath ID for 'br'. */ @@ -2277,8 +2278,8 @@ find_local_hw_addr(const struct bridge *br, struct eth_addr *ea, } else { /* Choose the interface whose MAC address will represent the port. 
* The Linux kernel bonding code always chooses the MAC address of - * the first slave added to a bond, and the Fedora networking - * scripts always add slaves to a bond in alphabetical order, so + * the first member added to a bond, and the Fedora networking + * scripts always add members to a bond in alphabetical order, so * for compatibility we choose the interface with the name that is * first in alphabetical order. */ LIST_FOR_EACH (candidate, port_elem, &port->ifaces) { @@ -2961,7 +2962,7 @@ port_refresh_bond_status(struct port *port, bool force_update) return; } - if (bond_get_changed_active_slave(port->name, &mac, force_update)) { + if (bond_get_changed_active_member(port->name, &mac, force_update)) { struct ds mac_s; ds_init(&mac_s); @@ -4505,7 +4506,7 @@ port_configure_lacp(struct port *port, struct lacp_settings *s) } static void -iface_configure_lacp(struct iface *iface, struct lacp_slave_settings *s) +iface_configure_lacp(struct iface *iface, struct lacp_member_settings *s) { int priority, portid, key; @@ -4601,9 +4602,9 @@ port_configure_bond(struct port *port, struct bond_settings *s) mac_s = port->cfg->bond_active_slave; if (!mac_s || !ovs_scan(mac_s, ETH_ADDR_SCAN_FMT, - ETH_ADDR_SCAN_ARGS(s->active_slave_mac))) { + ETH_ADDR_SCAN_ARGS(s->active_member_mac))) { /* OVSDB did not store the last active interface */ - s->active_slave_mac = eth_addr_zero; + s->active_member_mac = eth_addr_zero; } /* lb_output action is disabled by default. */ diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in index c06452928..50dad7208 100644 --- a/vswitchd/ovs-vswitchd.8.in +++ b/vswitchd/ovs-vswitchd.8.in @@ -198,46 +198,46 @@ These commands manage bonded ports on an Open vSwitch's bridges. To understand some of these commands, it is important to understand a detail of the bonding implementation called ``source load balancing'' (SLB). 
Instead of directly assigning Ethernet source addresses to -slaves, the bonding implementation computes a function that maps an +members, the bonding implementation computes a function that maps an 48-bit Ethernet source addresses into an 8-bit value (a ``MAC hash'' value). All of the Ethernet addresses that map to a single 8-bit -value are then assigned to a single slave. +value are then assigned to a single member. .IP "\fBbond/list\fR" -Lists all of the bonds, and their slaves, on each bridge. +Lists all of the bonds, and their members, on each bridge. . .IP "\fBbond/show\fR [\fIport\fR]" Lists all of the bond-specific information (updelay, downdelay, time until the next rebalance) about the given bonded \fIport\fR, or all bonded ports if no \fIport\fR is given. Also lists information about -each slave: whether it is enabled or disabled, the time to completion +each member: whether it is enabled or disabled, the time to completion of an updelay or downdelay if one is in progress, whether it is the -active slave, the hashes assigned to the slave. Any LACP information +active member, the hashes assigned to the member. Any LACP information related to this bond may be found using the \fBlacp/show\fR command. . -.IP "\fBbond/migrate\fR \fIport\fR \fIhash\fR \fIslave\fR" -Only valid for SLB bonds. Assigns a given MAC hash to a new slave. +.IP "\fBbond/migrate\fR \fIport\fR \fIhash\fR \fImember\fR" +Only valid for SLB bonds. Assigns a given MAC hash to a new member. \fIport\fR specifies the bond port, \fIhash\fR the MAC hash to be -migrated (as a decimal number between 0 and 255), and \fIslave\fR the -new slave to be assigned. +migrated (as a decimal number between 0 and 255), and \fImember\fR the +new member to be assigned. .IP The reassignment is not permanent: rebalancing or fail-over will -cause the MAC hash to be shifted to a new slave in the usual +cause the MAC hash to be shifted to a new member in the usual manner.
.IP -A MAC hash cannot be migrated to a disabled slave. -.IP "\fBbond/set\-active\-slave\fR \fIport\fR \fIslave\fR" -Sets \fIslave\fR as the active slave on \fIport\fR. \fIslave\fR must +A MAC hash cannot be migrated to a disabled member. +.IP "\fBbond/set\-active\-member\fR \fIport\fR \fImember\fR" +Sets \fImember\fR as the active member on \fIport\fR. \fImember\fR must currently be enabled. .IP -The setting is not permanent: a new active slave will be selected -if \fIslave\fR becomes disabled. -.IP "\fBbond/enable\-slave\fR \fIport\fR \fIslave\fR" -.IQ "\fBbond/disable\-slave\fR \fIport\fR \fIslave\fR" -Enables (or disables) \fIslave\fR on the given bond \fIport\fR, skipping any +The setting is not permanent: a new active member will be selected +if \fImember\fR becomes disabled. +.IP "\fBbond/enable\-member\fR \fIport\fR \fImember\fR" +.IQ "\fBbond/disable\-member\fR \fIport\fR \fImember\fR" +Enables (or disables) \fImember\fR on the given bond \fIport\fR, skipping any updelay (or downdelay). .IP This setting is not permanent: it persists only until the carrier -status of \fIslave\fR changes. +status of \fImember\fR changes. .IP "\fBbond/hash\fR \fImac\fR [\fIvlan\fR] [\fIbasis\fR]" Returns the hash value which would be used for \fImac\fR with \fIvlan\fR and \fIbasis\fR if specified. @@ -245,7 +245,7 @@ and \fIbasis\fR if specified. .IP "\fBlacp/show\fR [\fIport\fR]" Lists all of the LACP related information about the given \fIport\fR: active or passive, aggregation key, system id, and system priority. Also -lists information about each slave: whether it is enabled or disabled, +lists information about each member: whether it is enabled or disabled, whether it is attached or detached, port id and priority, actor information, and partner information. If \fIport\fR is not specified, then displays detailed information about all interfaces with CFM @@ -253,7 +253,7 @@ enabled. . 
.IP "\fBlacp/stats-show\fR [\fIport\fR]" Lists various stats about LACP PDUs (number of RX/TX PDUs, bad PDUs received) -and slave state (number of time slave's state expired/defaulted and carrier +and member state (number of times its state expired/defaulted and carrier status changed) for the given \fIport\fR. If \fIport\fR is not specified, then displays stats of all interfaces with LACP enabled. .SS "DPCTL DATAPATH DEBUGGING COMMANDS" diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index a6b70a2f9..d0890b843 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -1949,15 +1949,16 @@
    balance-slb
    - Balances flows among slaves based on source MAC address and output - VLAN, with periodic rebalancing as traffic patterns change. + Balances flows among members based on source MAC address and + output VLAN, with periodic rebalancing as traffic patterns change.
    active-backup
    - Assigns all flows to one slave, failing over to a backup slave when - the active slave is disabled. This is the only bonding mode in which - interfaces may be plugged into different upstream switches. + Assigns all flows to one member, failing over to a backup + member when the active member is disabled. This is the + only bonding mode in which interfaces may be plugged into different + upstream switches.
    @@ -1971,8 +1972,8 @@
    balance-tcp
    - Balances flows among slaves based on L3 and L4 protocol information - such as IP addresses and TCP/UDP ports. + Balances flows among members based on L3 and L4 protocol + information such as IP addresses and TCP/UDP ports.
    @@ -1987,20 +1988,20 @@ - An integer hashed along with flows when choosing output slaves in load - balanced bonds. When changed, all flows will be assigned different - hash values possibly causing slave selection decisions to change. Does - not affect bonding modes which do not employ load balancing such as - active-backup. + An integer hashed along with flows when choosing output members + in load balanced bonds. When changed, all flows will be assigned + different hash values possibly causing member selection + decisions to change. Does not affect bonding modes which do not employ + load balancing such as active-backup. Enable/disable usage of optimized lb_output action for - balancing flows among output slaves in load balanced bonds in + balancing flows among output members in load balanced bonds in balance-tcp. When enabled, it uses optimized path for - balance-tcp mode by using rss hash and avoids recirculation. - This knob does not affect other balancing modes. + balance-tcp mode by using rss hash and avoids recirculation. This knob + does not affect other balancing modes. - For a bonded port, record the mac address of the current active slave. + For a bonded port, record the MAC address of the current active + member. @@ -2480,7 +2482,8 @@
  • For the local interface, the default is the lowest-numbered MAC address among the other bridge ports, either the value of the in its record, - if set, or its actual MAC (for bonded ports, the MAC of its slave + if set, or its actual MAC (for bonded ports, the MAC of its + member whose name is first in alphabetical order). Internal ports and bridge ports that are used as port mirroring destinations (see the table) are ignored.
  • -- GitLab From 04d140664a272fdbdd5352162ea9719b9c77cafe Mon Sep 17 00:00:00 2001 From: David Marchand Date: Thu, 19 Mar 2020 08:32:40 +0100 Subject: [PATCH 342/432] travis: Fix kernel download retry. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wget stops retrying to download a file when hitting fatal http errors like 503. But if a previous try had resulted in a partially downloaded ${file}, the next wget call tries to download to ${file}.1. Example: +wget https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.16.18.tar.xz --2020-03-18 20:51:42-- https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.16.18.tar.xz Resolving cdn.kernel.org (cdn.kernel.org)... 151.101.1.176, 151.101.65.176, 151.101.129.176, ... Connecting to cdn.kernel.org (cdn.kernel.org)|151.101.1.176|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 103076276 (98M) [application/x-xz] Saving to: ‘linux-4.16.18.tar.xz’ linux-4.16.18.tar.x 0%[ ] 13.07K --.-KB/s in 0s 2020-03-18 20:54:44 (133 MB/s) - Read error at byte 13383/103076276 (Connection reset by peer). Retrying. --2020-03-18 20:54:45-- (try: 2) https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.16.18.tar.xz Connecting to cdn.kernel.org (cdn.kernel.org)|151.101.1.176|:443... connected. HTTP request sent, awaiting response... 503 first byte timeout 2020-03-18 20:55:46 ERROR 503: first byte timeout. +wget https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.16.18.tar.xz --2020-03-18 20:55:46-- https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.16.18.tar.xz Resolving cdn.kernel.org (cdn.kernel.org)... 151.101.1.176, 151.101.65.176, 151.101.129.176, ... Connecting to cdn.kernel.org (cdn.kernel.org)|151.101.1.176|:443... connected. HTTP request sent, awaiting response... 
200 OK Length: 103076276 (98M) [application/x-xz] Saving to: ‘linux-4.16.18.tar.xz.1’ linux-4.16.18.tar.x 100%[===================>] 98.30M 186MB/s in 0.5s 2020-03-18 20:55:56 (186 MB/s) - ‘linux-4.16.18.tar.xz.1’ saved [103076276/103076276] Fixes: 048674b45f4b ("travis: Retry kernel download on 503 first byte timeout.") Signed-off-by: David Marchand Acked-by: Kevin Traynor Signed-off-by: Ilya Maximets --- .travis/linux-build.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index 6981d1d47..60d8931f3 100755 --- a/.travis/linux-build.sh +++ b/.travis/linux-build.sh @@ -34,7 +34,9 @@ function install_kernel() url="${base_url}/linux-${version}.tar.xz" # Download kernel sources. Try direct link on CDN failure. - wget ${url} || wget ${url} || wget ${url/cdn/www} + wget ${url} || + (rm -f linux-${version}.tar.xz && wget ${url}) || + (rm -f linux-${version}.tar.xz && wget ${url/cdn/www}) tar xvf linux-${version}.tar.xz > /dev/null pushd linux-${version} -- GitLab From dd8ca104acd76497f380828ca31e2bd6f49907ca Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Wed, 14 Oct 2020 13:31:04 +0300 Subject: [PATCH 343/432] netdev-tc-offloads: Don't delete ufid mapping if fail to delete filter tc_replace_flower may fail, so the return value must be checked. If not zero, ufid can't be deleted. Otherwise the operations on this filter may fail because its ufid is not found. 
Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Simon Horman --- lib/netdev-offload-tc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 18ff380f9..e828a8683 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -198,7 +198,9 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid) int err; err = tc_del_filter(id); - del_ufid_tc_mapping(ufid); + if (!err) { + del_ufid_tc_mapping(ufid); + } return err; } -- GitLab From 6182c695cb9aee1e2a6e672dcf5528a3f467ca1a Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 20 Oct 2020 13:00:20 +0200 Subject: [PATCH 344/432] raft: Report jsonrpc backlog in kilobytes. While sending snapshots backlog on raft connections could quickly grow over 4GB and this will overflow raft-backlog counter. Let's report it in kB instead. (Using kB and not KB to match with ru_maxrss counter reported by kernel) Fixes: 3423cd97f88f ("ovsdb: Add raft memory usage to memory report.") Acked-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- ovsdb/raft.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 708b0624c..3411323aa 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -1020,13 +1020,14 @@ void raft_get_memory_usage(const struct raft *raft, struct simap *usage) { struct raft_conn *conn; + uint64_t backlog = 0; int cnt = 0; LIST_FOR_EACH (conn, list_node, &raft->conns) { - simap_increase(usage, "raft-backlog", - jsonrpc_session_get_backlog(conn->js)); + backlog += jsonrpc_session_get_backlog(conn->js); cnt++; } + simap_increase(usage, "raft-backlog-kB", backlog / 1000); simap_increase(usage, "raft-connections", cnt); } -- GitLab From a87d827ecb993f3aa9913d9736542ed613ebb25c Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 20 Oct 2020 11:54:36 +0200 Subject: [PATCH 345/432] NEWS: Move GTP-U entry to correct release. GTP-U support was released in 2.14, not 2.13. 
Fixes: 3c6d05a02e0f ("userspace: Add GTP-U support.") Acked-by: Greg Rose Signed-off-by: Ilya Maximets --- NEWS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index 0a7a8f7fb..8bb5bdc3f 100644 --- a/NEWS +++ b/NEWS @@ -59,6 +59,9 @@ v2.14.0 - 17 Aug 2020 - Tunnels: TC Flower offload * Tunnel Local endpoint address masked match are supported. * Tunnel Romte endpoint address masked match are supported. + - GTP-U Tunnel Protocol + * Add two new fields: tun_gtpu_flags, tun_gtpu_msgtype. + * Only support for userspace datapath. v2.13.0 - 14 Feb 2020 @@ -106,9 +109,6 @@ v2.13.0 - 14 Feb 2020 - 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and type filter supports new filters: "dpdk" and "partially-offloaded". - - GTP-U Tunnel Protocol - * Add two new fields: tun_gtpu_flags, tun_gtpu_msgtype. - * Only support for userspace datapath. v2.12.0 - 03 Sep 2019 --------------------- -- GitLab From 50f603dc4bf09125e924d850a11078068b7d68b0 Mon Sep 17 00:00:00 2001 From: Leonid Ryzhyk Date: Wed, 28 Nov 2018 18:41:34 -0800 Subject: [PATCH 346/432] packets: Un-inline functions needed by DDlog. DDlog uses these functions from Rust, but Rust can't use inline functions (since it doesn't compile C headers but only links against a C-compatible ABI). Thus, move the implementations of these functions to a .c file. I don't think any of these functions is likely to be an important part of a "fast path" in OVS, but if that's wrong, then we could take another approach. 
Signed-off-by: Leonid Ryzhyk Co-authored-by: Ben Pfaff Signed-off-by: Ben Pfaff Acked-by: Numan Siddique --- lib/packets.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++ lib/packets.h | 97 ++++++--------------------------------------------- 2 files changed, 108 insertions(+), 86 deletions(-) diff --git a/lib/packets.c b/lib/packets.c index 9d7cc5024..4a7643c5d 100644 --- a/lib/packets.c +++ b/lib/packets.c @@ -75,6 +75,29 @@ dpid_from_string(const char *s, uint64_t *dpidp) return *dpidp != 0; } +uint64_t +eth_addr_to_uint64(const struct eth_addr ea) +{ + return (((uint64_t) ntohs(ea.be16[0]) << 32) + | ((uint64_t) ntohs(ea.be16[1]) << 16) + | ntohs(ea.be16[2])); +} + +void +eth_addr_from_uint64(uint64_t x, struct eth_addr *ea) +{ + ea->be16[0] = htons(x >> 32); + ea->be16[1] = htons((x & 0xFFFF0000) >> 16); + ea->be16[2] = htons(x & 0xFFFF); +} + +void +eth_addr_mark_random(struct eth_addr *ea) +{ + ea->ea[0] &= ~1; /* Unicast. */ + ea->ea[0] |= 2; /* Private. */ +} + /* Returns true if 'ea' is a reserved address, that a bridge must never * forward, false otherwise. 
* @@ -524,6 +547,79 @@ eth_format_masked(const struct eth_addr eth, } } +void +in6_addr_solicited_node(struct in6_addr *addr, const struct in6_addr *ip6) +{ + union ovs_16aligned_in6_addr *taddr = + (union ovs_16aligned_in6_addr *) addr; + memset(taddr->be16, 0, sizeof(taddr->be16)); + taddr->be16[0] = htons(0xff02); + taddr->be16[5] = htons(0x1); + taddr->be16[6] = htons(0xff00); + memcpy(&addr->s6_addr[13], &ip6->s6_addr[13], 3); +} + +/* + * Generates ipv6 EUI64 address from the given eth addr + * and prefix and stores it in 'lla' + */ +void +in6_generate_eui64(struct eth_addr ea, const struct in6_addr *prefix, + struct in6_addr *lla) +{ + union ovs_16aligned_in6_addr *taddr = + (union ovs_16aligned_in6_addr *) lla; + union ovs_16aligned_in6_addr *prefix_taddr = + (union ovs_16aligned_in6_addr *) prefix; + taddr->be16[0] = prefix_taddr->be16[0]; + taddr->be16[1] = prefix_taddr->be16[1]; + taddr->be16[2] = prefix_taddr->be16[2]; + taddr->be16[3] = prefix_taddr->be16[3]; + taddr->be16[4] = htons(((ea.ea[0] ^ 0x02) << 8) | ea.ea[1]); + taddr->be16[5] = htons(ea.ea[2] << 8 | 0x00ff); + taddr->be16[6] = htons(0xfe << 8 | ea.ea[3]); + taddr->be16[7] = ea.be16[2]; +} + +/* Generates ipv6 link local address from the given eth addr + * with prefix 'fe80::/64' and stores it in 'lla'. */ +void +in6_generate_lla(struct eth_addr ea, struct in6_addr *lla) +{ + union ovs_16aligned_in6_addr *taddr = + (union ovs_16aligned_in6_addr *) lla; + memset(taddr->be16, 0, sizeof(taddr->be16)); + taddr->be16[0] = htons(0xfe80); + taddr->be16[4] = htons(((ea.ea[0] ^ 0x02) << 8) | ea.ea[1]); + taddr->be16[5] = htons(ea.ea[2] << 8 | 0x00ff); + taddr->be16[6] = htons(0xfe << 8 | ea.ea[3]); + taddr->be16[7] = ea.be16[2]; +} + +/* Returns true if 'addr' is a link local address. Otherwise, false. 
*/ +bool +in6_is_lla(struct in6_addr *addr) +{ +#ifdef s6_addr32 + return addr->s6_addr32[0] == htonl(0xfe800000) && !(addr->s6_addr32[1]); +#else + return addr->s6_addr[0] == 0xfe && addr->s6_addr[1] == 0x80 && + !(addr->s6_addr[2] | addr->s6_addr[3] | addr->s6_addr[4] | + addr->s6_addr[5] | addr->s6_addr[6] | addr->s6_addr[7]); +#endif +} + +void +ipv6_multicast_to_ethernet(struct eth_addr *eth, const struct in6_addr *ip6) +{ + eth->ea[0] = 0x33; + eth->ea[1] = 0x33; + eth->ea[2] = ip6->s6_addr[12]; + eth->ea[3] = ip6->s6_addr[13]; + eth->ea[4] = ip6->s6_addr[14]; + eth->ea[5] = ip6->s6_addr[15]; +} + /* Given the IP netmask 'netmask', returns the number of bits of the IP address * that it specifies, that is, the number of 1-bits in 'netmask'. * @@ -957,6 +1053,7 @@ eth_compose(struct dp_packet *b, const struct eth_addr eth_dst, void *data; struct eth_header *eth; + dp_packet_clear(b); /* The magic 2 here ensures that the L3 header (when it is added later) diff --git a/lib/packets.h b/lib/packets.h index 395bc869e..481bc22fa 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -281,12 +281,7 @@ static inline bool eth_addr_equal_except(const struct eth_addr a, || ((a.be16[2] ^ b.be16[2]) & mask.be16[2])); } -static inline uint64_t eth_addr_to_uint64(const struct eth_addr ea) -{ - return (((uint64_t) ntohs(ea.be16[0]) << 32) - | ((uint64_t) ntohs(ea.be16[1]) << 16) - | ntohs(ea.be16[2])); -} +uint64_t eth_addr_to_uint64(const struct eth_addr ea); static inline uint64_t eth_addr_vlan_to_uint64(const struct eth_addr ea, uint16_t vlan) @@ -294,12 +289,7 @@ static inline uint64_t eth_addr_vlan_to_uint64(const struct eth_addr ea, return (((uint64_t)vlan << 48) | eth_addr_to_uint64(ea)); } -static inline void eth_addr_from_uint64(uint64_t x, struct eth_addr *ea) -{ - ea->be16[0] = htons(x >> 32); - ea->be16[1] = htons((x & 0xFFFF0000) >> 16); - ea->be16[2] = htons(x & 0xFFFF); -} +void eth_addr_from_uint64(uint64_t x, struct eth_addr *ea); static inline struct eth_addr 
eth_addr_invert(const struct eth_addr src) { @@ -312,11 +302,7 @@ static inline struct eth_addr eth_addr_invert(const struct eth_addr src) return dst; } -static inline void eth_addr_mark_random(struct eth_addr *ea) -{ - ea->ea[0] &= ~1; /* Unicast. */ - ea->ea[0] |= 2; /* Private. */ -} +void eth_addr_mark_random(struct eth_addr *ea); static inline void eth_addr_random(struct eth_addr *ea) { @@ -1211,80 +1197,19 @@ in6_addr_get_mapped_ipv4(const struct in6_addr *addr) } } -static inline void -in6_addr_solicited_node(struct in6_addr *addr, const struct in6_addr *ip6) -{ - union ovs_16aligned_in6_addr *taddr = - (union ovs_16aligned_in6_addr *) addr; - memset(taddr->be16, 0, sizeof(taddr->be16)); - taddr->be16[0] = htons(0xff02); - taddr->be16[5] = htons(0x1); - taddr->be16[6] = htons(0xff00); - memcpy(&addr->s6_addr[13], &ip6->s6_addr[13], 3); -} +void in6_addr_solicited_node(struct in6_addr *addr, + const struct in6_addr *ip6); -/* - * Generates ipv6 EUI64 address from the given eth addr - * and prefix and stores it in 'lla' - */ -static inline void -in6_generate_eui64(struct eth_addr ea, struct in6_addr *prefix, - struct in6_addr *lla) -{ - union ovs_16aligned_in6_addr *taddr = - (union ovs_16aligned_in6_addr *) lla; - union ovs_16aligned_in6_addr *prefix_taddr = - (union ovs_16aligned_in6_addr *) prefix; - taddr->be16[0] = prefix_taddr->be16[0]; - taddr->be16[1] = prefix_taddr->be16[1]; - taddr->be16[2] = prefix_taddr->be16[2]; - taddr->be16[3] = prefix_taddr->be16[3]; - taddr->be16[4] = htons(((ea.ea[0] ^ 0x02) << 8) | ea.ea[1]); - taddr->be16[5] = htons(ea.ea[2] << 8 | 0x00ff); - taddr->be16[6] = htons(0xfe << 8 | ea.ea[3]); - taddr->be16[7] = ea.be16[2]; -} +void in6_generate_eui64(struct eth_addr ea, const struct in6_addr *prefix, + struct in6_addr *lla); -/* - * Generates ipv6 link local address from the given eth addr - * with prefix 'fe80::/64' and stores it in 'lla' - */ -static inline void -in6_generate_lla(struct eth_addr ea, struct in6_addr *lla) -{ - 
union ovs_16aligned_in6_addr *taddr = - (union ovs_16aligned_in6_addr *) lla; - memset(taddr->be16, 0, sizeof(taddr->be16)); - taddr->be16[0] = htons(0xfe80); - taddr->be16[4] = htons(((ea.ea[0] ^ 0x02) << 8) | ea.ea[1]); - taddr->be16[5] = htons(ea.ea[2] << 8 | 0x00ff); - taddr->be16[6] = htons(0xfe << 8 | ea.ea[3]); - taddr->be16[7] = ea.be16[2]; -} +void in6_generate_lla(struct eth_addr ea, struct in6_addr *lla); /* Returns true if 'addr' is a link local address. Otherwise, false. */ -static inline bool -in6_is_lla(struct in6_addr *addr) -{ -#ifdef s6_addr32 - return addr->s6_addr32[0] == htonl(0xfe800000) && !(addr->s6_addr32[1]); -#else - return addr->s6_addr[0] == 0xfe && addr->s6_addr[1] == 0x80 && - !(addr->s6_addr[2] | addr->s6_addr[3] | addr->s6_addr[4] | - addr->s6_addr[5] | addr->s6_addr[6] | addr->s6_addr[7]); -#endif -} +bool in6_is_lla(struct in6_addr *addr); -static inline void -ipv6_multicast_to_ethernet(struct eth_addr *eth, const struct in6_addr *ip6) -{ - eth->ea[0] = 0x33; - eth->ea[1] = 0x33; - eth->ea[2] = ip6->s6_addr[12]; - eth->ea[3] = ip6->s6_addr[13]; - eth->ea[4] = ip6->s6_addr[14]; - eth->ea[5] = ip6->s6_addr[15]; -} +void ipv6_multicast_to_ethernet(struct eth_addr *eth, + const struct in6_addr *ip6); static inline bool dl_type_is_ip_any(ovs_be16 dl_type) { -- GitLab From 91bdb33e041ef3e91b19a2a2e1562b95685c37d1 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 23 Oct 2020 20:20:04 +0200 Subject: [PATCH 347/432] raft: Fix error leak on failure while saving snapshot. Error should be destroyed before return. 
Fixes: 1b1d2e6daa56 ("ovsdb: Introduce experimental support for clustered databases.") Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- ovsdb/raft.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 3411323aa..728d60175 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -3987,7 +3987,7 @@ raft_handle_install_snapshot_request__( struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start, &new_snapshot); if (error) { - char *error_s = ovsdb_error_to_string(error); + char *error_s = ovsdb_error_to_string_free(error); VLOG_WARN("could not save snapshot: %s", error_s); free(error_s); return false; -- GitLab From 93023e80bd13ec1f09831eba484cf4621582d1a5 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sun, 25 Oct 2020 01:08:03 +0200 Subject: [PATCH 348/432] raft: Avoid annoying debug logs if raft is connected. If debug logs are enabled, "raft_is_connected: true" is printed on every call to raft_is_connected(), which is way too frequent. These messages are not very informative and only litter the log. Let's log only disconnected state in a rate-limited way and only log positive case once at the moment cluster becomes connected. Fixes: 923f01cad678 ("raft.c: Set candidate_retrying if no leader elected since last election.") Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- ovsdb/raft.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 728d60175..657eed813 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -1044,13 +1044,22 @@ raft_get_memory_usage(const struct raft *raft, struct simap *usage) bool raft_is_connected(const struct raft *raft) { + static bool last_state = false; bool ret = (!raft->candidate_retrying && !raft->joining && !raft->leaving && !raft->left && !raft->failed && raft->ever_had_leader); - VLOG_DBG("raft_is_connected: %s\n", ret?
"true": "false"); + + if (!ret) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + VLOG_DBG_RL(&rl, "raft_is_connected: false"); + } else if (!last_state) { + VLOG_DBG("raft_is_connected: true"); + } + last_state = ret; + return ret; } -- GitLab From 3630ab86f4912192b91fe8db526eecc5872f6864 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 15 May 2020 09:36:37 -0700 Subject: [PATCH 349/432] ovsdb-idl: Add comment with program name to ovsdb_idl_loop transactions. This can make it easier to see what daemon is committing transactions. Sometimes, in OVN especially, it can be hard to guess. Signed-off-by: Ben Pfaff Acked-by: Dumitru Ceara --- lib/ovsdb-idl.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index d8f221ca6..fdb9d85f5 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -5521,6 +5521,9 @@ ovsdb_idl_loop_run(struct ovsdb_idl_loop *loop) || ovsdb_idl_get_seqno(loop->idl) == loop->skip_seqno ? NULL : ovsdb_idl_txn_create(loop->idl)); + if (loop->open_txn) { + ovsdb_idl_txn_add_comment(loop->open_txn, "%s", program_name); + } return loop->open_txn; } -- GitLab From 7e38188160294df43dbbbc0cf6cfd42d02881fcf Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sun, 25 Oct 2020 01:16:09 +0200 Subject: [PATCH 350/432] raft: Add log length to the memory report. In many cases a big part of a memory consumed by ovsdb-server process is a raft log, so it's important to add its length to the memory report. 
Acked-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- ovsdb/raft.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 657eed813..ac85c6b67 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -1029,6 +1029,7 @@ raft_get_memory_usage(const struct raft *raft, struct simap *usage) } simap_increase(usage, "raft-backlog-kB", backlog / 1000); simap_increase(usage, "raft-connections", cnt); + simap_increase(usage, "raft-log", raft->log_end - raft->log_start); } /* Returns true if 'raft' has completed joining its cluster, has not left or -- GitLab From f38f98a2c0dd7fcaf20fbe11d1e67a9b2afc0b2a Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sat, 24 Oct 2020 02:25:48 +0200 Subject: [PATCH 351/432] ovsdb-server: Reclaim heap memory after compaction. Compaction happens at most once in 10 minutes. That is a big time interval for a heavily loaded ovsdb-server in cluster mode. In 10 minutes raft logs could grow up to tens of thousands of entries with tens of gigabytes in total size. While compaction cleans up raft log entries, the memory in many cases is not returned to the system, but kept in the heap of running ovsdb-server process, and it could stay in this condition for a really long time. In the end one performance spike could lead to a fast growth of the raft log and this memory will never (for a really long time) be released to the system even if the database is empty. Simple example how to reproduce with OVN sandbox: 1. make sandbox SANDBOXFLAGS='--nbdb-model=clustered --sbdb-model=clustered' 2.
Run following script that creates 1 port group, adds 4000 acls and removes all of that in the end: # cat ../memory-test.sh pg_name=my_port_group export OVN_NB_DAEMON=$(ovn-nbctl --pidfile --detach --log-file -vsocket_util:off) ovn-nbctl pg-add $pg_name for i in $(seq 1 4000); do echo "Iteration: $i" ovn-nbctl --log acl-add $pg_name from-lport $i udp drop done ovn-nbctl acl-del $pg_name ovn-nbctl pg-del $pg_name ovs-appctl -t $(pwd)/sandbox/nb1 memory/show ovn-appctl -t ovn-nbctl exit --- 3. Stopping one of Northbound DB servers: ovs-appctl -t $(pwd)/sandbox/nb1 exit Make sure that ovsdb-server didn't compact the database before it was stopped. Now we have a db file on disk that contains 4000 fairly big transactions inside. 4. Trying to start same ovsdb-server with this file. # cd sandbox && ovsdb-server <...> nb1.db At this point ovsdb-server reads all the transactions from db file and performs all of them as fast as it can one by one. When it finishes this, raft log contains 4000 entries and ovsdb-server consumes (on my system) ~13GB of memory while database is empty. And libc will likely never return this memory back to system, or, at least, will hold it for a really long time. This patch adds a new command 'ovsdb-server/memory-trim-on-compaction'. It's disabled by default, but once enabled, ovsdb-server will call 'malloc_trim(0)' after every successful compaction to try to return unused heap memory back to system. This is glibc-specific, so we need to detect function availability in a build time. Disabled by default since it adds from 1% to 30% (depending on the current state) to the snapshot creation time and, also, next memory allocations will likely require requests to kernel and that might be slower. Could be enabled by default later if considered broadly beneficial. 
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1888829 Acked-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- NEWS | 3 +++ configure.ac | 1 + ovsdb/ovsdb-server.1.in | 4 ++++ ovsdb/ovsdb-server.c | 41 +++++++++++++++++++++++++++++++++++++++-- ovsdb/ovsdb.c | 12 +++++++++++- ovsdb/ovsdb.h | 3 ++- 6 files changed, 60 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 8bb5bdc3f..2860a8e9c 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,9 @@ Post-v2.14.0 - OVSDB: * New unixctl command 'ovsdb-server/get-db-storage-status' to show the status of the storage that's backing a database. + * New unixctl command 'ovsdb-server/memory-trim-on-compaction on|off'. + If turned on, ovsdb-server will try to reclaim all the unused memory + after every DB compaction back to OS. Disabled by default. - DPDK: * Removed support for vhost-user dequeue zero-copy. - The environment variable OVS_UNBOUND_CONF, if set, is now used diff --git a/configure.ac b/configure.ac index 8d37af9db..126a1d9d1 100644 --- a/configure.ac +++ b/configure.ac @@ -100,6 +100,7 @@ OVS_CHECK_IF_DL OVS_CHECK_STRTOK_R OVS_CHECK_LINUX_AF_XDP AC_CHECK_DECLS([sys_siglist], [], [], [[#include ]]) +AC_CHECK_DECLS([malloc_trim], [], [], [[#include ]]) AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec], [], [], [[#include ]]) AC_CHECK_MEMBERS([struct ifreq.ifr_flagshigh], [], [], [[#include ]]) diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in index 6667553df..07a36cc7d 100644 --- a/ovsdb/ovsdb-server.1.in +++ b/ovsdb/ovsdb-server.1.in @@ -206,6 +206,10 @@ but not before 100 commits have been added or 10 minutes have elapsed since the last compaction. It will also be compacted automatically after 24 hours since the last compaction if 100 commits were added regardless of its size. 
+.IP "\fBovsdb\-server/memory-trim-on-compaction\fR \fIon\fR|\fIoff\fR" +If this option is \fIon\fR, ovsdb-server will try to reclaim all unused +heap memory back to the system after each successful database compaction +to reduce the memory consumption of the process. \fIoff\fR by default. . .IP "\fBovsdb\-server/reconnect\fR" Makes \fBovsdb\-server\fR drop all of the JSON\-RPC diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index 73a155b3f..0e60e2b87 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -76,8 +76,12 @@ static char *ssl_protocols; static char *ssl_ciphers; static bool bootstrap_ca_cert; +/* Try to reclaim heap memory back to system after DB compaction. */ +static bool trim_memory = false; + static unixctl_cb_func ovsdb_server_exit; static unixctl_cb_func ovsdb_server_compact; +static unixctl_cb_func ovsdb_server_memory_trim_on_compaction; static unixctl_cb_func ovsdb_server_reconnect; static unixctl_cb_func ovsdb_server_perf_counters_clear; static unixctl_cb_func ovsdb_server_perf_counters_show; @@ -243,7 +247,7 @@ main_loop(struct server_config *config, xasprintf("removing database %s because storage " "disconnected permanently", node->name)); } else if (ovsdb_storage_should_snapshot(db->db->storage)) { - log_and_free_error(ovsdb_snapshot(db->db)); + log_and_free_error(ovsdb_snapshot(db->db, trim_memory)); } } if (run_process) { @@ -410,6 +414,9 @@ main(int argc, char *argv[]) unixctl_command_register("exit", "", 0, 0, ovsdb_server_exit, &exiting); unixctl_command_register("ovsdb-server/compact", "", 0, 1, ovsdb_server_compact, &all_dbs); + unixctl_command_register("ovsdb-server/memory-trim-on-compaction", + "on|off", 1, 1, + ovsdb_server_memory_trim_on_compaction, NULL); unixctl_command_register("ovsdb-server/reconnect", "", 0, 0, ovsdb_server_reconnect, jsonrpc); @@ -1492,7 +1499,8 @@ ovsdb_server_compact(struct unixctl_conn *conn, int argc, VLOG_INFO("compacting %s database by user request", node->name); - struct ovsdb_error 
*error = ovsdb_snapshot(db->db); + struct ovsdb_error *error = ovsdb_snapshot(db->db, + trim_memory); if (error) { char *s = ovsdb_error_to_string(error); ds_put_format(&reply, "%s\n", s); @@ -1515,6 +1523,35 @@ ovsdb_server_compact(struct unixctl_conn *conn, int argc, ds_destroy(&reply); } +/* "ovsdb-server/memory-trim-on-compaction": controls whether ovsdb-server + * tries to reclaim heap memory back to system using malloc_trim() after + * compaction. */ +static void +ovsdb_server_memory_trim_on_compaction(struct unixctl_conn *conn, + int argc OVS_UNUSED, + const char *argv[], + void *arg OVS_UNUSED) +{ + const char *command = argv[1]; + +#if !HAVE_DECL_MALLOC_TRIM + unixctl_command_reply_error(conn, "memory trimming is not supported"); + return; +#endif + + if (!strcmp(command, "on")) { + trim_memory = true; + } else if (!strcmp(command, "off")) { + trim_memory = false; + } else { + unixctl_command_reply_error(conn, "invalid argument"); + return; + } + VLOG_INFO("memory trimming after compaction %s.", + trim_memory ? "enabled" : "disabled"); + unixctl_command_reply(conn, NULL); +} + /* "ovsdb-server/reconnect": makes ovsdb-server drop all of its JSON-RPC * connections and reconnect. 
*/ static void diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c index 2da117cb3..9042658fa 100644 --- a/ovsdb/ovsdb.c +++ b/ovsdb/ovsdb.c @@ -17,6 +17,10 @@ #include "ovsdb.h" +#if HAVE_DECL_MALLOC_TRIM +#include +#endif + #include "column.h" #include "file.h" #include "monitor.h" @@ -515,7 +519,7 @@ ovsdb_get_table(const struct ovsdb *db, const char *name) } struct ovsdb_error * OVS_WARN_UNUSED_RESULT -ovsdb_snapshot(struct ovsdb *db) +ovsdb_snapshot(struct ovsdb *db, bool trim_memory OVS_UNUSED) { if (!db->storage) { return NULL; @@ -527,6 +531,12 @@ ovsdb_snapshot(struct ovsdb *db) schema, data); json_destroy(schema); json_destroy(data); + +#if HAVE_DECL_MALLOC_TRIM + if (!error && trim_memory) { + malloc_trim(0); + } +#endif return error; } diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h index 5c30a83d9..72e127c84 100644 --- a/ovsdb/ovsdb.h +++ b/ovsdb/ovsdb.h @@ -112,7 +112,8 @@ struct json *ovsdb_execute(struct ovsdb *, const struct ovsdb_session *, long long int elapsed_msec, long long int *timeout_msec); -struct ovsdb_error *ovsdb_snapshot(struct ovsdb *) OVS_WARN_UNUSED_RESULT; +struct ovsdb_error *ovsdb_snapshot(struct ovsdb *, bool trim_memory) + OVS_WARN_UNUSED_RESULT; void ovsdb_replace(struct ovsdb *dst, struct ovsdb *src); -- GitLab From 83fbd2e9dc5d85bee43b7597d4c3b403d4d1c484 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 20 Oct 2020 18:22:25 +0200 Subject: [PATCH 352/432] raft: Avoid having more than one snapshot in-flight. Previous commit 8c2c503bdb0d ("raft: Avoid sending equal snapshots.") took a "safe" approach to not send only exactly same snapshot installation requests. However, it doesn't make much sense to send more than one snapshot at a time. If obsolete snapshot installed, leader will re-send the most recent one. With this change leader will have only 1 snapshot in-flight per connection. 
This will reduce backlogs on raft connections in case new snapshot created while 'install_snapshot_request' is in progress or if election timer changed in that period. Also, not tracking the exact 'install_snapshot_request' we've sent allows to simplify the code. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1888829 Fixes: 8c2c503bdb0d ("raft: Avoid sending equal snapshots.") Acked-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- ovsdb/raft-private.c | 1 - ovsdb/raft-private.h | 4 ++-- ovsdb/raft.c | 42 ++++++++++++++++-------------------------- 3 files changed, 18 insertions(+), 29 deletions(-) diff --git a/ovsdb/raft-private.c b/ovsdb/raft-private.c index 9468fdaf4..26d39a087 100644 --- a/ovsdb/raft-private.c +++ b/ovsdb/raft-private.c @@ -137,7 +137,6 @@ raft_server_destroy(struct raft_server *s) if (s) { free(s->address); free(s->nickname); - free(s->last_install_snapshot_request); free(s); } } diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h index 1f366b4ab..76b097b89 100644 --- a/ovsdb/raft-private.h +++ b/ovsdb/raft-private.h @@ -84,8 +84,8 @@ struct raft_server { bool replied; /* Reply to append_request was received from this node during current election_timeout interval. */ - /* Copy of the last install_snapshot_request sent to this server. */ - struct raft_install_snapshot_request *last_install_snapshot_request; + /* install_snapshot_request has been sent, but there is no response yet. */ + bool install_snapshot_request_in_progress; /* For use in adding and removing servers: */ struct uuid requester_sid; /* Nonzero if requested via RPC. */ diff --git a/ovsdb/raft.c b/ovsdb/raft.c index ac85c6b67..f94a3eed8 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -1437,12 +1437,11 @@ raft_conn_run(struct raft *raft, struct raft_conn *conn) && jsonrpc_session_is_connected(conn->js)); if (reconnected) { - /* Clear 'last_install_snapshot_request' since it might not reach the - * destination or server was restarted. 
*/ + /* Clear 'install_snapshot_request_in_progress' since it might not + * reach the destination or server was restarted. */ struct raft_server *server = raft_find_server(raft, &conn->sid); if (server) { - free(server->last_install_snapshot_request); - server->last_install_snapshot_request = NULL; + server->install_snapshot_request_in_progress = false; } } @@ -2564,6 +2563,7 @@ raft_server_init_leader(struct raft *raft, struct raft_server *s) s->match_index = 0; s->phase = RAFT_PHASE_STABLE; s->replied = false; + s->install_snapshot_request_in_progress = false; } static void @@ -3320,31 +3320,19 @@ raft_send_install_snapshot_request(struct raft *raft, } }; - if (s->last_install_snapshot_request) { - struct raft_install_snapshot_request *old, *new; - - old = s->last_install_snapshot_request; - new = &rpc.install_snapshot_request; - if ( old->term == new->term - && old->last_index == new->last_index - && old->last_term == new->last_term - && old->last_servers == new->last_servers - && old->data == new->data - && old->election_timer == new->election_timer - && uuid_equals(&old->last_eid, &new->last_eid)) { - static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + if (s->install_snapshot_request_in_progress) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); - VLOG_WARN_RL(&rl, "not sending exact same install_snapshot_request" - " to server %s again", s->nickname); - return; - } + VLOG_INFO_RL(&rl, "not sending snapshot to server %s, " + "already in progress", s->nickname); + return; } - free(s->last_install_snapshot_request); - CONST_CAST(struct raft_server *, s)->last_install_snapshot_request - = xmemdup(&rpc.install_snapshot_request, - sizeof rpc.install_snapshot_request); - raft_send(raft, &rpc); + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); + VLOG_INFO_RL(&rl, "sending snapshot to server %s, %"PRIu64":%"PRIu64".", + s->nickname, raft->term, raft->log_start - 1); + CONST_CAST(struct raft_server *, 
s)->install_snapshot_request_in_progress + = raft_send(raft, &rpc); } static void @@ -4061,6 +4049,8 @@ raft_handle_install_snapshot_reply( } } + s->install_snapshot_request_in_progress = false; + if (rpy->last_index != raft->log_start - 1 || rpy->last_term != raft->snap.term) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5); -- GitLab From 2eebece5a3b5fe8b5d546f13c7d79af29324bf99 Mon Sep 17 00:00:00 2001 From: Timothy Redaelli Date: Tue, 3 Nov 2020 11:12:26 +0100 Subject: [PATCH 353/432] Documentation: Fix rendering of extra repo info for RHEL 8. In commit a82083ee3091 ("Documentation: Add extra repo info for RHEL 8") a newline was missing to correctly generate the code block to add codeready-builder repository. This commit adds the missing newline to correctly generate the code block with the RHEL 8 codeready-builder instructions. Fixes: a82083ee3091 ("Documentation: Add extra repo info for RHEL 8") Acked-by: Greg Rose Signed-off-by: Timothy Redaelli Signed-off-by: Ilya Maximets --- Documentation/intro/install/fedora.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/intro/install/fedora.rst b/Documentation/intro/install/fedora.rst index e5324e1df..4a2f3507c 100644 --- a/Documentation/intro/install/fedora.rst +++ b/Documentation/intro/install/fedora.rst @@ -70,6 +70,7 @@ repositories to help yum-builddep, e.g.:: $ subscription-manager repos --enable=rhel-7-server-optional-rpms or for RHEL 8:: + $ subscription-manager repos \ --enable=codeready-builder-for-rhel-8-x86_64-rpms -- GitLab From c4bc03d872db5fe6f804fc9ddbbec29e28335cb5 Mon Sep 17 00:00:00 2001 From: William Tu Date: Wed, 4 Nov 2020 15:16:15 -0800 Subject: [PATCH 354/432] ovs-bugtool: Fix crash when enable --ovs. 
When enabling '--ovs' or when not using '-y', ovs-bugtool crashes due to Traceback (most recent call last): File "/usr/local/sbin/ovs-bugtool", line 1410, in sys.exit(main()) File "/usr/local/sbin/ovs-bugtool", line 690, in main for (k, v) in data.items(): RuntimeError: dictionary changed size during iteration The patch fixes it by making a copy of the key and value. VMware-BZ: #2663359 Fixes: 1ca0323e7c29 ("Require Python 3 and remove support for Python 2.") Acked-by: Greg Rose Signed-off-by: William Tu Signed-off-by: Ilya Maximets --- utilities/bugtool/ovs-bugtool.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utilities/bugtool/ovs-bugtool.in b/utilities/bugtool/ovs-bugtool.in index ddb5bc8dc..fa62cbe94 100755 --- a/utilities/bugtool/ovs-bugtool.in +++ b/utilities/bugtool/ovs-bugtool.in @@ -686,8 +686,8 @@ exclude those logs from the archive. ovs_info_caps = [CAP_NETWORK_STATUS, CAP_SYSTEM_LOGS, CAP_OPENVSWITCH_LOGS, CAP_NETWORK_CONFIG] ovs_info_list = ['process-tree'] - # We cannot use iteritems, since we modify 'data' as we pass through - for (k, v) in data.items(): + # We cannot use items(), since we modify 'data' as we pass through + for (k, v) in list(data.items()): cap = v['cap'] if 'filename' in v: info = k[0] @@ -707,8 +707,8 @@ exclude those logs from the archive. pass # permit the user to filter out data - # We cannot use iteritems, since we modify 'data' as we pass through - for (k, v) in data.items(): + # We cannot use items(), since we modify 'data' as we pass through + for (k, v) in list(data.items()): cap = v['cap'] if 'filename' in v: key = k[0] -- GitLab From eca34ebd7c418c0351eb92ae615d07edc31a9404 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 21 Oct 2020 03:32:49 +0200 Subject: [PATCH 355/432] raft: Set threshold on backlog for raft connections. RAFT messages could be fairly big. 
If something abnormal happens to one of the servers in a cluster it may not be able to process all the incoming messages in a timely manner. This results in jsonrpc backlog growth on the sender's side. For example if follower gets many new clients at once that it needs to serve, or it decides to take a snapshot in a period of high number of database changes. If backlog grows large enough it becomes harder and harder for follower to process incoming raft messages, it sends outdated replies and starts receiving snapshots and the whole raft log from the leader. Sometimes backlog grows too high (60GB in this example): jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:, num of msgs: 15370, backlog: 61731060773. In this case OS might actually decide to kill the sender to free some memory. Anyway, It could take a lot of time for such a server to catch up with the rest of the cluster if it has so much data to receive and process. Introducing backlog thresholds for jsonrpc connections. If sending backlog will exceed particular values (500 messages or 4GB in size), connection will be dropped and re-created. This will allow to drop all the current backlog and start over increasing chances of cluster recovery. Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1888829 Acked-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- NEWS | 2 ++ lib/jsonrpc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++- lib/jsonrpc.h | 6 ++++++ ovsdb/raft.c | 5 +++++ 4 files changed, 72 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 2860a8e9c..ebdf8758b 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,8 @@ Post-v2.14.0 * New unixctl command 'ovsdb-server/memory-trim-on-compaction on|off'. If turned on, ovsdb-server will try to reclaim all the unused memory after every DB compaction back to OS. Disabled by default. + * Maximum backlog on RAFT connections limited to 500 messages or 4GB. + Once threshold reached, connection is dropped (and re-established). 
- DPDK: * Removed support for vhost-user dequeue zero-copy. - The environment variable OVS_UNBOUND_CONF, if set, is now used diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c index ecbc939fe..08aaff061 100644 --- a/lib/jsonrpc.c +++ b/lib/jsonrpc.c @@ -50,6 +50,10 @@ struct jsonrpc { struct ovs_list output; /* Contains "struct ofpbuf"s. */ size_t output_count; /* Number of elements in "output". */ size_t backlog; + + /* Limits. */ + size_t max_output; /* 'output_count' disconnection threshold. */ + size_t max_backlog; /* 'backlog' disconnection threshold. */ }; /* Rate limit for error messages. */ @@ -178,6 +182,17 @@ jsonrpc_get_backlog(const struct jsonrpc *rpc) return rpc->status ? 0 : rpc->backlog; } +/* Sets thresholds for send backlog. If send backlog contains more than + * 'max_n_msgs' messages or is larger than 'max_backlog_bytes' bytes, + * connection will be dropped. */ +void +jsonrpc_set_backlog_threshold(struct jsonrpc *rpc, + size_t max_n_msgs, size_t max_backlog_bytes) +{ + rpc->max_output = max_n_msgs; + rpc->max_backlog = max_backlog_bytes; +} + /* Returns the number of bytes that have been received on 'rpc''s underlying * stream. (The value wraps around if it exceeds UINT_MAX.) 
*/ unsigned int @@ -261,9 +276,26 @@ jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg *msg) rpc->backlog += length; if (rpc->output_count >= 50) { - VLOG_INFO_RL(&rl, "excessive sending backlog, jsonrpc: %s, num of" + static struct vlog_rate_limit bl_rl = VLOG_RATE_LIMIT_INIT(5, 5); + bool disconnect = false; + + VLOG_INFO_RL(&bl_rl, "excessive sending backlog, jsonrpc: %s, num of" " msgs: %"PRIuSIZE", backlog: %"PRIuSIZE".", rpc->name, rpc->output_count, rpc->backlog); + if (rpc->max_output && rpc->output_count > rpc->max_output) { + disconnect = true; + VLOG_WARN("sending backlog exceeded maximum number of messages (%" + PRIuSIZE" > %"PRIuSIZE"), disconnecting, jsonrpc: %s.", + rpc->output_count, rpc->max_output, rpc->name); + } else if (rpc->max_backlog && rpc->backlog > rpc->max_backlog) { + disconnect = true; + VLOG_WARN("sending backlog exceeded maximum size (%"PRIuSIZE" > %" + PRIuSIZE" bytes), disconnecting, jsonrpc: %s.", + rpc->backlog, rpc->max_backlog, rpc->name); + } + if (disconnect) { + jsonrpc_error(rpc, E2BIG); + } } if (rpc->backlog == length) { @@ -787,6 +819,10 @@ struct jsonrpc_session { int last_error; unsigned int seqno; uint8_t dscp; + + /* Limits for jsonrpc. 
*/ + size_t max_n_msgs; + size_t max_backlog_bytes; }; static void @@ -842,6 +878,8 @@ jsonrpc_session_open_multiple(const struct svec *remotes, bool retry) s->dscp = 0; s->last_error = 0; + jsonrpc_session_set_backlog_threshold(s, 0, 0); + const char *name = reconnect_get_name(s->reconnect); if (!pstream_verify_name(name)) { reconnect_set_passive(s->reconnect, true, time_msec()); @@ -882,6 +920,7 @@ jsonrpc_session_open_unreliably(struct jsonrpc *jsonrpc, uint8_t dscp) s->pstream = NULL; s->seqno = 1; + jsonrpc_session_set_backlog_threshold(s, 0, 0); return s; } @@ -970,6 +1009,8 @@ jsonrpc_session_run(struct jsonrpc_session *s) } reconnect_connected(s->reconnect, time_msec()); s->rpc = jsonrpc_open(stream); + jsonrpc_set_backlog_threshold(s->rpc, s->max_n_msgs, + s->max_backlog_bytes); s->seqno++; } else if (error != EAGAIN) { reconnect_listen_error(s->reconnect, time_msec(), error); @@ -1010,6 +1051,8 @@ jsonrpc_session_run(struct jsonrpc_session *s) if (!error) { reconnect_connected(s->reconnect, time_msec()); s->rpc = jsonrpc_open(s->stream); + jsonrpc_set_backlog_threshold(s->rpc, s->max_n_msgs, + s->max_backlog_bytes); s->stream = NULL; s->seqno++; } else if (error != EAGAIN) { @@ -1250,3 +1293,18 @@ jsonrpc_session_set_dscp(struct jsonrpc_session *s, uint8_t dscp) jsonrpc_session_force_reconnect(s); } } + +/* Sets thresholds for send backlog. If send backlog contains more than + * 'max_n_msgs' messages or is larger than 'max_backlog_bytes' bytes, + * connection will be closed (then reconnected, if that feature is enabled). 
*/ +void +jsonrpc_session_set_backlog_threshold(struct jsonrpc_session *s, + size_t max_n_msgs, + size_t max_backlog_bytes) +{ + s->max_n_msgs = max_n_msgs; + s->max_backlog_bytes = max_backlog_bytes; + if (s->rpc) { + jsonrpc_set_backlog_threshold(s->rpc, max_n_msgs, max_backlog_bytes); + } +} diff --git a/lib/jsonrpc.h b/lib/jsonrpc.h index a44114e8d..d75d66b86 100644 --- a/lib/jsonrpc.h +++ b/lib/jsonrpc.h @@ -51,6 +51,9 @@ void jsonrpc_wait(struct jsonrpc *); int jsonrpc_get_status(const struct jsonrpc *); size_t jsonrpc_get_backlog(const struct jsonrpc *); +void jsonrpc_set_backlog_threshold(struct jsonrpc *, size_t max_n_msgs, + size_t max_backlog_bytes); + unsigned int jsonrpc_get_received_bytes(const struct jsonrpc *); const char *jsonrpc_get_name(const struct jsonrpc *); @@ -140,6 +143,9 @@ void jsonrpc_session_set_probe_interval(struct jsonrpc_session *, int probe_interval); void jsonrpc_session_set_dscp(struct jsonrpc_session *, uint8_t dscp); +void jsonrpc_session_set_backlog_threshold(struct jsonrpc_session *, + size_t max_n_msgs, + size_t max_backlog_bytes); const char *jsonrpc_session_get_id(const struct jsonrpc_session *); #endif /* jsonrpc.h */ diff --git a/ovsdb/raft.c b/ovsdb/raft.c index f94a3eed8..67c714ff4 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -925,6 +925,9 @@ raft_reset_ping_timer(struct raft *raft) raft->ping_timeout = time_msec() + raft->election_timer / 3; } +#define RAFT_MAX_BACKLOG_N_MSGS 500 +#define RAFT_MAX_BACKLOG_BYTES UINT32_MAX + static void raft_add_conn(struct raft *raft, struct jsonrpc_session *js, const struct uuid *sid, bool incoming) @@ -940,6 +943,8 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js, conn->incoming = incoming; conn->js_seqno = jsonrpc_session_get_seqno(conn->js); jsonrpc_session_set_probe_interval(js, 0); + jsonrpc_session_set_backlog_threshold(js, RAFT_MAX_BACKLOG_N_MSGS, + RAFT_MAX_BACKLOG_BYTES); } /* Starts the local server in an existing Raft cluster, using the local copy of -- 
GitLab From 80e3becdc1eea9b92253a391c0071e6218dda7d8 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Sun, 25 Oct 2020 02:45:05 +0200 Subject: [PATCH 356/432] raft: Make backlog thresholds configurable. New appctl 'cluster/set-backlog-threshold' to configure thresholds on backlog of raft jsonrpc connections. Could be used, for example, in some extreme conditions where size of a database expected to be very large, i.e. comparable with default 4GB threshold. Acked-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- NEWS | 1 + ovsdb/ovsdb-server.1.in | 5 ++++ ovsdb/raft.c | 55 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index ebdf8758b..c0819bf93 100644 --- a/NEWS +++ b/NEWS @@ -8,6 +8,7 @@ Post-v2.14.0 after every DB compaction back to OS. Disabled by default. * Maximum backlog on RAFT connections limited to 500 messages or 4GB. Once threshold reached, connection is dropped (and re-established). + Use the 'cluster/set-backlog-threshold' command to change limits. - DPDK: * Removed support for vhost-user dequeue zero-copy. - The environment variable OVS_UNBOUND_CONF, if set, is now used diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in index 07a36cc7d..5a7f3ba13 100644 --- a/ovsdb/ovsdb-server.1.in +++ b/ovsdb/ovsdb-server.1.in @@ -381,6 +381,11 @@ This command must be executed on the leader. It initiates the change to the cluster. To see if the change takes effect (committed), use \fBcluster/status\fR to show the current setting. Once a change is committed, it persists at server restarts. +.IP "\fBcluster/set\-backlog\-threshold \fIdb\fR \fIn_msgs\fR \fIn_bytes\fR" +Sets the backlog limits for \fIdb\fR's RAFT connections to a maximum of +\fIn_msgs\fR messages or \fIn_bytes\fR bytes. If the backlog on one of the +connections reaches the limit, it will be disconnected (and re-established). +Values are checked only if the backlog contains more than 50 messages. . 
.so lib/vlog-unixctl.man .so lib/memory-unixctl.man diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 67c714ff4..760dfca6d 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -305,6 +305,12 @@ struct raft { bool ever_had_leader; /* There has been leader elected since the raft is initialized, meaning it is ever connected. */ + + /* Connection backlog limits. */ +#define DEFAULT_MAX_BACKLOG_N_MSGS 500 +#define DEFAULT_MAX_BACKLOG_N_BYTES UINT32_MAX + size_t conn_backlog_max_n_msgs; /* Number of messages. */ + size_t conn_backlog_max_n_bytes; /* Number of bytes. */ }; /* All Raft structures. */ @@ -412,6 +418,9 @@ raft_alloc(void) raft->election_timer = ELECTION_BASE_MSEC; + raft->conn_backlog_max_n_msgs = DEFAULT_MAX_BACKLOG_N_MSGS; + raft->conn_backlog_max_n_bytes = DEFAULT_MAX_BACKLOG_N_BYTES; + return raft; } @@ -925,9 +934,6 @@ raft_reset_ping_timer(struct raft *raft) raft->ping_timeout = time_msec() + raft->election_timer / 3; } -#define RAFT_MAX_BACKLOG_N_MSGS 500 -#define RAFT_MAX_BACKLOG_BYTES UINT32_MAX - static void raft_add_conn(struct raft *raft, struct jsonrpc_session *js, const struct uuid *sid, bool incoming) @@ -943,8 +949,8 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js, conn->incoming = incoming; conn->js_seqno = jsonrpc_session_get_seqno(conn->js); jsonrpc_session_set_probe_interval(js, 0); - jsonrpc_session_set_backlog_threshold(js, RAFT_MAX_BACKLOG_N_MSGS, - RAFT_MAX_BACKLOG_BYTES); + jsonrpc_session_set_backlog_threshold(js, raft->conn_backlog_max_n_msgs, + raft->conn_backlog_max_n_bytes); } /* Starts the local server in an existing Raft cluster, using the local copy of @@ -4717,6 +4723,42 @@ raft_unixctl_change_election_timer(struct unixctl_conn *conn, unixctl_command_reply(conn, "change of election timer initiated."); } +static void +raft_unixctl_set_backlog_threshold(struct unixctl_conn *conn, + int argc OVS_UNUSED, const char *argv[], + void *aux OVS_UNUSED) +{ + const char *cluster_name = argv[1]; + unsigned long long n_msgs, 
n_bytes; + struct raft_conn *r_conn; + + struct raft *raft = raft_lookup_by_name(cluster_name); + if (!raft) { + unixctl_command_reply_error(conn, "unknown cluster"); + return; + } + + if (!str_to_ullong(argv[2], 10, &n_msgs) + || !str_to_ullong(argv[3], 10, &n_bytes)) { + unixctl_command_reply_error(conn, "invalid argument"); + return; + } + + if (n_msgs < 50 || n_msgs > SIZE_MAX || n_bytes > SIZE_MAX) { + unixctl_command_reply_error(conn, "values out of range"); + return; + } + + raft->conn_backlog_max_n_msgs = n_msgs; + raft->conn_backlog_max_n_bytes = n_bytes; + + LIST_FOR_EACH (r_conn, list_node, &raft->conns) { + jsonrpc_session_set_backlog_threshold(r_conn->js, n_msgs, n_bytes); + } + + unixctl_command_reply(conn, NULL); +} + static void raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED, int argc OVS_UNUSED, const char *argv[], @@ -4777,6 +4819,9 @@ raft_init(void) raft_unixctl_kick, NULL); unixctl_command_register("cluster/change-election-timer", "DB TIME", 2, 2, raft_unixctl_change_election_timer, NULL); + unixctl_command_register("cluster/set-backlog-threshold", + "DB N_MSGS N_BYTES", 3, 3, + raft_unixctl_set_backlog_threshold, NULL); unixctl_command_register("cluster/failure-test", "FAILURE SCENARIO", 1, 1, raft_unixctl_failure_test, NULL); ovsthread_once_done(&once); -- GitLab From 1090a949ac920b4e7ee901cee36008408a1c2386 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Wed, 23 Sep 2020 13:48:15 -0700 Subject: [PATCH 357/432] ovsdb: Remove read permission of *.db from others. Currently, when ovsdb *.db is created by ovsdb-tool it grants read permission to others. This may incur security concerns, for example, IPsec Pre-shared keys are stored in ovs-vswitchd.conf.db. This patch addresses the concerns by removing permission for others.
Reported-by: Antonin Bas Acked-by: Mark Gray Signed-off-by: Yi-Hung Wei Signed-off-by: Ilya Maximets --- ovsdb/log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ovsdb/log.c b/ovsdb/log.c index 41af77679..4a28fa3db 100644 --- a/ovsdb/log.c +++ b/ovsdb/log.c @@ -212,7 +212,7 @@ ovsdb_log_open(const char *name, const char *magic, if (!strcmp(name, "/dev/stdin") && open_mode == OVSDB_LOG_READ_ONLY) { fd = dup(STDIN_FILENO); } else { - fd = open(name, flags, 0666); + fd = open(name, flags, 0660); } if (fd < 0) { const char *op = (open_mode == OVSDB_LOG_CREATE_EXCL ? "create" -- GitLab From 588821eacf03e85263dc1f3b9755746a859c68d9 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Mon, 12 Oct 2020 14:27:35 +0000 Subject: [PATCH 358/432] netdev-offload-dpdk: Preserve HW statistics for modified flows. In case of a flow modification, preserve the HW statistics of the old HW flow to the new one. Fixes: 3c7330ebf036 ("netdev-offload-dpdk: Support offload of output action.") Signed-off-by: Eli Britstein Reviewed-by: Gaetan Rivet Acked-by: Sriharsha Basavapatna Tested-by: Emma Finn Signed-off-by: Ilya Maximets --- lib/netdev-offload-dpdk.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 4d19f93cd..17b08ca43 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -78,7 +78,7 @@ ufid_to_rte_flow_data_find(const ovs_u128 *ufid) return NULL; } -static inline void +static inline struct ufid_to_rte_flow_data * ufid_to_rte_flow_associate(const ovs_u128 *ufid, struct rte_flow *rte_flow, bool actions_offloaded) { @@ -103,6 +103,7 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid, cmap_insert(&ufid_to_rte_flow, CONST_CAST(struct cmap_node *, &data->node), hash); + return data; } static inline void @@ -1424,7 +1425,7 @@ out: return flow; } -static int +static struct ufid_to_rte_flow_data * netdev_offload_dpdk_add_flow(struct netdev 
*netdev, struct match *match, struct nlattr *nl_actions, @@ -1433,12 +1434,11 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev, struct offload_info *info) { struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; + struct ufid_to_rte_flow_data *flows_data = NULL; bool actions_offloaded = true; struct rte_flow *flow; - int ret = 0; - ret = parse_flow_match(&patterns, match); - if (ret) { + if (parse_flow_match(&patterns, match)) { VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported", netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid)); goto out; @@ -1456,16 +1456,15 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev, } if (!flow) { - ret = -1; goto out; } - ufid_to_rte_flow_associate(ufid, flow, actions_offloaded); + flows_data = ufid_to_rte_flow_associate(ufid, flow, actions_offloaded); VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT, netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); out: free_flow_patterns(&patterns); - return ret; + return flows_data; } static int @@ -1499,14 +1498,19 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match, struct dpif_flow_stats *stats) { struct ufid_to_rte_flow_data *rte_flow_data; + struct dpif_flow_stats old_stats; + bool modification = false; int ret; /* * If an old rte_flow exists, it means it's a flow modification. * Here destroy the old rte flow first before adding a new one. + * Keep the stats for the newly created rule. 
*/ rte_flow_data = ufid_to_rte_flow_data_find(ufid); if (rte_flow_data && rte_flow_data->rte_flow) { + old_stats = rte_flow_data->stats; + modification = true; ret = netdev_offload_dpdk_destroy_flow(netdev, ufid, rte_flow_data->rte_flow); if (ret < 0) { @@ -1514,11 +1518,18 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match, } } + rte_flow_data = netdev_offload_dpdk_add_flow(netdev, match, actions, + actions_len, ufid, info); + if (!rte_flow_data) { + return -1; + } + if (modification) { + rte_flow_data->stats = old_stats; + } if (stats) { - memset(stats, 0, sizeof *stats); + *stats = rte_flow_data->stats; } - return netdev_offload_dpdk_add_flow(netdev, match, actions, - actions_len, ufid, info); + return 0; } static int -- GitLab From 2fe34c03078f1fe01a39b4d963a9a367ba468bad Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Thu, 15 Oct 2020 11:33:59 +0800 Subject: [PATCH 359/432] dpctl: Add the option 'pmd' for dump-flows. "ovs-appctl dpctl/dump-flows" added the option "pmd" which allow user to dump pmd specified. That option is useful to dump rules of pmd when we have a large number of rules in dp. Signed-off-by: Tonghao Zhang Acked-by: Gaetan Rivet Signed-off-by: Ilya Maximets --- NEWS | 3 +++ lib/dpctl.c | 20 ++++++++++++++++---- lib/dpctl.man | 6 +++++- tests/pmd.at | 9 +++++++++ 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index c0819bf93..a542c68ca 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,9 @@ Post-v2.14.0 Use the 'cluster/set-backlog-threshold' command to change limits. - DPDK: * Removed support for vhost-user dequeue zero-copy. + - Userspace datapath: + * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which + restricts a flow dump to a single PMD thread if set. - The environment variable OVS_UNBOUND_CONF, if set, is now used as the DNS resolver's (unbound) configuration file. 
- Linux datapath: diff --git a/lib/dpctl.c b/lib/dpctl.c index 2f859a753..33202813b 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -980,6 +980,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p) struct dpif_flow_dump *flow_dump; struct dpif_flow f; int pmd_id = PMD_ID_NULL; + bool pmd_id_filter = false; int lastargc = 0; int error; @@ -996,6 +997,16 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p) goto out_free; } types_list = xstrdup(argv[--argc] + 5); + } else if (!strncmp(argv[argc - 1], "pmd=", 4)) { + if (!ovs_scan(argv[--argc], "pmd=%d", &pmd_id)) { + error = EINVAL; + goto out_free; + } + + if (pmd_id == -1) { + pmd_id = NON_PMD_CORE_ID; + } + pmd_id_filter = true; } } @@ -1070,7 +1081,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p) /* If 'pmd_id' is specified, overlapping flows could be dumped from * different pmd threads. So, separates dumps from different pmds * by printing a title line. */ - if (pmd_id != f.pmd_id) { + if (!pmd_id_filter && pmd_id != f.pmd_id) { if (f.pmd_id == NON_PMD_CORE_ID) { ds_put_format(&ds, "flow-dump from the main thread:\n"); } else { @@ -1079,7 +1090,8 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p) } pmd_id = f.pmd_id; } - if (flow_passes_type_filter(&f, &dump_types)) { + if (pmd_id == f.pmd_id && + flow_passes_type_filter(&f, &dump_types)) { format_dpif_flow(&ds, &f, portno_names, dpctl_p); dpctl_print(dpctl_p, "%s\n", ds_cstr(&ds)); } @@ -2522,8 +2534,8 @@ static const struct dpctl_command all_commands[] = { { "set-if", "dp iface...", 2, INT_MAX, dpctl_set_if, DP_RW }, { "dump-dps", "", 0, 0, dpctl_dump_dps, DP_RO }, { "show", "[dp...]", 0, INT_MAX, dpctl_show, DP_RO }, - { "dump-flows", "[dp] [filter=..] [type=..]", - 0, 3, dpctl_dump_flows, DP_RO }, + { "dump-flows", "[dp] [filter=..] [type=..] 
[pmd=..]", + 0, 4, dpctl_dump_flows, DP_RO }, { "add-flow", "[dp] flow actions", 2, 3, dpctl_add_flow, DP_RW }, { "mod-flow", "[dp] flow actions", 2, 3, dpctl_mod_flow, DP_RW }, { "get-flow", "[dp] ufid", 1, 2, dpctl_get_flow, DP_RO }, diff --git a/lib/dpctl.man b/lib/dpctl.man index 727d1f7be..0f6327786 100644 --- a/lib/dpctl.man +++ b/lib/dpctl.man @@ -104,7 +104,7 @@ default. When multiple datapaths exist, then a datapath name is required. . .TP -.DO "[\fB\-m \fR| \fB\-\-more\fR] [\fB\-\-names \fR| \fB\-\-no\-names\fR]" \*(DX\fBdump\-flows\fR "[\fIdp\fR] [\fBfilter=\fIfilter\fR] [\fBtype=\fItype\fR]" +.DO "[\fB\-m \fR| \fB\-\-more\fR] [\fB\-\-names \fR| \fB\-\-no\-names\fR]" \*(DX\fBdump\-flows\fR "[\fIdp\fR] [\fBfilter=\fIfilter\fR] [\fBtype=\fItype\fR] [\fBpmd=\fIpmd\fR]" Prints to the console all flow entries in datapath \fIdp\fR's flow table. Without \fB\-m\fR or \fB\-\-more\fR, output omits match fields that a flow wildcards entirely; with \fB\-m\fR or \fB\-\-more\fR, @@ -118,6 +118,10 @@ The \fIfilter\fR is also useful to match wildcarded fields in the datapath flow. As an example, \fBfilter='tcp,tp_src=100'\fR will match the datapath flow containing '\fBtcp(src=80/0xff00,dst=8080/0xff)\fR'. .IP +If \fBpmd=\fIpmd\fR is specified, only displays flows of the specified pmd. +Using \fBpmd=\fI-1\fR will restrict the dump to flows from the main thread. +This option is only supported by the \fBuserspace datapath\fR. +.IP If \fBtype=\fItype\fR is specified, only displays flows of the specified types. This option supported only for \fBovs\-appctl dpctl/dump\-flows\fR. 
\fItype\fR is a comma separated list, which can contain any of the following: diff --git a/tests/pmd.at b/tests/pmd.at index 5b612f88f..cc5371d5a 100644 --- a/tests/pmd.at +++ b/tests/pmd.at @@ -707,6 +707,15 @@ recirc_id(0),in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_typ recirc_id(0),in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x1234), packets:0, bytes:0, used:never, actions:2 ]) +dnl Check pmd filtering option. +AT_CHECK([ovs-appctl dpctl/dump-flows dummy@dp0 pmd=0], [0], [dnl +recirc_id(0),in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x1234), packets:0, bytes:0, used:never, actions:2 +]) + +AT_CHECK([ovs-appctl dpctl/dump-flows dummy@dp0 pmd=-1], [0], [dnl +recirc_id(0),in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x1234), packets:0, bytes:0, used:never, actions:2 +]) + AT_CHECK([ovs-appctl dpctl/del-flow dummy@dp0 'in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x1234)'], [0], [dnl ]) -- GitLab From ed8cf18733fd1f4865d8f16fa06180bf5a772ebe Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 28 Sep 2020 04:34:44 +0200 Subject: [PATCH 360/432] releases: Mark 2.13 as a new LTS release. 2.5 release is 4.5 years old and I'm not aware of anyone who actually uses it today. Release process documentation says that there is no strict time period for nominating a new LTS release and that usually it happens once in a two years. So, proposing to nominate 2.13 as our new LTS release since it's a first release that doesn't include OVN inside, so we will formally not have to support it in this repository in case there are major issues that might be hard to fix. 
Suggested-by: Ben Pfaff Acked-by: Flavio Leitner Acked-by: Ian Stokes Acked-by: Kevin Traynor Reviewed-by: Simon Horman Signed-off-by: Ilya Maximets --- Documentation/faq/releases.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index dcba97e16..3623e3f40 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -32,7 +32,7 @@ Q: What does it mean for an Open vSwitch release to be LTS (long-term support)? If a significant bug is identified in an LTS release, we will provide an updated release that includes the fix. Releases that are not LTS may not be fixed and may just be supplanted by the next major release. The current - LTS release is 2.5.x. + LTS release is 2.13.x. For more information on the Open vSwitch release process, refer to :doc:`/internals/release-process`. -- GitLab From 15177c4dadcc0470cf668cfbd9691d728f5f0721 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 28 Sep 2020 04:34:45 +0200 Subject: [PATCH 361/432] release-process: Add transition period for LTS releases. While LTS change happens, according to release-process.rst, we're immediately dropping support for the old LTS and, according to backporting-patches.rst could stop backporting bug fixes to branches older than new LTS. While this might be OK for an upstream project (some upstream projects like QEMU doesn't support anything at all except the last release) it doesn't sound like a user-friendly policy. Below addition to the release process might make the process a bit smoother in terms that we will continue support of branches a little bit longer even after changing current LTS, i.e. providing at least a minimal transition period (1 release frame) for users of old LTS. Effectively, this change means that we will support branch-2.5 until 2.15 release, i.e. we will provide the last release, if any, on branch-2.5 somewhere around Feb 2021. 
(I don't actually expect many fixes there) Signed-off-by: Ilya Maximets Acked-by: Flavio Leitner Acked-by: Kevin Traynor --- Documentation/internals/release-process.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/internals/release-process.rst b/Documentation/internals/release-process.rst index 63080caab..6352af0dc 100644 --- a/Documentation/internals/release-process.rst +++ b/Documentation/internals/release-process.rst @@ -75,10 +75,13 @@ Scheduling`_ for the timing of each stage: and so on. The process is the same for these additional release as for a .0 release. -At most two release branches are formally maintained at any given time: the -latest release and the latest release designed as LTS. An LTS release is one -that the OVS project has designated as being maintained for a longer period of -time. Currently, an LTS release is maintained until the next LTS is chosen. +At most three release branches are formally maintained at any given time: the +latest release, the latest release designed as LTS and a previous LTS release +during the transition period. An LTS release is one that the OVS project has +designated as being maintained for a longer period of time. +Currently, an LTS release is maintained until the next major release after the +new LTS is chosen. This one release time frame is a transition period which is +intended for users to upgrade from old LTS to new one. There is not currently a strict guideline on how often a new LTS release is chosen, but so far it has been about every 2 years. That could change based on the current state of OVS development. For example, we do not want to designate -- GitLab From 8c6944f6913c0c819e495bee8a5e74c218dc72b2 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 28 Sep 2020 04:34:46 +0200 Subject: [PATCH 362/432] release-process: Standardize designation of new LTS releases. 
Standardize that we will mark a new release as LTS every two years to avoid situation where we have a really old LTS branch that no-one actually uses, but we have to support and provide releases for it. This will also make release process more predictable, so users will be able to rely on it and plan their upgrades accordingly. As a bonus, 2 years support cycle kind of aligns with 2 years support cycle of DPDK LTS releases. Still keeping a window for us to discuss and avoid marking some particular release as LTS in case of significant issues with it. Signed-off-by: Ilya Maximets Acked-by: Flavio Leitner Acked-by: Kevin Traynor --- Documentation/internals/release-process.rst | 32 +++++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/Documentation/internals/release-process.rst b/Documentation/internals/release-process.rst index 6352af0dc..8a655b33b 100644 --- a/Documentation/internals/release-process.rst +++ b/Documentation/internals/release-process.rst @@ -82,12 +82,32 @@ designated as being maintained for a longer period of time. Currently, an LTS release is maintained until the next major release after the new LTS is chosen. This one release time frame is a transition period which is intended for users to upgrade from old LTS to new one. -There is not currently a strict guideline on how often a new LTS release is -chosen, but so far it has been about every 2 years. That could change based on -the current state of OVS development. For example, we do not want to designate -a new release as LTS that includes disruptive internal changes, as that may -make it harder to support for a longer period of time. Discussion about -choosing the next LTS release occurs on the OVS development mailing list. + +New LTS release is chosen every 2 years. The process is that current latest +stable release becomes an LTS release at the same time the next major release +is out. That could change based on the current state of OVS development. 
For +example, we do not want to designate a new release as LTS that includes +disruptive internal changes, as that may make it harder to support for a longer +period of time. Discussion about skipping designation of the next LTS release +occurs on the OVS development mailing list. + +LTS designation schedule example (depends on current state of development): + ++---------+--------------+--------------------------------------------------+ +| Version | Release Date | Actions | ++---------+--------------+--------------------------------------------------+ +| 2.14 | Aug 2020 | 2.14 - new latest stable, 2.13 stable ⟶ new LTS | ++---------+--------------+--------------------------------------------------+ +| 2.15 | Feb 2021 | 2.15 - new latest stable, 2.5 LTS ⟶ EOL | ++---------+--------------+--------------------------------------------------+ +| 2.16 | Aug 2021 | 2.16 - new latest stable | ++---------+--------------+--------------------------------------------------+ +| 2.17 | Feb 2022 | 2.17 - new latest stable | ++---------+--------------+--------------------------------------------------+ +| 2.18 | Aug 2022 | 2.18 - new latest stable, 2.17 stable ⟶ new LTS | ++---------+--------------+--------------------------------------------------+ +| 2.19 | Feb 2023 | 2.19 - new latest stable, 2.13 LTS ⟶ EOL | ++---------+--------------+--------------------------------------------------+ Release Numbering ----------------- -- GitLab From 193995f81c07347847190b03bd9da23948d497a6 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 28 Sep 2020 04:34:47 +0200 Subject: [PATCH 363/432] release-process: Policy for unmaintained branches. While only 2 branches are formally maintained (LTS and latest release), OVS team usually provides stable releases for other branches too, at least for branches between LTS and latest. When transition period ends for an old LTS, we, according to backporting-patches.rst, could stop backporting bug fixes to branches older than new LTS.
While this might be OK for an upstream project it doesn't sound like a user-friendly policy just because it means that we're dropping support for branches released less than a year ago. Below addition to the release process might make the process a bit smoother in terms that we will not drop support for not so old branches even after the transition period, if committers will follow the "as far as it goes" backporting policy. And we will provide stable releases for these branches for at least 2 years (these releases could be less frequent than releases on LTS branches). After 2 year period (4 releases) committers are still free to backport fixes they think are needed on older branches, however we will likely not provide actual releases on these branches, unless it's specially requested and discussed. Additionally, "4 releases" policy aligns with the DPDK LTS support policy, i.e. we will be able to validate and release last OVS releases with the last available DPDK LTS, e.g. OVS 2.11 last stable release will likely be released with the 18.11 EOL release validated. Signed-off-by: Ilya Maximets Acked-by: Flavio Leitner Acked-by: Kevin Traynor --- .../internals/contributing/backporting-patches.rst | 3 ++- Documentation/internals/release-process.rst | 9 +++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Documentation/internals/contributing/backporting-patches.rst b/Documentation/internals/contributing/backporting-patches.rst index e8f4f271c..162e9d209 100644 --- a/Documentation/internals/contributing/backporting-patches.rst +++ b/Documentation/internals/contributing/backporting-patches.rst @@ -69,7 +69,8 @@ targeted to the `master` branch, using the ``Fixes`` tag described in :doc:`submitting-patches`. The maintainer first applies the patch to `master`, then backports the patch to each older affected tree, as far back as it goes or at least to all currently supported branches. This is usually each branch back -to the most recent LTS release branch. 
+to the oldest maintained LTS release branch or the last 4 release branches if +the oldest LTS is newer. If the fix only affects a particular branch and not `master`, contributors should submit the change with the target branch listed in the subject line of diff --git a/Documentation/internals/release-process.rst b/Documentation/internals/release-process.rst index 8a655b33b..fb39ccb5d 100644 --- a/Documentation/internals/release-process.rst +++ b/Documentation/internals/release-process.rst @@ -109,6 +109,15 @@ LTS designation schedule example (depends on current state of development): | 2.19 | Feb 2023 | 2.19 - new latest stable, 2.13 LTS ⟶ EOL | +---------+--------------+--------------------------------------------------+ +While branches other than LTS and the latest release are not formally +maintained, the OVS project usually provides stable releases for these branches +for at least 2 years, i.e. stable releases are provided for the last 4 +release branches. However, these branches may not include all the fixes that +LTS has in case backporting is not straightforward and developers are not +willing to spend their time on that (this mostly affects branches that are +older than the LTS, because backporting to LTS implies backporting to all +intermediate branches). + Release Numbering ----------------- -- GitLab From ecadc3a30b4b9e1682017456444dc1c3488f9998 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 11 Nov 2020 12:07:55 +0200 Subject: [PATCH 364/432] netdev-offload-tc: Use single 'once' variable for probing tc features There is no need for a 'once' variable per probe. 
Signed-off-by: Roi Dayan Reviewed-by: Paul Blakey Signed-off-by: Simon Horman --- lib/netdev-offload-tc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index e828a8683..2a772a971 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1988,8 +1988,7 @@ probe_tc_block_support(int ifindex) static int netdev_tc_init_flow_api(struct netdev *netdev) { - static struct ovsthread_once multi_mask_once = OVSTHREAD_ONCE_INITIALIZER; - static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER; + static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev); uint32_t block_id = 0; struct tcf_id id; @@ -2014,16 +2013,13 @@ netdev_tc_init_flow_api(struct netdev *netdev) /* make sure there is no ingress/egress qdisc */ tc_add_del_qdisc(ifindex, false, 0, hook); - if (ovsthread_once_start(&block_once)) { + if (ovsthread_once_start(&once)) { probe_tc_block_support(ifindex); /* Need to re-fetch block id as it depends on feature availability. 
*/ block_id = get_block_id_from_netdev(netdev); - ovsthread_once_done(&block_once); - } - if (ovsthread_once_start(&multi_mask_once)) { probe_multi_mask_per_prio(ifindex); - ovsthread_once_done(&multi_mask_once); + ovsthread_once_done(&once); } error = tc_add_del_qdisc(ifindex, true, block_id, hook); -- GitLab From 568781d48cdcf3dab94b28e958ec2cc8db580192 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 11 Nov 2020 12:04:56 +0200 Subject: [PATCH 365/432] AUTHORS: Update Roi Dayan Signed-off-by: Roi Dayan Signed-off-by: Simon Horman --- .mailmap | 1 + AUTHORS.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 85373d113..dc3b2094d 100644 --- a/.mailmap +++ b/.mailmap @@ -67,6 +67,7 @@ Ralf Spenneberg Rami Rosen Ramu Ramamurthy Robert Åkerblom-Andersson +Roi Dayan Romain Lenglet Romain Lenglet Russell Bryant diff --git a/AUTHORS.rst b/AUTHORS.rst index 9e9d210a2..10f0f272e 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -331,7 +331,7 @@ Robert Åkerblom-Andersson Robert.nr1@gmail.com Robert Wojciechowicz robertx.wojciechowicz@intel.com Rob Hoes rob.hoes@citrix.com Rohith Basavaraja rohith.basavaraja@gmail.com -Roi Dayan roid@mellanox.com +Roi Dayan roid@nvidia.com Róbert Mulik robert.mulik@ericsson.com Romain Lenglet romain.lenglet@berabera.info Roni Bar Yanai roniba@mellanox.com -- GitLab From 78f05eb787ac2ecf7ffa16e1b7214ab8b4dd5a32 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Wed, 11 Nov 2020 04:25:29 -0500 Subject: [PATCH 366/432] Documentation: update IPsec tutorial for F32 F32 requires the "python3-openvswitch" package now. Also, the iptables chain "IN_FedoraServer_allow" does not exist on Fedora 32. 
Signed-off-by: Mark Gray Acked-by: Eric Garver Acked-by: Ian Stokes Signed-off-by: Ian Stokes --- Documentation/tutorials/ipsec.rst | 108 +++++++++++++++--------------- 1 file changed, 55 insertions(+), 53 deletions(-) diff --git a/Documentation/tutorials/ipsec.rst b/Documentation/tutorials/ipsec.rst index b4c323513..ebc0ae429 100644 --- a/Documentation/tutorials/ipsec.rst +++ b/Documentation/tutorials/ipsec.rst @@ -42,7 +42,7 @@ Installing OVS and IPsec Packages --------------------------------- OVS IPsec has .deb and .rpm packages. You should use the right package -based on your Linux distribution. This tutorial uses Ubuntu 16.04 and Fedora 27 +based on your Linux distribution. This tutorial uses Ubuntu 16.04 and Fedora 32 as examples. Ubuntu @@ -59,8 +59,8 @@ Ubuntu 2. Install the related packages:: - $ apt-get install dkms strongswan - $ dpkg -i libopenvswitch_*.deb openvswitch-common_*.deb \ + # apt-get install dkms strongswan + # dpkg -i libopenvswitch_*.deb openvswitch-common_*.deb \ openvswitch-switch_*.deb openvswitch-datapath-dkms_*.deb \ python-openvswitch_*.deb openvswitch-pki_*.deb \ openvswitch-ipsec_*.deb @@ -71,23 +71,25 @@ Ubuntu Fedora ~~~~~~ -1. Follow :doc:`/intro/install/fedora` to build RPM packages. +1. Install the related packages. Fedora 32 does not require installation of + the out-of-tree kernel module:: -2. Install the related packages:: + # dnf install python3-openvswitch libreswan \ + openvswitch openvswitch-ipsec + +2. Install firewall rules to allow ESP and IKE traffic:: - $ dnf install python2-openvswitch libreswan \ - "kernel-devel-uname-r == $(uname -r)" - $ rpm -i openvswitch-*.rpm openvswitch-kmod-*.rpm \ - openvswitch-openvswitch-ipsec-*.rpm + # systemctl start firewalld + # firewall-cmd --add-service ipsec -3. 
Install firewall rules to allow ESP and IKE traffic:: + Or to make permanent:: - $ iptables -A IN_FedoraServer_allow -p esp -j ACCEPT - $ iptables -A IN_FedoraServer_allow -p udp --dport 500 -j ACCEPT + # systemctl enable firewalld + # firewall-cmd --permanent --add-service ipsec -4. Run the openvswitch-ipsec service:: +3. Run the openvswitch-ipsec service:: - $ systemctl start openvswitch-ipsec.service + # systemctl start openvswitch-ipsec.service .. note:: @@ -97,47 +99,47 @@ Fedora Configuring IPsec tunnel ------------------------ -Suppose you want to build IPsec tunnel between two hosts. Assume `host_1`'s +Suppose you want to build an IPsec tunnel between two hosts. Assume `host_1`'s external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure `host_1` and `host_2` can ping each other via these external IPs. 0. Set up some variables to make life easier. On both hosts, set ``ip_1`` and ``ip_2`` variables, e.g.:: - $ ip_1=1.1.1.1 - $ ip_2=2.2.2.2 + # ip_1=1.1.1.1 + # ip_2=2.2.2.2 1. Set up OVS bridges in both hosts. In `host_1`:: - $ ovs-vsctl add-br br-ipsec - $ ip addr add 192.0.0.1/24 dev br-ipsec - $ ip link set br-ipsec up + # ovs-vsctl add-br br-ipsec + # ip addr add 192.0.0.1/24 dev br-ipsec + # ip link set br-ipsec up In `host_2`:: - $ ovs-vsctl add-br br-ipsec - $ ip addr add 192.0.0.2/24 dev br-ipsec - $ ip link set br-ipsec up + # ovs-vsctl add-br br-ipsec + # ip addr add 192.0.0.2/24 dev br-ipsec + # ip link set br-ipsec up 2. Set up IPsec tunnel. - There are three authentication methods. You can choose one to set up your - IPsec tunnel. + There are three authentication methods. Choose one method to set up your + IPsec tunnel and follow the steps below. 
a) Using pre-shared key: In `host_1`:: - $ ovs-vsctl add-port br-ipsec tun -- \ + # ovs-vsctl add-port br-ipsec tun -- \ set interface tun type=gre \ options:remote_ip=$ip_2 \ options:psk=swordfish In `host_2`:: - $ ovs-vsctl add-port br-ipsec tun -- \ + # ovs-vsctl add-port br-ipsec tun -- \ set interface tun type=gre \ options:remote_ip=$ip_1 \ options:psk=swordfish @@ -156,15 +158,15 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure In `host_1`:: - $ ovs-pki req -u host_1 - $ ovs-pki self-sign host_1 - $ scp host_1-cert.pem $ip_2:/etc/keys/host_1-cert.pem + # ovs-pki req -u host_1 + # ovs-pki self-sign host_1 + # scp host_1-cert.pem $ip_2:/etc/keys/host_1-cert.pem In `host_2`:: - $ ovs-pki req -u host_2 - $ ovs-pki self-sign host_2 - $ scp host_2-cert.pem $ip_1:/etc/keys/host_2-cert.pem + # ovs-pki req -u host_2 + # ovs-pki self-sign host_2 + # scp host_2-cert.pem $ip_1:/etc/keys/host_2-cert.pem .. note:: @@ -176,20 +178,20 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure In `host_1`:: - $ ovs-vsctl set Open_vSwitch . \ + # ovs-vsctl set Open_vSwitch . \ other_config:certificate=/etc/keys/host_1-cert.pem \ other_config:private_key=/etc/keys/host_1-privkey.pem - $ ovs-vsctl add-port br-ipsec tun -- \ + # ovs-vsctl add-port br-ipsec tun -- \ set interface tun type=gre \ options:remote_ip=$ip_2 \ options:remote_cert=/etc/keys/host_2-cert.pem In `host_2`:: - $ ovs-vsctl set Open_vSwitch . \ + # ovs-vsctl set Open_vSwitch . \ other_config:certificate=/etc/keys/host_2-cert.pem \ other_config:private_key=/etc/keys/host_2-privkey.pem - $ ovs-vsctl add-port br-ipsec tun -- \ + # ovs-vsctl add-port br-ipsec tun -- \ set interface tun type=gre \ options:remote_ip=$ip_1 \ options:remote_cert=/etc/keys/host_1-cert.pem @@ -207,29 +209,29 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. 
Make sure In `host_1`:: - $ ovs-pki init + # ovs-pki init Generate certificate requests and copy the certificate request of `host_2` to `host_1`. In `host_1`:: - $ ovs-pki req -u host_1 + # ovs-pki req -u host_1 In `host_2`:: - $ ovs-pki req -u host_2 - $ scp host_2-req.pem $ip_1:/etc/keys/host_2-req.pem + # ovs-pki req -u host_2 + # scp host_2-req.pem $ip_1:/etc/keys/host_2-req.pem Sign the certificate requests with the CA key. Copy `host_2`'s signed certificate and the CA certificate to `host_2`. In `host_1`:: - $ ovs-pki sign host_1 switch - $ ovs-pki sign host_2 switch - $ scp host_2-cert.pem $ip_2:/etc/keys/host_2-cert.pem - $ scp /var/lib/openvswitch/pki/switchca/cacert.pem \ + # ovs-pki sign host_1 switch + # ovs-pki sign host_2 switch + # scp host_2-cert.pem $ip_2:/etc/keys/host_2-cert.pem + # scp /var/lib/openvswitch/pki/switchca/cacert.pem \ $ip_2:/etc/keys/cacert.pem .. note:: @@ -243,22 +245,22 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure In `host_1`:: - $ ovs-vsctl set Open_vSwitch . \ + # ovs-vsctl set Open_vSwitch . \ other_config:certificate=/etc/keys/host_1-cert.pem \ other_config:private_key=/etc/keys/host_1-privkey.pem \ other_config:ca_cert=/etc/keys/cacert.pem - $ ovs-vsctl add-port br-ipsec tun -- \ + # ovs-vsctl add-port br-ipsec tun -- \ set interface tun type=gre \ options:remote_ip=$ip_2 \ options:remote_name=host_2 In `host_2`:: - $ ovs-vsctl set Open_vSwitch . \ + # ovs-vsctl set Open_vSwitch . \ other_config:certificate=/etc/keys/host_2-cert.pem \ other_config:private_key=/etc/keys/host_2-privkey.pem \ other_config:ca_cert=/etc/keys/cacert.pem - $ ovs-vsctl add-port br-ipsec tun -- \ + # ovs-vsctl add-port br-ipsec tun -- \ set interface tun type=gre \ options:remote_ip=$ip_1 \ options:remote_name=host_1 @@ -276,8 +278,8 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure Now you should have an IPsec GRE tunnel running between two hosts. 
To verify it, in `host_1`:: - $ ping 192.0.0.2 & - $ tcpdump -ni any net $ip_2 + # ping 192.0.0.2 & + # tcpdump -ni any net $ip_2 You should be able to see that ESP packets are being sent from `host_1` to `host_2`. @@ -289,7 +291,7 @@ The ``ovs-monitor-ipsec`` daemon manages and monitors the IPsec tunnel state. Use the following ``ovs-appctl`` command to view ``ovs-monitor-ipsec`` internal representation of tunnel configuration:: - $ ovs-appctl -t ovs-monitor-ipsec tunnels/show + # ovs-appctl -t ovs-monitor-ipsec tunnels/show If there is misconfiguration, then ``ovs-appctl`` should indicate why. For example:: @@ -324,7 +326,7 @@ For example:: If you don't see any active connections, try to run the following command to refresh the ``ovs-monitor-ipsec`` daemon:: - $ ovs-appctl -t ovs-monitor-ipsec refresh + # ovs-appctl -t ovs-monitor-ipsec refresh You can also check the logs of the ``ovs-monitor-ipsec`` daemon and the IKE daemon to locate issues. ``ovs-monitor-ipsec`` outputs log messages to -- GitLab From 943c4a325045cd3982100cf3367f2e6375be3a71 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Wed, 11 Nov 2020 04:25:30 -0500 Subject: [PATCH 367/432] python: set ovs.dirs variables with build system values ovs/dirs.py should be auto-generated using the template ovs/dirs.py.template at build time. This will set the ovs.dirs python variables with a value specified by the environment or, if the environment variable is not set, from the build system. 
Signed-off-by: Mark Gray Acked-By: Timothy Redaelli Signed-off-by: Ian Stokes --- lib/automake.mk | 2 +- python/automake.mk | 13 +++++++------ python/ovs/.gitignore | 1 + python/ovs/dirs.py | 31 ------------------------------- 4 files changed, 9 insertions(+), 38 deletions(-) delete mode 100644 python/ovs/dirs.py diff --git a/lib/automake.mk b/lib/automake.mk index 380a67228..8eeb6c3f6 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -575,7 +575,7 @@ MAN_FRAGMENTS += \ OVSIDL_BUILT += lib/vswitch-idl.c lib/vswitch-idl.h lib/vswitch-idl.ovsidl EXTRA_DIST += lib/vswitch-idl.ann -lib/vswitch-idl.ovsidl: vswitchd/vswitch.ovsschema lib/vswitch-idl.ann +lib/vswitch-idl.ovsidl: vswitchd/vswitch.ovsschema lib/vswitch-idl.ann python/ovs/dirs.py $(AM_V_GEN)$(OVSDB_IDLC) annotate $(srcdir)/vswitchd/vswitch.ovsschema $(srcdir)/lib/vswitch-idl.ann > $@.tmp && mv $@.tmp $@ lib/dirs.c: lib/dirs.c.in Makefile diff --git a/python/automake.mk b/python/automake.mk index 2f08c7701..c4382ec60 100644 --- a/python/automake.mk +++ b/python/automake.mk @@ -107,12 +107,13 @@ ALL_LOCAL += $(srcdir)/python/ovs/dirs.py $(srcdir)/python/ovs/dirs.py: python/ovs/dirs.py.template $(AM_V_GEN)sed \ -e '/^##/d' \ - -e 's,[@]pkgdatadir[@],/usr/local/share/openvswitch,g' \ - -e 's,[@]RUNDIR[@],/var/run,g' \ - -e 's,[@]LOGDIR[@],/usr/local/var/log,g' \ - -e 's,[@]bindir[@],/usr/local/bin,g' \ - -e 's,[@]sysconfdir[@],/usr/local/etc,g' \ - -e 's,[@]DBDIR[@],/usr/local/etc/openvswitch,g' \ + -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \ + -e 's,[@]RUNDIR[@],$(RUNDIR),g' \ + -e 's,[@]LOGDIR[@],$(LOGDIR),g' \ + -e 's,[@]bindir[@],$(bindir),g' \ + -e 's,[@]sysconfdir[@],$(sysconfdir),g' \ + -e 's,[@]DBDIR[@],$(sysconfdir)/openvswitch,g' \ < $? 
> $@.tmp && \ mv $@.tmp $@ EXTRA_DIST += python/ovs/dirs.py.template +CLEANFILES += python/ovs/dirs.py diff --git a/python/ovs/.gitignore b/python/ovs/.gitignore index 985278646..51030beca 100644 --- a/python/ovs/.gitignore +++ b/python/ovs/.gitignore @@ -1 +1,2 @@ version.py +dirs.py diff --git a/python/ovs/dirs.py b/python/ovs/dirs.py deleted file mode 100644 index c67aecbb4..000000000 --- a/python/ovs/dirs.py +++ /dev/null @@ -1,31 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at: -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# The @variables@ in this file are replaced by default directories for -# use in python/ovs/dirs.py in the source directory and replaced by the -# configured directories for use in the installed python/ovs/dirs.py. -# -import os - -# Note that the use of """ is to aid in dealing with paths with quotes in them.
-PKGDATADIR = os.environ.get("OVS_PKGDATADIR", """/usr/local/share/openvswitch""") -RUNDIR = os.environ.get("OVS_RUNDIR", """/var/run""") -LOGDIR = os.environ.get("OVS_LOGDIR", """/usr/local/var/log""") -BINDIR = os.environ.get("OVS_BINDIR", """/usr/local/bin""") - -DBDIR = os.environ.get("OVS_DBDIR") -if not DBDIR: - sysconfdir = os.environ.get("OVS_SYSCONFDIR") - if sysconfdir: - DBDIR = "%s/openvswitch" % sysconfdir - else: - DBDIR = """/usr/local/etc/openvswitch""" -- GitLab From bb9303899b8bd7a9251a723393959c923d0c0196 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 19 Oct 2020 17:14:37 +0200 Subject: [PATCH 368/432] odp-util: Fix overflow of nested netlink attributes. Length of nested attributes must be checked before storing to the header. If current length exceeds the maximum value parsing should fail, otherwise the length value will be truncated leading to corrupted netlink message and out-of-bound memory accesses: ERROR: AddressSanitizer: heap-buffer-overflow on address 0x6310002cc838 at pc 0x000000575470 bp 0x7ffc6c322d60 sp 0x7ffc6c322d58 READ of size 1 at 0x6310002cc838 thread T0 SCARINESS: 12 (1-byte-read-heap-buffer-overflow) #0 0x57546f in format_generic_odp_key lib/odp-util.c:2738:39 #1 0x559e70 in check_attr_len lib/odp-util.c:3572:13 #2 0x56581a in format_odp_key_attr lib/odp-util.c:4392:9 #3 0x5563b9 in format_odp_action lib/odp-util.c:1192:9 #4 0x555d75 in format_odp_actions lib/odp-util.c:1279:13 ... Fix that by checking the length of nested netlink attributes before updating 'nla_len' inside the header. Additionally introduced assertion inside nl_msg_end_nested() to catch this kind of issues before actual overflow happened. Credit to OSS-Fuzz. 
Reported-at: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=20003 Fixes: 65da723b40a5 ("odp-util: Format tunnel attributes directly from netlink.") Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- lib/netlink.c | 1 + lib/odp-util.c | 17 ++++++++++------- tests/tunnel.at | 29 +++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/lib/netlink.c b/lib/netlink.c index de3ebcd0e..26ab20bb4 100644 --- a/lib/netlink.c +++ b/lib/netlink.c @@ -498,6 +498,7 @@ void nl_msg_end_nested(struct ofpbuf *msg, size_t offset) { struct nlattr *attr = ofpbuf_at_assert(msg, offset, sizeof *attr); + ovs_assert(!nl_attr_oversized(msg->size - offset - NLA_HDRLEN)); attr->nla_len = msg->size - offset; } diff --git a/lib/odp-util.c b/lib/odp-util.c index 0bd2f9aa8..252a91bfa 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -5557,13 +5557,16 @@ gtpu_to_attr(struct ofpbuf *a, const void *data_) do { \ len = 0; -#define SCAN_END_NESTED() \ - SCAN_FINISH(); \ - nl_msg_end_nested(key, key_offset); \ - if (mask) { \ - nl_msg_end_nested(mask, mask_offset); \ - } \ - return s - start; \ +#define SCAN_END_NESTED() \ + SCAN_FINISH(); \ + if (nl_attr_oversized(key->size - key_offset - NLA_HDRLEN)) { \ + return -E2BIG; \ + } \ + nl_msg_end_nested(key, key_offset); \ + if (mask) { \ + nl_msg_end_nested(mask, mask_offset); \ + } \ + return s - start; \ } #define SCAN_FIELD_NESTED__(NAME, TYPE, SCAN_AS, ATTR, FUNC) \ diff --git a/tests/tunnel.at b/tests/tunnel.at index e08fd1e04..b8ae7caa9 100644 --- a/tests/tunnel.at +++ b/tests/tunnel.at @@ -132,6 +132,35 @@ tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recir OVS_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([tunnel - too long nested attributes]) +OVS_VSWITCHD_START([add-port br0 p1 \ + -- set Interface p1 type=gre options:remote_ip=1.1.1.1 ofport_request=1 \ + -- add-port br0 p2 -- set Interface p2 type=dummy ofport_request=2]) + +AT_CHECK([ovs-appctl dpif/show | tail -n +3], 
[0], [dnl + br0 65534/100: (dummy-internal) + p1 1/1: (gre: remote_ip=1.1.1.1) + p2 2/2: (dummy) +]) + +dst_single="dst=1.1.1.1" +dst_rep=${dst_single} +dnl Size of one OVS_TUNNEL_KEY_ATTR_IPV4_DST is 4 bytes + NLA_HDRLEN (4 bytes). +dnl One nested message has room for UINT16_MAX - NLA_HDRLEN (4) bytes, i.e. +dnl (UINT16_MAX - NLA_HDRLEN) / (4 + NLA_HDRLEN) = 8191.375 of dst addresses. +for i in `seq 1 8192` ; do + dst_rep="${dst_rep},${dst_single}" +done + +AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(${dst_rep})" "2" 2>&1 | dnl + sed "s/${dst_single},//g"], [], [dnl +ovs-vswitchd: parsing flow key (syntax error at tunnel(dst=1.1.1.1)) (Argument list too long) +ovs-appctl: ovs-vswitchd: server returned an error +]) + +OVS_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([tunnel - output]) OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \ options:remote_ip=1.1.1.1 options:local_ip=2.2.2.2 \ -- GitLab From be4b7719dc0d730a8ed8e9c7b4c492c25a258b8a Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 10 Nov 2020 13:03:53 +0100 Subject: [PATCH 369/432] ovsdb-idlc: Return expected sequence number while setting conditions. ovsdb_idl_set_condition() returns a sequence number that can be used to check if the requested conditions are acknowledged by the server. However, database bindings do not return this value to the user, making it impossible to check if the conditions are accepted. 
Acked-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- ovsdb/ovsdb-idlc.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in index 698fe25f3..b13195606 100755 --- a/ovsdb/ovsdb-idlc.in +++ b/ovsdb/ovsdb-idlc.in @@ -389,7 +389,7 @@ bool %(s)s_is_updated(const struct %(s)s *, enum %(s)s_column_id); args = ['%(type)s%(name)s' % member for member in members] print('%s);' % ', '.join(args)) - print('void %(s)s_set_condition(struct ovsdb_idl *, struct ovsdb_idl_condition *);' % {'s': structName}) + print('unsigned int %(s)s_set_condition(struct ovsdb_idl *, struct ovsdb_idl_condition *);' % {'s': structName}) print("") @@ -1416,10 +1416,10 @@ struct %(s)s * print("\nstruct ovsdb_idl_column %s_columns[%s_N_COLUMNS];" % ( structName, structName.upper())) print(""" -void +unsigned int %(s)s_set_condition(struct ovsdb_idl *idl, struct ovsdb_idl_condition *condition) { - ovsdb_idl_set_condition(idl, &%(p)stable_%(tl)s, condition); + return ovsdb_idl_set_condition(idl, &%(p)stable_%(tl)s, condition); }""" % {'p': prefix, 's': structName, 'tl': tableName.lower()}) -- GitLab From a1d2c5f5d9ed3c7116e76048c042c47dc85aa43c Mon Sep 17 00:00:00 2001 From: Renat Nurgaliyev Date: Sun, 15 Nov 2020 15:52:38 +0100 Subject: [PATCH 370/432] sha1: Fix algorithm for data bigger than 512 megabytes. In modern systems, size_t is 64 bits. There is a 32 bit overflow check in sha1_update(), which will not work correctly, because compiler will do an automatic cast to 64 bits, since size_t type variable is in the expression. We do want however to lose data, since this is the whole idea of this overflow check. Because of this, computation of SHA-1 checksum will always be incorrect for any data, that is bigger than 512 megabytes, which in bits is the boundary of 32 bits integer. 
In practice it means that any OVSDB transaction, bigger or equal to 512 megabytes, is considered corrupt and ovsdb-server will refuse to work with the database file. This is especially critical for OVN southbound database, since it tends to grow rapidly. Fixes: 5eccf359391f ("Replace SHA-1 library with one that is clearly licensed.") Signed-off-by: Renat Nurgaliyev Signed-off-by: Ilya Maximets --- lib/sha1.c | 4 ++-- lib/sha1.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/sha1.c b/lib/sha1.c index 4f48ef210..87360d9cd 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -197,7 +197,7 @@ sha1_init(struct sha1_ctx *sha_info) * inputLen: The length of the input buffer. */ void -sha1_update(struct sha1_ctx *ctx, const void *buffer_, size_t count) +sha1_update(struct sha1_ctx *ctx, const void *buffer_, uint32_t count) { const uint8_t *buffer = buffer_; unsigned int i; @@ -274,7 +274,7 @@ sha1_final(struct sha1_ctx *ctx, uint8_t digest[SHA1_DIGEST_SIZE]) /* Computes the hash of 'n' bytes in 'data' into 'digest'. 
*/ void -sha1_bytes(const void *data, size_t n, uint8_t digest[SHA1_DIGEST_SIZE]) +sha1_bytes(const void *data, uint32_t n, uint8_t digest[SHA1_DIGEST_SIZE]) { struct sha1_ctx ctx; diff --git a/lib/sha1.h b/lib/sha1.h index eda265dfc..a635ff768 100644 --- a/lib/sha1.h +++ b/lib/sha1.h @@ -45,9 +45,9 @@ struct sha1_ctx { }; void sha1_init(struct sha1_ctx *); -void sha1_update(struct sha1_ctx *, const void *, size_t); +void sha1_update(struct sha1_ctx *, const void *, uint32_t size); void sha1_final(struct sha1_ctx *, uint8_t digest[SHA1_DIGEST_SIZE]); -void sha1_bytes(const void *, size_t, uint8_t digest[SHA1_DIGEST_SIZE]); +void sha1_bytes(const void *, uint32_t size, uint8_t digest[SHA1_DIGEST_SIZE]); #define SHA1_FMT \ "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" \ -- GitLab From 955b120df845903b9ecc7c1637766230db354897 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Thu, 12 Nov 2020 19:54:50 -0500 Subject: [PATCH 371/432] lldp: validate a bit more received LLDP frames Upstream commit: commit 3aeae72b97716fddac290634fad02b952d981f17 Author: Vincent Bernat Date: Tue, 1 Oct 2019 21:42:42 +0200 lldp: validate a bit more received LLDP frames Notably, we ensure the order and unicity of Chassis ID, Port ID and TTL TLV. For Chassis ID and Port ID, we also ensure the maximum size does not exceed 256. 
Fix https://github.com/vincentbernat/lldpd/issues/351 Fixes: be53a5c447c3 ("auto-attach: Initial support for Auto-Attach standard") Signed-off-by: Aaron Conole Co-authored-by: Aaron Conole Signed-off-by: Ilya Maximets --- lib/lldp/lldp.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/lib/lldp/lldp.c b/lib/lldp/lldp.c index 74f747fcd..e61ce6774 100644 --- a/lib/lldp/lldp.c +++ b/lib/lldp/lldp.c @@ -341,6 +341,12 @@ lldp_send(struct lldpd *global OVS_UNUSED, return dp_packet_size(p); } +#define CHECK_TLV_MAX_SIZE(x, name) \ + do { if (tlv_size > (x)) { \ + VLOG_WARN(name " TLV too large received on %s", \ + hardware->h_ifname); \ + goto malformed; \ + } } while (0) int lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s, @@ -359,7 +365,7 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s, int length, af; bool gotend = false; bool ttl_received = false; - int tlv_size, tlv_type, tlv_subtype; + int tlv_size, tlv_type, tlv_subtype, tlv_count = 0; u_int8_t *pos, *tlv; void *b; struct lldpd_aa_isid_vlan_maps_tlv *isid_vlan_map = NULL; @@ -411,6 +417,31 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s, hardware->h_ifname); goto malformed; } + /* Check order for mandatory TLVs */ + tlv_count++; + switch (tlv_type) { + case LLDP_TLV_CHASSIS_ID: + if (tlv_count != 1) { + VLOG_WARN("first TLV should be a chassis ID on %s, not %d", + hardware->h_ifname, tlv_type); + goto malformed; + } + break; + case LLDP_TLV_PORT_ID: + if (tlv_count != 2) { + VLOG_WARN("second TLV should be a port ID on %s, not %d", + hardware->h_ifname, tlv_type); + goto malformed; + } + break; + case LLDP_TLV_TTL: + if (tlv_count != 3) { + VLOG_WARN("third TLV should be a TTL on %s, not %d", + hardware->h_ifname, tlv_type); + goto malformed; + } + break; + } switch (tlv_type) { case LLDP_TLV_END: @@ -428,7 +459,8 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s, case LLDP_TLV_CHASSIS_ID: 
case LLDP_TLV_PORT_ID: - CHECK_TLV_SIZE(2, "Port Id"); + CHECK_TLV_SIZE(2, "Port/Chassis Id"); + CHECK_TLV_MAX_SIZE(256, "Port/Chassis Id"); tlv_subtype = PEEK_UINT8; if (tlv_subtype == 0 || tlv_subtype > 7) { VLOG_WARN("unknown subtype for tlv id received on %s", @@ -438,10 +470,22 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s, b = xzalloc(tlv_size - 1); PEEK_BYTES(b, tlv_size - 1); if (tlv_type == LLDP_TLV_PORT_ID) { + if (port->p_id != NULL) { + VLOG_WARN("Port ID TLV received twice on %s", + hardware->h_ifname); + free(b); + goto malformed; + } port->p_id_subtype = tlv_subtype; port->p_id = b; port->p_id_len = tlv_size - 1; } else { + if (chassis->c_id != NULL) { + VLOG_WARN("Chassis ID TLV received twice on %s", + hardware->h_ifname); + free(b); + goto malformed; + } chassis->c_id_subtype = tlv_subtype; chassis->c_id = b; chassis->c_id_len = tlv_size - 1; @@ -449,6 +493,11 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s, break; case LLDP_TLV_TTL: + if (ttl_received) { + VLOG_WARN("TTL TLV received twice on %s", + hardware->h_ifname); + goto malformed; + } CHECK_TLV_SIZE(2, "TTL"); chassis->c_ttl = PEEK_UINT16; ttl_received = true; -- GitLab From 1c8e46d1050f5d902b0d1b65ba43aacf8a1805b7 Mon Sep 17 00:00:00 2001 From: Jonas Johansson Date: Thu, 12 Nov 2020 19:54:51 -0500 Subject: [PATCH 372/432] lldp: Fix size of PEEK_DISCARD_UINT32() Upstream commit: commit a8d8006c06d9ac16ebcf33295cbd625c0847ca9b Author: Jonas Johansson Date: Thu, 21 Apr 2016 11:50:06 +0200 Fix size of PEEK_DISCARD_UINT32() Signed-off-by: Jonas Johansson Fixes: be53a5c447c3 ("auto-attach: Initial support for Auto-Attach standard") Reported-by: Jonas Rudloff Reported-at: https://github.com/openvswitch/ovs/pull/336 Signed-off-by: Fabrizio D'Angelo Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- lib/lldp/lldp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/lldp/lldp.c b/lib/lldp/lldp.c index e61ce6774..593c5e1c3 100644 --- 
a/lib/lldp/lldp.c +++ b/lib/lldp/lldp.c @@ -59,7 +59,7 @@ VLOG_DEFINE_THIS_MODULE(lldp); } while (0) #define PEEK_DISCARD_UINT8 PEEK_DISCARD(1) #define PEEK_DISCARD_UINT16 PEEK_DISCARD(2) -#define PEEK_DISCARD_UINT32 PEEK_DISCARD(3) +#define PEEK_DISCARD_UINT32 PEEK_DISCARD(4) #define PEEK_CMP(value, bytes) \ (length -= (bytes), \ pos += (bytes), \ -- GitLab From bb5a9937fa8e04e71052fb50e23894448d19678f Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Thu, 12 Nov 2020 19:54:52 -0500 Subject: [PATCH 373/432] lldp: fix a buffer overflow when handling management address TLV Upstream commit: commit a8d8006c06d9ac16ebcf33295cbd625c0847ca9b Author: Vincent Bernat Date: Sun, 4 Oct 2015 01:50:38 +0200 lldp: fix a buffer overflow when handling management address TLV When a remote device was advertising a too large management address while still respecting TLV boundaries, lldpd would crash due to a buffer overflow. However, the buffer being a static one, this buffer overflow is not exploitable if hardening was not disabled. This bug exists since version 0.5.6. 
Fixes: be53a5c447c3 ("auto-attach: Initial support for Auto-Attach standard") Reported-by: Jonas Rudloff Reported-at: https://github.com/openvswitch/ovs/pull/335 Co-authored-by: Fabrizio D'Angelo Signed-off-by: Fabrizio D'Angelo Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- lib/lldp/lldp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/lldp/lldp.c b/lib/lldp/lldp.c index 593c5e1c3..628d0f863 100644 --- a/lib/lldp/lldp.c +++ b/lib/lldp/lldp.c @@ -530,6 +530,11 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s, case LLDP_TLV_MGMT_ADDR: CHECK_TLV_SIZE(1, "Management address"); addr_str_length = PEEK_UINT8; + if (addr_str_length > sizeof(addr_str_buffer)) { + VLOG_WARN("too large management address on %s", + hardware->h_ifname); + goto malformed; + } CHECK_TLV_SIZE(1 + addr_str_length, "Management address"); PEEK_BYTES(addr_str_buffer, addr_str_length); addr_length = addr_str_length - 1; @@ -554,7 +559,7 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s, break; case LLDP_TLV_ORG: - CHECK_TLV_SIZE(4, "Organisational"); + CHECK_TLV_SIZE(1 + sizeof orgid, "Organisational"); PEEK_BYTES(orgid, sizeof orgid); tlv_subtype = PEEK_UINT8; if (memcmp(dot1, orgid, sizeof orgid) == 0) { -- GitLab From b2c3c7824049a0044a9aa9805f085d8e5e4a0eae Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Thu, 12 Nov 2020 19:54:53 -0500 Subject: [PATCH 374/432] lldp: increase statsTLVsUnrecognizedTotal on unknown TLV Upstream commit: commit 109bcd423cd560545ec7940d73a50c5584aebb0c Author: Vincent Bernat Date: Sat, 6 Apr 2019 21:17:25 +0200 This was done for organization TLVs, but not for other TLVs. 
Fix https://github.com/vincentbernat/lldpd/issues/323 Fixes: be53a5c447c3 ("auto-attach: Initial support for Auto-Attach standard") Signed-off-by: Fabrizio D'Angelo Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- lib/lldp/lldp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/lldp/lldp.c b/lib/lldp/lldp.c index 628d0f863..e5755307f 100644 --- a/lib/lldp/lldp.c +++ b/lib/lldp/lldp.c @@ -679,6 +679,7 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s, VLOG_WARN("unknown tlv (%d) received on %s", tlv_type, hardware->h_ifname); + hardware->h_rx_unrecognized_cnt++; goto malformed; } if (pos > tlv + tlv_size) { -- GitLab From 965f2e47e6f7890c9fb6493f57bc09d4bd4e5c09 Mon Sep 17 00:00:00 2001 From: Vincent Bernat Date: Thu, 12 Nov 2020 19:54:54 -0500 Subject: [PATCH 375/432] lldp: correctly increase discarded count Upstream commit: commit 32f0deeebc9172c3f5f4a4d02aab32e6904947f6 Date: Sat, 18 Feb 2017 20:11:47 +0100 lldpd: correctly increase discarded count When a frame cannot be decoded but has been guessed, increase the discarded count. 
Fix https://github.com/vincentbernat/lldpd/issues/223 Fixes: be53a5c447c3 ("auto-attach: Initial support for Auto-Attach standard") Co-authored-by: Fabrizio D'Angelo Signed-off-by: Fabrizio D'Angelo Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- lib/lldp/lldpd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/lldp/lldpd.c b/lib/lldp/lldpd.c index 19e930526..34738535d 100644 --- a/lib/lldp/lldpd.c +++ b/lib/lldp/lldpd.c @@ -244,6 +244,7 @@ lldpd_decode(struct lldpd *cfg, char *frame, int s, if (s < sizeof(struct eth_header) + 4) { /* Too short, just discard it */ + hw->h_rx_discarded_cnt++; return; } @@ -284,6 +285,7 @@ lldpd_decode(struct lldpd *cfg, char *frame, int s, VLOG_DBG("function for %s protocol did not " "decode this frame", cfg->g_protocols[i].name); + hw->h_rx_discarded_cnt++; return; } chassis->c_protocol = port->p_protocol = cfg->g_protocols[i].mode; -- GitLab From 103f0a0dd170fe14ea0e3ecdb969c21ba1b1ffad Mon Sep 17 00:00:00 2001 From: Fabrizio D'Angelo Date: Thu, 12 Nov 2020 19:54:55 -0500 Subject: [PATCH 376/432] AUTHORS: Add Fabrizio D'Angelo. Signed-off-by: Fabrizio D'Angelo Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 10f0f272e..7282ca607 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -145,6 +145,7 @@ Eric Sesterhenn eric.sesterhenn@lsexperts.de Ethan J. Jackson ejj@eecs.berkeley.edu Ethan Rahn erahn@arista.com Eziz Durdyyev ezizdurdy@gmail.com +Fabrizio D'Angelo fdangelo@redhat.com Flavio Fernandes flavio@flaviof.com Flavio Leitner fbl@redhat.com Francesco Fusco ffusco@redhat.com -- GitLab From 1f66e1a861d4a822842edacf80c3fc8650ceda24 Mon Sep 17 00:00:00 2001 From: Sriharsha Basavapatna Date: Tue, 20 Oct 2020 14:03:52 -0400 Subject: [PATCH 377/432] netdev-offload-dpdk: Pass L4 proto-id to match in the L3 rte_flow_item. 
The offload layer clears the L4 protocol mask in the L3 item, when the L4 item is passed for matching, as an optimization. This can be confusing while parsing the headers in the PMD. Also, the datapath flow specifies this field to be matched. This optimization is best left to the PMD. This patch restores the code to pass the L4 protocol type in L3 match. Signed-off-by: Sriharsha Basavapatna Acked-by: Eli Britstein Tested-by: Emma Finn Signed-off-by: Ilya Maximets --- lib/netdev-offload-dpdk.c | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c index 17b08ca43..01c52e1de 100644 --- a/lib/netdev-offload-dpdk.c +++ b/lib/netdev-offload-dpdk.c @@ -677,7 +677,6 @@ static int parse_flow_match(struct flow_patterns *patterns, struct match *match) { - uint8_t *next_proto_mask = NULL; struct flow *consumed_masks; uint8_t proto = 0; @@ -783,7 +782,6 @@ parse_flow_match(struct flow_patterns *patterns, /* Save proto for L4 protocol setup. */ proto = spec->hdr.next_proto_id & mask->hdr.next_proto_id; - next_proto_mask = &mask->hdr.next_proto_id; } /* If fragmented, then don't HW accelerate - for now. */ if (match->wc.masks.nw_frag & match->flow.nw_frag) { @@ -826,7 +824,6 @@ parse_flow_match(struct flow_patterns *patterns, /* Save proto for L4 protocol setup. */ proto = spec->hdr.proto & mask->hdr.proto; - next_proto_mask = &mask->hdr.proto; } if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && @@ -859,11 +856,6 @@ parse_flow_match(struct flow_patterns *patterns, consumed_masks->tcp_flags = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask); - - /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. 
*/ - if (next_proto_mask) { - *next_proto_mask = 0; - } } else if (proto == IPPROTO_UDP) { struct rte_flow_item_udp *spec, *mask; @@ -880,11 +872,6 @@ parse_flow_match(struct flow_patterns *patterns, consumed_masks->tp_dst = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask); - - /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */ - if (next_proto_mask) { - *next_proto_mask = 0; - } } else if (proto == IPPROTO_SCTP) { struct rte_flow_item_sctp *spec, *mask; @@ -901,11 +888,6 @@ parse_flow_match(struct flow_patterns *patterns, consumed_masks->tp_dst = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask); - - /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */ - if (next_proto_mask) { - *next_proto_mask = 0; - } } else if (proto == IPPROTO_ICMP) { struct rte_flow_item_icmp *spec, *mask; @@ -922,11 +904,6 @@ parse_flow_match(struct flow_patterns *patterns, consumed_masks->tp_dst = 0; add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask); - - /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */ - if (next_proto_mask) { - *next_proto_mask = 0; - } } add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL); -- GitLab From 08ec09725ab1c2fab62da30643db3c41fe85fab3 Mon Sep 17 00:00:00 2001 From: Terry Wilson Date: Tue, 15 Sep 2020 16:29:06 -0500 Subject: [PATCH 378/432] python: Don't raise an Exception on failure to connect via SSL. With other socket types, trying to connect and failing will return an error code, but if an SSL Stream is used, then when check_connection_completion(sock) is called, SSL will raise an exception that doesn't derive from socket.error which is handled. This adds handling for SSL.SysCallError which has the same arguments as socket.error (errno, string). 
A future enhancement could be to go through SSLStream class and implement error checking for all of the possible exceptions similar to how lib/stream-ssl.c's interpret_ssl_error() works across the various methods that are implemented. Fixes: d90ed7d65ba8 ("python: Add SSL support to the python ovs client library") Signed-off-by: Terry Wilson Acked-by: Thomas Neuman Acked-by: Mark Michelson Signed-off-by: Ilya Maximets --- python/ovs/stream.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/python/ovs/stream.py b/python/ovs/stream.py index e9bb0c854..f5a520862 100644 --- a/python/ovs/stream.py +++ b/python/ovs/stream.py @@ -132,6 +132,10 @@ class Stream(object): IPTOS_PREC_INTERNETCONTROL = 0xc0 DSCP_DEFAULT = IPTOS_PREC_INTERNETCONTROL >> 2 + @staticmethod + def check_connection_completion(sock): + return ovs.socket_util.check_connection_completion(sock) + @staticmethod def open(name, dscp=DSCP_DEFAULT): """Attempts to connect a stream to a remote peer. 
'name' is a @@ -189,7 +193,7 @@ class Stream(object): if error: return error, None else: - err = ovs.socket_util.check_connection_completion(sock) + err = cls.check_connection_completion(sock) if err == errno.EAGAIN or err == errno.EINPROGRESS: status = errno.EAGAIN err = 0 @@ -261,7 +265,7 @@ class Stream(object): def __scs_connecting(self): if self.socket is not None: - retval = ovs.socket_util.check_connection_completion(self.socket) + retval = self.check_connection_completion(self.socket) assert retval != errno.EINPROGRESS elif sys.platform == 'win32': if self.retry_connect: @@ -761,6 +765,13 @@ Stream.register_method("tcp", TCPStream) class SSLStream(Stream): + @staticmethod + def check_connection_completion(sock): + try: + return Stream.check_connection_completion(sock) + except SSL.SysCallError as e: + return ovs.socket_util.get_exception_errno(e) + @staticmethod def needs_probes(): return True -- GitLab From f9b0107dd04a4cb05877d9bb8bd200e0b344fadc Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 10 Nov 2020 12:51:48 +0100 Subject: [PATCH 379/432] netdev-dpdk: Add ability to set MAC address. It is possible to set the MAC address of DPDK ports by calling rte_eth_dev_default_mac_addr_set(). OvS does not actually call this function for non-internal ports, but the implementation is exposed to be used in a later commit. 
Signed-off-by: Ilya Maximets Signed-off-by: Gaetan Rivet --- lib/netdev-dpdk.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 0b830be78..084f97807 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -2910,19 +2910,45 @@ netdev_dpdk_eth_send(struct netdev *netdev, int qid, return 0; } +static int +netdev_dpdk_set_etheraddr__(struct netdev_dpdk *dev, const struct eth_addr mac) + OVS_REQUIRES(dev->mutex) +{ + int err = 0; + + if (dev->type == DPDK_DEV_ETH) { + struct rte_ether_addr ea; + + memcpy(ea.addr_bytes, mac.ea, ETH_ADDR_LEN); + err = -rte_eth_dev_default_mac_addr_set(dev->port_id, &ea); + } + if (!err) { + dev->hwaddr = mac; + } else { + VLOG_WARN("%s: Failed to set requested mac("ETH_ADDR_FMT"): %s", + netdev_get_name(&dev->up), ETH_ADDR_ARGS(mac), + rte_strerror(err)); + } + + return err; +} + static int netdev_dpdk_set_etheraddr(struct netdev *netdev, const struct eth_addr mac) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); + int err = 0; ovs_mutex_lock(&dev->mutex); if (!eth_addr_equals(dev->hwaddr, mac)) { - dev->hwaddr = mac; - netdev_change_seq_changed(netdev); + err = netdev_dpdk_set_etheraddr__(dev, mac); + if (!err) { + netdev_change_seq_changed(netdev); + } } ovs_mutex_unlock(&dev->mutex); - return 0; + return err; } static int -- GitLab From f4336f504b17c0a2d9f47eb1ae1b7b140e3cc72a Mon Sep 17 00:00:00 2001 From: Gaetan Rivet Date: Tue, 10 Nov 2020 12:51:49 +0100 Subject: [PATCH 380/432] netdev-dpdk: Add option to configure VF MAC address. In some cloud topologies, using DPDK VF representors in guest requires configuring a VF before it is assigned to the guest. A first basic option for such configuration is setting the VF MAC address. Add a key 'dpdk-vf-mac' to the 'options' column of the Interface table. 
This option can be used as such: $ ovs-vsctl add-port br0 dpdk-rep0 -- set Interface dpdk-rep0 type=dpdk \ options:dpdk-vf-mac=00:11:22:33:44:55 Suggested-by: Ilya Maximets Acked-by: Eli Britstein Acked-by: Kevin Traynor Signed-off-by: Gaetan Rivet Signed-off-by: Ilya Maximets --- Documentation/topics/dpdk/phy.rst | 51 +++++++++++++++++++++++ NEWS | 2 + lib/netdev-dpdk.c | 69 +++++++++++++++++++++++++++++++ vswitchd/vswitch.xml | 18 ++++++++ 4 files changed, 140 insertions(+) diff --git a/Documentation/topics/dpdk/phy.rst b/Documentation/topics/dpdk/phy.rst index 55a98e2b0..7ee3eacff 100644 --- a/Documentation/topics/dpdk/phy.rst +++ b/Documentation/topics/dpdk/phy.rst @@ -379,6 +379,57 @@ an eth device whose mac address is ``00:11:22:33:44:55``:: $ ovs-vsctl add-port br0 dpdk-mac -- set Interface dpdk-mac type=dpdk \ options:dpdk-devargs="class=eth,mac=00:11:22:33:44:55" +Representor specific configuration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In some topologies, a VF must be configured before being assigned to a +guest (VM) machine. This configuration is done through VF-specific fields +in the ``options`` column of the ``Interface`` table. + +.. important:: + + Some DPDK port use `bifurcated drivers `__, + which means that a kernel netdevice remains when Open vSwitch is stopped. + + In such case, any configuration applied to a VF would remain set on the + kernel netdevice, and be inherited from it when Open vSwitch is restarted, + even if the options described in this section are unset from Open vSwitch. + +.. _bifurcated-drivers: http://doc.dpdk.org/guides/linux_gsg/linux_drivers.html#bifurcated-driver + +- Configure the VF MAC address:: + + $ ovs-vsctl set Interface dpdk-rep0 options:dpdk-vf-mac=00:11:22:33:44:55 + +The requested MAC address is assigned to the port and is listed as part of +its options:: + + $ ovs-appctl dpctl/show + [...] + port 3: dpdk-rep0 (dpdk: configured_rx_queues=1, ..., dpdk-vf-mac=00:11:22:33:44:55, ...) + + $ ovs-vsctl show + [...] 
+ Port dpdk-rep0 + Interface dpdk-rep0 + type: dpdk + options: {dpdk-devargs="", dpdk-vf-mac="00:11:22:33:44:55"} + + $ ovs-vsctl get Interface dpdk-rep0 status + {dpdk-vf-mac="00:11:22:33:44:55", ...} + + $ ovs-vsctl list Interface dpdk-rep0 | grep 'mac_in_use\|options' + mac_in_use : "00:11:22:33:44:55" + options : {dpdk-devargs="", dpdk-vf-mac="00:11:22:33:44:55"} + +The value listed as ``dpdk-vf-mac`` is only a request from the user and is +possibly not yet applied. + +When the requested configuration is successfully applied to the port, +this MAC address is then also shown in the column ``mac_in_use`` of +the ``Interface`` table. On failure however, ``mac_in_use`` will keep its +previous value, which will thus differ from ``dpdk-vf-mac``. + Jumbo Frames ------------ diff --git a/NEWS b/NEWS index a542c68ca..185555848 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ Post-v2.14.0 - Userspace datapath: * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which restricts a flow dump to a single PMD thread if set. + * New 'options:dpdk-vf-mac' field for DPDK interface of VF ports, + that allows configuring the MAC address of a VF representor. - The environment variable OVS_UNBOUND_CONF, if set, is now used as the DNS resolver's (unbound) configuration file. - Linux datapath: diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 084f97807..75dffefb8 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -522,6 +522,9 @@ struct netdev_dpdk { * otherwise interrupt mode is used. */ bool requested_lsc_interrupt_mode; bool lsc_interrupt_mode; + + /* VF configuration. 
*/ + struct eth_addr requested_hwaddr; ); PADDED_MEMBERS(CACHE_LINE_SIZE, @@ -1692,6 +1695,16 @@ out: return ret; } +static bool +dpdk_port_is_representor(struct netdev_dpdk *dev) + OVS_REQUIRES(dev->mutex) +{ + struct rte_eth_dev_info dev_info; + + rte_eth_dev_info_get(dev->port_id, &dev_info); + return (*dev_info.dev_flags) & RTE_ETH_DEV_REPRESENTOR; +} + static int netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) { @@ -1726,6 +1739,11 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args) } smap_add(args, "lsc_interrupt_mode", dev->lsc_interrupt_mode ? "true" : "false"); + + if (dpdk_port_is_representor(dev)) { + smap_add_format(args, "dpdk-vf-mac", ETH_ADDR_FMT, + ETH_ADDR_ARGS(dev->requested_hwaddr)); + } } ovs_mutex_unlock(&dev->mutex); @@ -1905,6 +1923,7 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args, {RTE_FC_RX_PAUSE, RTE_FC_FULL } }; const char *new_devargs; + const char *vf_mac; int err = 0; ovs_mutex_lock(&dpdk_mutex); @@ -1975,6 +1994,28 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args, goto out; } + vf_mac = smap_get(args, "dpdk-vf-mac"); + if (vf_mac) { + struct eth_addr mac; + + if (!dpdk_port_is_representor(dev)) { + VLOG_WARN_BUF(errp, "'%s' is trying to set the VF MAC '%s' " + "but 'options:dpdk-vf-mac' is only supported for " + "VF representors.", + netdev_get_name(netdev), vf_mac); + } else if (!eth_addr_from_string(vf_mac, &mac)) { + VLOG_WARN_BUF(errp, "interface '%s': cannot parse VF MAC '%s'.", + netdev_get_name(netdev), vf_mac); + } else if (eth_addr_is_multicast(mac)) { + VLOG_WARN_BUF(errp, + "interface '%s': cannot set VF MAC to multicast " + "address '%s'.", netdev_get_name(netdev), vf_mac); + } else if (!eth_addr_equals(dev->requested_hwaddr, mac)) { + dev->requested_hwaddr = mac; + netdev_request_reconfigure(netdev); + } + } + lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", false); if (dev->requested_lsc_interrupt_mode != 
lsc_interrupt_mode) { dev->requested_lsc_interrupt_mode = lsc_interrupt_mode; @@ -3647,6 +3688,7 @@ netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args) struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); struct rte_eth_dev_info dev_info; uint32_t link_speed; + uint32_t dev_flags; if (!rte_eth_dev_is_valid_port(dev->port_id)) { return ENODEV; @@ -3656,6 +3698,7 @@ netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args) ovs_mutex_lock(&dev->mutex); rte_eth_dev_info_get(dev->port_id, &dev_info); link_speed = dev->link.link_speed; + dev_flags = *dev_info.dev_flags; ovs_mutex_unlock(&dev->mutex); const struct rte_bus *bus; const struct rte_pci_device *pci_dev; @@ -3703,6 +3746,11 @@ netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args) smap_add(args, "link_speed", netdev_dpdk_link_speed_to_str__(link_speed)); + if (dev_flags & RTE_ETH_DEV_REPRESENTOR) { + smap_add_format(args, "dpdk-vf-mac", ETH_ADDR_FMT, + ETH_ADDR_ARGS(dev->hwaddr)); + } + return 0; } @@ -4939,6 +4987,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev) && dev->lsc_interrupt_mode == dev->requested_lsc_interrupt_mode && dev->rxq_size == dev->requested_rxq_size && dev->txq_size == dev->requested_txq_size + && eth_addr_equals(dev->hwaddr, dev->requested_hwaddr) && dev->socket_id == dev->requested_socket_id && dev->started && !dev->reset_needed) { /* Reconfiguration is unnecessary */ @@ -4970,6 +5019,14 @@ netdev_dpdk_reconfigure(struct netdev *netdev) dev->txq_size = dev->requested_txq_size; rte_free(dev->tx_q); + + if (!eth_addr_equals(dev->hwaddr, dev->requested_hwaddr)) { + err = netdev_dpdk_set_etheraddr__(dev, dev->requested_hwaddr); + if (err) { + goto out; + } + } + err = dpdk_eth_dev_init(dev); if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) { netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO; @@ -4981,6 +5038,18 @@ netdev_dpdk_reconfigure(struct netdev *netdev) } } + /* If both requested and actual hwaddr were previously + * unset (initialized to 
0), then first device init above + * will have set actual hwaddr to something new. + * This would trigger spurious MAC reconfiguration unless + * the requested MAC is kept in sync. + * + * This is harmless in case requested_hwaddr was + * configured by the user, as netdev_dpdk_set_etheraddr__() + * will have succeeded to get to this point. + */ + dev->requested_hwaddr = dev->hwaddr; + dev->tx_q = netdev_dpdk_alloc_txq(netdev->n_txq); if (!dev->tx_q) { err = ENOMEM; diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index d0890b843..89a876796 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -3275,6 +3275,24 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \ descriptors will be used by default.

    + + +

    + Ethernet address to set for this VF interface. If unset then the + default MAC address is used: +

    +
      +
    • + For most drivers, the default MAC address assigned by their + hardware. +
    • +
    • + For bifurcated drivers, the MAC currently used by the kernel + netdevice. +
    • +
    +

    This option may only be used with dpdk VF representors.

    +
    -- GitLab From 84029cb5d4433bc6fcc4a28e3f8e397714d483ab Mon Sep 17 00:00:00 2001 From: Timothy Redaelli Date: Fri, 18 Sep 2020 19:19:35 +0200 Subject: [PATCH 381/432] ofp-actions: Fix userspace support for mpls_ttl. Currently mpls_ttl is ignored when a flow is added because MFF_MPLS_TTL is not handled in nx_put_raw(). This commit adds the correct handling of MFF_MPLS_TTL in nx_put_raw(). Fixes: bef3f465bcd5 ("openflow: Support matching and modifying MPLS TTL field.") Signed-off-by: Timothy Redaelli Signed-off-by: Ilya Maximets --- lib/nx-match.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/nx-match.c b/lib/nx-match.c index 3ffd7d9d7..440f5f763 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -1133,6 +1133,11 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, mpls_lse_to_bos(flow->mpls_lse[0])); } + if (match->wc.masks.mpls_lse[0] & htonl(MPLS_TTL_MASK)) { + nxm_put_8(&ctx, MFF_MPLS_TTL, oxm, + mpls_lse_to_ttl(flow->mpls_lse[0])); + } + if (match->wc.masks.mpls_lse[0] & htonl(MPLS_LABEL_MASK)) { nxm_put_32(&ctx, MFF_MPLS_LABEL, oxm, htonl(mpls_lse_to_label(flow->mpls_lse[0]))); -- GitLab From 0062a04d8701113f5adb1c336404f9427f4e1a1e Mon Sep 17 00:00:00 2001 From: Timothy Redaelli Date: Mon, 26 Oct 2020 13:55:20 +0100 Subject: [PATCH 382/432] tests: Add parse-flow tests for MPLS fields. Currently "ovs-ofctl parse-flows (NXM)" test doesn't test MPLS fields at all. This commit adds a test for the 4 MPLS fields (mpls_label, mpls_tc, mpls_bos and mpls_ttl) to "ovs-ofctl parse-flows (NXM)" test. 
Signed-off-by: Timothy Redaelli Signed-off-by: Ilya Maximets --- tests/ovs-ofctl.at | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at index b6951f404..5ddca67e7 100644 --- a/tests/ovs-ofctl.at +++ b/tests/ovs-ofctl.at @@ -101,6 +101,7 @@ for test_case in \ 'mpls,mpls_label=5 NXM,OXM,OpenFlow11' \ 'mpls,mpls_tc=1 NXM,OXM,OpenFlow11' \ 'mpls,mpls_bos=0 NXM,OXM' \ + 'mpls,mpls_ttl=5 NXM,OXM' \ 'ip,ip_src=1.2.3.4 any' \ 'ip,ip_src=192.168.0.0/24 any' \ 'ip,ip_src=192.0.168.0/255.0.255.0 NXM,OXM,OpenFlow11' \ @@ -434,6 +435,7 @@ tcp,actions=fin_timeout(idle_timeout=5,hard_timeout=15) actions=controller(max_len=123,reason=invalid_ttl,id=555) actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678,sampling_port=56789) +mpls,mpls_label=5,mpls_tc=1,mpls_ttl=1,mpls_bos=0,actions=drop ip,actions=ct(commit,zone=5) ip,actions=ct(commit,exec(load(1->NXM_NX_CT_MARK[]))) ip,actions=ct(commit,exec(load(0x1->NXM_NX_CT_LABEL[]))) @@ -490,6 +492,7 @@ NXT_FLOW_MOD: ADD table:255 tcp actions=fin_timeout(idle_timeout=5,hard_timeout= NXT_FLOW_MOD: ADD table:255 actions=controller(reason=invalid_ttl,max_len=123,id=555) NXT_FLOW_MOD: ADD table:255 actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678) NXT_FLOW_MOD: ADD table:255 actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678,sampling_port=56789) +NXT_FLOW_MOD: ADD table:255 mpls,mpls_label=5,mpls_tc=1,mpls_ttl=1,mpls_bos=0 actions=drop NXT_FLOW_MOD: ADD table:255 ip actions=ct(commit,zone=5) NXT_FLOW_MOD: ADD table:255 ip actions=ct(commit,exec(load:0x1->NXM_NX_CT_MARK[])) NXT_FLOW_MOD: ADD table:255 ip actions=ct(commit,exec(load:0x1->NXM_NX_CT_LABEL[0..63],load:0->NXM_NX_CT_LABEL[64..127])) -- GitLab From 922553cb9766e4e2f35a7774211289e3bb042e86 Mon Sep 17 00:00:00 2001 From: 
Greg Rose Date: Thu, 12 Nov 2020 15:10:37 -0800 Subject: [PATCH 383/432] compat: Remove stale code. Remove stale and unused code left over after support for kernels older than 3.10 was removed. Fixes: 8063e0958780 ("datapath: Drop support for kernel older than 3.10") Signed-off-by: Greg Rose Acked-by: Yi-Hung Wei Signed-off-by: Ilya Maximets --- acinclude.m4 | 2 -- datapath/linux/compat/include/linux/percpu.h | 6 ------ datapath/linux/compat/include/linux/skbuff.h | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 1460289ca..9c2236f9e 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -975,8 +975,6 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_GREP_IFELSE([$KSRC/include/net/sock.h], [sk_no_check_tx]) OVS_GREP_IFELSE([$KSRC/include/linux/udp.h], [no_check6_tx]) - OVS_GREP_IFELSE([$KSRC/include/linux/utsrelease.h], [el6], - [OVS_DEFINE([HAVE_RHEL6_PER_CPU])]) OVS_FIND_PARAM_IFELSE([$KSRC/include/net/protocol.h], [udp_add_offload], [net], [OVS_DEFINE([HAVE_UDP_ADD_OFFLOAD_TAKES_NET])]) diff --git a/datapath/linux/compat/include/linux/percpu.h b/datapath/linux/compat/include/linux/percpu.h index 7c346aa31..a039142e2 100644 --- a/datapath/linux/compat/include/linux/percpu.h +++ b/datapath/linux/compat/include/linux/percpu.h @@ -7,12 +7,6 @@ #define this_cpu_ptr(ptr) per_cpu_ptr(ptr, smp_processor_id()) #endif -#ifdef HAVE_RHEL6_PER_CPU -#undef this_cpu_read -#undef this_cpu_inc -#undef this_cpu_dec -#endif - #if !defined this_cpu_read #define this_cpu_read(ptr) percpu_read(ptr) #endif diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h index 204ce5497..bc73255d5 100644 --- a/datapath/linux/compat/include/linux/skbuff.h +++ b/datapath/linux/compat/include/linux/skbuff.h @@ -278,7 +278,7 @@ static inline void skb_clear_hash(struct sk_buff *skb) #ifdef HAVE_RXHASH skb->rxhash = 0; #endif -#if defined(HAVE_L4_RXHASH) && !defined(HAVE_RHEL_OVS_HOOK) +#if defined(HAVE_L4_RXHASH) 
skb->l4_rxhash = 0; #endif } -- GitLab From f365b41f9dff9340e7da56aeabb8ceb18037c693 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Thu, 12 Nov 2020 15:10:38 -0800 Subject: [PATCH 384/432] compat: Fix build issue on RHEL 7.7. RHEL 7.2 introduced a KABI fixup in struct sk_buff for the name change of l4_rxhash to l4_hash. Then patch 9ba57fc7cccc ("datapath: Add hash info to upcall") introduced a compile error by using l4_hash and not fixing up the HAVE_L4_RXHASH configuration flag. Remove all references to HAVE_L4_RXHASH and always use l4_hash to resolve the issue. This will break compilation on RHEL 7.0 and RHEL 7.1 but dropping support for these older kernels shouldn't be a problem. Fixes: 9ba57fc7cccc ("datapath: Add hash info to upcall") Signed-off-by: Greg Rose Acked-by: Yi-Hung Wei Signed-off-by: Ilya Maximets --- acinclude.m4 | 2 -- datapath/datapath.c | 4 ---- datapath/linux/compat/include/linux/skbuff.h | 10 ++-------- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 9c2236f9e..ddf4b71e1 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -877,8 +877,6 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_clear_hash]) OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [int.skb_zerocopy(], [OVS_DEFINE([HAVE_SKB_ZEROCOPY])]) - OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [u8.*l4_rxhash], - [OVS_DEFINE([HAVE_L4_RXHASH])]) OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_ensure_writable]) OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_vlan_pop]) OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [__skb_vlan_pop]) diff --git a/datapath/datapath.c b/datapath/datapath.c index 52a59f135..8e9b9a706 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -529,11 +529,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, hash |= OVS_PACKET_HASH_SW_BIT; #endif -#ifdef HAVE_L4_RXHASH - if (skb->l4_rxhash) -#else if (skb->l4_hash) -#endif hash |= 
OVS_PACKET_HASH_L4_BIT; if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) { diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h index bc73255d5..396a5e406 100644 --- a/datapath/linux/compat/include/linux/skbuff.h +++ b/datapath/linux/compat/include/linux/skbuff.h @@ -278,9 +278,7 @@ static inline void skb_clear_hash(struct sk_buff *skb) #ifdef HAVE_RXHASH skb->rxhash = 0; #endif -#if defined(HAVE_L4_RXHASH) - skb->l4_rxhash = 0; -#endif + skb->l4_hash = 0; } #endif @@ -371,7 +369,7 @@ static inline void skb_pop_mac_header(struct sk_buff *skb) #ifndef HAVE_SKB_CLEAR_HASH_IF_NOT_L4 static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) { - if (!skb->l4_rxhash) + if (!skb->l4_hash) skb_clear_hash(skb); } #endif @@ -465,11 +463,7 @@ __skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4) #else skb->hash = hash; #endif -#if defined(HAVE_L4_RXHASH) - skb->l4_rxhash = is_l4; -#else skb->l4_hash = is_l4; -#endif #ifdef HAVE_SW_HASH skb->sw_hash = is_sw; #endif -- GitLab From 42da9cbc3aea5ce33beab1f2d86dc3e175603cad Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Thu, 12 Nov 2020 15:10:39 -0800 Subject: [PATCH 385/432] compat: Fix compile warning. In ../compat/nf_conntrack_reasm.c nf_frags_cache_name is declared if OVS_NF_DEFRAG6_BACKPORT is defined. However, later in the patch it is only used if HAVE_INET_FRAGS_WITH_FRAGS_WORK is defined and HAVE_INET_FRAGS_RND is not defined. This will cause a compile warning about unused variables. Fix it up by using the same defines that enable its use to decide if it should be declared and avoid the compiler warning. 
Fixes: 4a90b277baca ("compat: Fixup ipv6 fragmentation on 4.9.135+ kernels") Signed-off-by: Greg Rose Acked-by: Yi-Hung Wei Signed-off-by: Ilya Maximets --- datapath/linux/compat/nf_conntrack_reasm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/datapath/linux/compat/nf_conntrack_reasm.c b/datapath/linux/compat/nf_conntrack_reasm.c index ced9fba98..77b4b2548 100644 --- a/datapath/linux/compat/nf_conntrack_reasm.c +++ b/datapath/linux/compat/nf_conntrack_reasm.c @@ -57,10 +57,13 @@ #include #include "datapath.h" -#ifdef OVS_NF_DEFRAG6_BACKPORT +#if defined(HAVE_INET_FRAGS_WITH_FRAGS_WORK) || !defined(HAVE_INET_FRAGS_RND) static const char nf_frags_cache_name[] = "ovs-frag6"; +#endif + +#ifdef OVS_NF_DEFRAG6_BACKPORT struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; -- GitLab From 6fcef9088b6657f7a68cdded172f5bb5b053d848 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 4 Sep 2020 13:51:26 +0200 Subject: [PATCH 386/432] ovsdb-idl.at: Return stream open_block python tests. Invocations of CHECK_STREAM_OPEN_BLOCK_PY were accidentally removed during the python2 to python3 conversion. So, these tests have not been checked since that time. This change brings the tests back. CHECK_STREAM_OPEN_BLOCK_PY needed updates, so instead I refactored the function for C tests to be able to perform python tests too. Also, added a test for python with IPv6. 
Fixes: 1ca0323e7c29 ("Require Python 3 and remove support for Python 2.") Signed-off-by: Ilya Maximets Acked-by: Gaetan Rivet --- tests/ovsdb-idl.at | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index b46258591..a6d16176c 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -1788,33 +1788,25 @@ OVSDB_CHECK_IDL_COMPOUND_INDEX_WITH_REF([set, simple3 idl-compound-index-with-re ]]) m4_define([CHECK_STREAM_OPEN_BLOCK], - [AT_SETUP([Check Stream open block - C - $1]) - AT_SKIP_IF([test "$1" = "tcp6" && test "$IS_WIN32" = "yes"]) - AT_SKIP_IF([test "$1" = "tcp6" && test "$HAVE_IPV6" = "no"]) - AT_KEYWORDS([Check Stream open block $1]) - AT_CHECK([ovsdb_start_idltest "ptcp:0:$2"]) + [AT_SETUP([Check stream open block - $1 - $3]) + AT_SKIP_IF([test "$3" = "tcp6" && test "$IS_WIN32" = "yes"]) + AT_SKIP_IF([test "$3" = "tcp6" && test "$HAVE_IPV6" = "no"]) + AT_KEYWORDS([ovsdb server stream open_block $3]) + AT_CHECK([ovsdb_start_idltest "ptcp:0:$4"]) PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT]) WRONG_PORT=$(($TCP_PORT + 101)) - AT_CHECK([test-stream tcp:$2:$TCP_PORT], [0], [ignore]) - AT_CHECK([test-stream tcp:$2:$WRONG_PORT], [1], [ignore], [ignore]) + AT_CHECK([$2 tcp:$4:$TCP_PORT], [0], [ignore]) + AT_CHECK([$2 tcp:$4:$WRONG_PORT], [1], [ignore], [ignore]) OVSDB_SERVER_SHUTDOWN - AT_CHECK([test-stream tcp:$2:$TCP_PORT], [1], [ignore], [ignore]) + AT_CHECK([$2 tcp:$4:$TCP_PORT], [1], [ignore], [ignore]) AT_CLEANUP]) -CHECK_STREAM_OPEN_BLOCK([tcp], [127.0.0.1]) -CHECK_STREAM_OPEN_BLOCK([tcp6], [[[::1]]]) - -m4_define([CHECK_STREAM_OPEN_BLOCK_PY], - [AT_SETUP([$1 - Python3]) - AT_KEYWORDS([Check PY Stream open block - $3]) - AT_CHECK([ovsdb_start_idltest "ptcp:0:127.0.0.1"]) - PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT]) - WRONG_PORT=$(($TCP_PORT + 101)) - AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$TCP_PORT], [0], [ignore]) - AT_CHECK([$3 
$srcdir/test-stream.py tcp:127.0.0.1:$WRONG_PORT], [1], [ignore]) - OVSDB_SERVER_SHUTDOWN - AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$TCP_PORT], [1], [ignore]) - AT_CLEANUP]) +CHECK_STREAM_OPEN_BLOCK([C], [test-stream], [tcp], [127.0.0.1]) +CHECK_STREAM_OPEN_BLOCK([C], [test-stream], [tcp6], [[[::1]]]) +CHECK_STREAM_OPEN_BLOCK([Python3], [$PYTHON3 $srcdir/test-stream.py], + [tcp], [127.0.0.1]) +CHECK_STREAM_OPEN_BLOCK([Python3], [$PYTHON3 $srcdir/test-stream.py], + [tcp6], [[[::1]]]) # same as OVSDB_CHECK_IDL but uses Python IDL implementation with tcp # with multiple remotes to assert the idl connects to the leader of the Raft cluster -- GitLab From 12eb2f67df055faab7603b987c9f3c044cc199a8 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Tue, 20 Oct 2020 11:07:07 -0400 Subject: [PATCH 387/432] ovsdb-idl: Fix *_is_new() IDL functions. Currently all functions of the type *_is_new() always return 'false'. This patch resolves this issue by using the 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' instead of the 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' to determine if a row is new and by resetting the 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' on clear. Further to this, the code is also updated to match the following behaviour: When a row is inserted, the 'OVSDB_IDL_CHANGE_INSERT' 'change_seqno' is updated to match the new database change_seqno. The 'OVSDB_IDL_CHANGE_MODIFY' 'change_seqno' is not set for inserted rows (only for updated rows). At the end of a run, ovsdb_idl_db_track_clear() should be called to clear all tracking information, this includes resetting all row 'change_seqno' to zero. This will ensure that subsequent runs will not see a previously 'new' row. add_tracked_change_for_references() is updated to only track rows that reference the current row. Also, update unit tests in order to test the *_is_new(), *_is_delete() functions. 
Suggested-by: Dumitru Ceara Reported-at: https://bugzilla.redhat.com/1883562 Fixes: ca545a787ac0 ("ovsdb-idl.c: Increase seqno for change-tracking of table references.") Signed-off-by: Mark Gray Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 42 +++++++++++++++++++++++++++++------------- ovsdb/ovsdb-idlc.in | 22 +++++++++++++++++++--- tests/ovsdb-idl.at | 5 ++++- tests/test-ovsdb.c | 32 ++++++++++++++++++++++++-------- 4 files changed, 76 insertions(+), 25 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index fdb9d85f5..ede4060cc 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -1959,6 +1959,11 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db) free(row->updated); row->updated = NULL; } + + row->change_seqno[OVSDB_IDL_CHANGE_INSERT] = + row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] = + row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0; + ovs_list_remove(&row->track_node); ovs_list_init(&row->track_node); if (ovsdb_idl_row_is_orphan(row) && row->tracked_old_datum) { @@ -2684,22 +2689,25 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table, return OVSDB_IDL_UPDATE_DB_CHANGED; } -/* Recursively add rows to tracked change lists for current row - * and the rows that reference this row. */ +/* Recursively add rows to tracked change lists for all rows that reference + 'row'. 
*/ static void add_tracked_change_for_references(struct ovsdb_idl_row *row) { - if (ovs_list_is_empty(&row->track_node) && - ovsdb_idl_track_is_set(row->table)) { - ovs_list_push_back(&row->table->track_list, - &row->track_node); - row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] - = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY] - = row->table->db->change_seqno + 1; - - const struct ovsdb_idl_arc *arc; - LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) { - add_tracked_change_for_references(arc->src); + const struct ovsdb_idl_arc *arc; + LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) { + struct ovsdb_idl_row *ref = arc->src; + + if (ovs_list_is_empty(&ref->track_node) && + ovsdb_idl_track_is_set(ref->table)) { + ovs_list_push_back(&ref->table->track_list, + &ref->track_node); + + ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY] + = ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY] + = ref->table->db->change_seqno + 1; + + add_tracked_change_for_references(ref); } } } @@ -2767,7 +2775,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row *row, const struct json *row_json, row->change_seqno[change] = row->table->change_seqno[change] = row->table->db->change_seqno + 1; + if (table->modes[column_idx] & OVSDB_IDL_TRACK) { + if (ovs_list_is_empty(&row->track_node) && + ovsdb_idl_track_is_set(row->table)) { + ovs_list_push_back(&row->table->track_list, + &row->track_node); + } + add_tracked_change_for_references(row); if (!row->updated) { row->updated = bitmap_allocate(class->n_columns); @@ -4843,6 +4858,7 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn *txn, hmap_insert(&row->table->rows, &row->hmap_node, uuid_hash(&row->uuid)); hmap_insert(&txn->txn_rows, &row->txn_node, uuid_hash(&row->uuid)); ovsdb_idl_add_to_indexes(row); + return row; } diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in index b13195606..5914e0878 100755 --- a/ovsdb/ovsdb-idlc.in +++ b/ovsdb/ovsdb-idlc.in @@ -279,13 +279,21 @@ const struct %(s)s *%(s)s_table_track_get_first(const struct %(s)s_table *); (ROW) = 
%(s)s_track_get_next(ROW)) -/* Returns true if 'row' was inserted since the last change tracking reset. */ +/* Returns true if 'row' was inserted since the last change tracking reset. + * + * Note: This can only be used to test rows of tracked changes. This cannot be + * used to test if an uncommitted row that has been added locally is new or it + * may given unexpected results. */ static inline bool %(s)s_is_new(const struct %(s)s *row) { - return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0; + return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0; } -/* Returns true if 'row' was deleted since the last change tracking reset. */ +/* Returns true if 'row' was deleted since the last change tracking reset. + * + * Note: This can only be used to test rows of tracked changes. This cannot be + * used to test if an uncommitted row that has been added locally has been + * deleted or it may given unexpected results. */ static inline bool %(s)s_is_deleted(const struct %(s)s *row) { return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_DELETE) > 0; @@ -333,6 +341,14 @@ struct %(s)s *%(s)s_cursor_data(struct ovsdb_idl_cursor *); void %(s)s_init(struct %(s)s *); void %(s)s_delete(const struct %(s)s *); struct %(s)s *%(s)s_insert(struct ovsdb_idl_txn *); + +/* Returns true if the tracked column referenced by 'enum %(s)s_column_id' of + * the row referenced by 'struct %(s)s *' was updated since the last change + * tracking reset. + * + * Note: This can only be used to test rows of tracked changes. This cannot be + * used to test if an uncommitted row that has been added locally has been + * updated or it may given unexpected results. 
*/ bool %(s)s_is_updated(const struct %(s)s *, enum %(s)s_column_id); ''' % {'s': structName, 'S': structName.upper()}) diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index a6d16176c..cacc82d82 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -1162,6 +1162,7 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated], "where": [], "row": {"b": true}}]']], [[000: i=1 r=2 b=true s=mystring u=<0> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<1> <2>] uuid=<3> +000: inserted row: uuid=<3> 000: updated columns: b ba i ia r ra s sa u ua 001: {"error":null,"result":[{"count":2}]} 002: i=0 r=0 b=true s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<5> @@ -1224,6 +1225,7 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops], [[000: empty 001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}]} 002: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0> +002: inserted row: uuid=<0> 002: updated columns: b ba i ia r ra s sa u ua 003: {"error":null,"result":[{"count":2}]} 004: i=0 r=0 b=true s= u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> @@ -1235,6 +1237,7 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops], 006: updated columns: r 007: {"error":null,"result":[{"uuid":["uuid","<6>"]}]} 008: i=-1 r=125 b=false s= u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> +008: inserted row: uuid=<6> 008: updated columns: ba i ia r ra 009: {"error":null,"result":[{"count":2}]} 010: i=-1 r=125 b=false s=newstring u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> @@ -1242,7 +1245,7 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops], 010: updated columns: s 010: updated columns: s 011: {"error":null,"result":[{"count":1}]} -012: ##deleted## uuid=<1> +012: deleted row: uuid=<1> 013: reconnect 014: i=-1 r=125 b=false s=newstring u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> 014: i=1 r=123.5 b=true s=mystring u=<2> ia=[1 2 3] 
ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0> diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index b1a4be36b..6dd476f75 100644 --- a/tests/test-ovsdb.c +++ b/tests/test-ovsdb.c @@ -2030,7 +2030,7 @@ print_idl(struct ovsdb_idl *idl, int step) } static void -print_idl_track(struct ovsdb_idl *idl, int step, unsigned int seqno) +print_idl_track(struct ovsdb_idl *idl, int step) { const struct idltest_simple *s; const struct idltest_link1 *l1; @@ -2038,26 +2038,42 @@ print_idl_track(struct ovsdb_idl *idl, int step, unsigned int seqno) int n = 0; IDLTEST_SIMPLE_FOR_EACH_TRACKED (s, idl) { - if (idltest_simple_row_get_seqno(s, OVSDB_IDL_CHANGE_DELETE) >= seqno) { - printf("%03d: ##deleted## uuid="UUID_FMT"\n", step, UUID_ARGS(&s->header_.uuid)); + if (idltest_simple_is_deleted(s)) { + printf("%03d: deleted row: uuid="UUID_FMT"\n", step, + UUID_ARGS(&s->header_.uuid)); } else { print_idl_row_simple(s, step); + if (idltest_simple_is_new(s)) { + printf("%03d: inserted row: uuid="UUID_FMT"\n", step, + UUID_ARGS(&s->header_.uuid)); + } } n++; } IDLTEST_LINK1_FOR_EACH_TRACKED (l1, idl) { - if (idltest_simple_row_get_seqno(s, OVSDB_IDL_CHANGE_DELETE) >= seqno) { - printf("%03d: ##deleted## uuid="UUID_FMT"\n", step, UUID_ARGS(&s->header_.uuid)); + if (idltest_link1_is_deleted(l1)) { + printf("%03d: deleted row: uuid="UUID_FMT"\n", step, + UUID_ARGS(&l1->header_.uuid)); } else { print_idl_row_link1(l1, step); + if (idltest_link1_is_new(l1)) { + printf("%03d: inserted row: uuid="UUID_FMT"\n", step, + UUID_ARGS(&l1->header_.uuid)); + } } n++; } IDLTEST_LINK2_FOR_EACH_TRACKED (l2, idl) { - if (idltest_simple_row_get_seqno(s, OVSDB_IDL_CHANGE_DELETE) >= seqno) { - printf("%03d: ##deleted## uuid="UUID_FMT"\n", step, UUID_ARGS(&s->header_.uuid)); + if (idltest_link2_is_deleted(l2)) { + printf("%03d: deleted row: uuid="UUID_FMT"\n", step, + UUID_ARGS(&l2->header_.uuid)); } else { print_idl_row_link2(l2, step); + if (idltest_link2_is_new(l2)) { + printf("%03d: inserted row: 
uuid="UUID_FMT"\n", step, + UUID_ARGS(&l2->header_.uuid)); + } + } n++; } @@ -2465,7 +2481,7 @@ do_idl(struct ovs_cmdl_context *ctx) /* Print update. */ if (track) { - print_idl_track(idl, step++, ovsdb_idl_get_seqno(idl)); + print_idl_track(idl, step++); ovsdb_idl_track_clear(idl); } else { print_idl(idl, step++); -- GitLab From 17f22fe46142ef0402bff0e3eb9a4768d93b8008 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Tue, 10 Nov 2020 15:34:28 +0100 Subject: [PATCH 388/432] ovsdb-idl: Return correct seqno from ovsdb_idl_db_set_condition(). If an IDL client sets the same monitor condition twice, the expected seqno when the IDL contents are updated should be the same for both calls. In the following scenario: 1. Client calls ovsdb_idl_db_set_condition(db, table, cond1) 2. ovsdb_idl sends monitor_cond_change(cond1) but the server doesn't yet reply. 3. Client calls ovsdb_idl_db_set_condition(db, table, cond1) At step 3 the returned expected seqno should be the same as at step 1. Similarly, if step 2 is skipped, i.e., the client sets the condition twice in the same iteration, then both ovsdb_idl_db_set_condition() calls should return the same value. Fixes: 46437c5232bd ("ovsdb-idl: Enhance conditional monitoring API") Signed-off-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 9 ++++++--- tests/test-ovsdb.c | 4 ++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index ede4060cc..6334061b4 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -1564,7 +1564,6 @@ ovsdb_idl_db_set_condition(struct ovsdb_idl_db *db, { struct ovsdb_idl_condition *table_cond; struct ovsdb_idl_table *table = ovsdb_idl_db_table_from_class(db, tc); - unsigned int seqno = db->cond_seqno; /* Compare the new condition to the last known condition which can be * either "new" (not sent yet), "requested" or "acked", in this order.
@@ -1582,10 +1581,14 @@ ovsdb_idl_db_set_condition(struct ovsdb_idl_db *db, ovsdb_idl_condition_clone(&table->new_cond, condition); db->cond_changed = true; poll_immediate_wake(); - return seqno + 1; + return db->cond_seqno + 1; + } else if (table_cond != table->ack_cond) { + /* 'condition' was already set but has not been "acked" yet. The IDL + * will be up to date when db->cond_seqno gets incremented. */ + return db->cond_seqno + 1; } - return seqno; + return db->cond_seqno; } /* Sets the replication condition for 'tc' in 'idl' to 'condition' and diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index 6dd476f75..aade40f3f 100644 --- a/tests/test-ovsdb.c +++ b/tests/test-ovsdb.c @@ -2407,6 +2407,10 @@ update_conditions(struct ovsdb_idl *idl, char *commands) if (seqno == next_seqno ) { ovs_fatal(0, "condition unchanged"); } + unsigned int new_next_seqno = ovsdb_idl_set_condition(idl, tc, &cond); + if (next_seqno != new_next_seqno) { + ovs_fatal(0, "condition expected seqno changed"); + } ovsdb_idl_condition_destroy(&cond); json_destroy(json); } -- GitLab From 7bfb1952b9dd4f34fc11706da83e0d7b514420c7 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Tue, 17 Nov 2020 15:26:13 -0800 Subject: [PATCH 389/432] Update scripts to support RHEL 7.9 Add support for RHEL7.9 GA release with kernel 3.10.0-1160 Signed-off-by: Greg Rose Reviewed-by: Yifeng Sun Signed-off-by: Gurucharan Shetty --- rhel/openvswitch-kmod-fedora.spec.in | 6 ++++-- rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh | 6 ++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/rhel/openvswitch-kmod-fedora.spec.in b/rhel/openvswitch-kmod-fedora.spec.in index 15eec6d4c..ff190064f 100644 --- a/rhel/openvswitch-kmod-fedora.spec.in +++ b/rhel/openvswitch-kmod-fedora.spec.in @@ -19,6 +19,7 @@ # - 3.10.0 major revision 1062 (RHEL 7.7) # - 3.10.0 major revision 1101 (RHEL 7.8 Beta) # - 3.10.0 major revision 1127 (RHEL 7.8 GA) +# - 3.10.0 major revision 1160 (RHEL 7.9 GA) # By default, build 
against the current running kernel version #%define kernel 3.1.5-1.fc16.x86_64 #define kernel %{kernel_source} @@ -98,8 +99,9 @@ if grep -qs "suse" /etc/os-release; then elif [ "$mainline_major" = "3" ] && [ "$mainline_minor" = "10" ] && { [ "$major_rev" = "327" ] || [ "$major_rev" = "693" ] || \ [ "$major_rev" = "957" ] || [ "$major_rev" == "1062" ] || \ - [ "$major_rev" = "1101" ] || [ "$major_rev" = "1127" ] ; }; then - # For RHEL 7.2, 7.4, 7.6, 7.7, and 7.8 + [ "$major_rev" = "1101" ] || [ "$major_rev" = "1127" ] || \ + [ "$major_rev" = "1160" ] ; }; then + # For RHEL 7.2, 7.4, 7.6, 7.7, 7.8 and 7.9 if [ -x "%{_datadir}/openvswitch/scripts/ovs-kmod-manage.sh" ]; then %{_datadir}/openvswitch/scripts/ovs-kmod-manage.sh fi diff --git a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh index c70e135cd..9bf25a46b 100644 --- a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh +++ b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh @@ -21,6 +21,7 @@ # - 3.10.0 major revision 1062 (RHEL 7.7) # - 3.10.0 major revision 1101 (RHEL 7.8 Beta) # - 3.10.0 major revision 1127 (RHEL 7.8 GA) +# - 3.10.0 major revision 1160 (RHEL 7.9) # - 4.4.x, x >= 73 (SLES 12 SP3) # - 4.12.x, x >= 14 (SLES 12 SP4). # It is packaged in the openvswitch kmod RPM and run in the post-install @@ -118,6 +119,11 @@ if [ "$mainline_major" = "3" ] && [ "$mainline_minor" = "10" ]; then comp_ver=10 ver_offset=4 installed_ver="$minor_rev" + elif [ "$major_rev" = "1160" ]; then +# echo "rhel79" + comp_ver=10 + ver_offset=4 + installed_ver="$minor_rev" fi elif [ "$mainline_major" = "4" ] && [ "$mainline_minor" = "4" ]; then if [ "$mainline_patch" -ge "73" ]; then -- GitLab From d409f50062a7a72233e00cfe0466228034f8fb31 Mon Sep 17 00:00:00 2001 From: Mark Gray Date: Thu, 19 Nov 2020 03:44:34 -0500 Subject: [PATCH 390/432] python: Update build system to ensure dirs.py is created. 
Update build system to ensure dirs.py is created when it is a dependency for a build target. Also, update setup.py to check for that dependency. Fixes: 943c4a325045 ("python: set ovs.dirs variables with build system values") Signed-off-by: Mark Gray Signed-off-by: Ilya Maximets --- lib/automake.mk | 2 +- ovsdb/automake.mk | 2 +- python/automake.mk | 24 ++++++++++++------------ python/ovs/.gitignore | 2 +- python/setup.py | 9 +++++++++ 5 files changed, 24 insertions(+), 15 deletions(-) diff --git a/lib/automake.mk b/lib/automake.mk index 8eeb6c3f6..380a67228 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -575,7 +575,7 @@ MAN_FRAGMENTS += \ OVSIDL_BUILT += lib/vswitch-idl.c lib/vswitch-idl.h lib/vswitch-idl.ovsidl EXTRA_DIST += lib/vswitch-idl.ann -lib/vswitch-idl.ovsidl: vswitchd/vswitch.ovsschema lib/vswitch-idl.ann python/ovs/dirs.py +lib/vswitch-idl.ovsidl: vswitchd/vswitch.ovsschema lib/vswitch-idl.ann $(AM_V_GEN)$(OVSDB_IDLC) annotate $(srcdir)/vswitchd/vswitch.ovsschema $(srcdir)/lib/vswitch-idl.ann > $@.tmp && mv $@.tmp $@ lib/dirs.c: lib/dirs.c.in Makefile diff --git a/ovsdb/automake.mk b/ovsdb/automake.mk index b895f4292..d60f3f4ec 100644 --- a/ovsdb/automake.mk +++ b/ovsdb/automake.mk @@ -106,7 +106,7 @@ CLEANFILES += $(OVSIDL_BUILT) # However, current versions of Automake seem to output all variable # assignments before any targets, so it doesn't seem to be a problem, # at least for now. 
-$(OVSIDL_BUILT): ovsdb/ovsdb-idlc.in +$(OVSIDL_BUILT): ovsdb/ovsdb-idlc.in python/ovs/dirs.py # ovsdb-doc EXTRA_DIST += ovsdb/ovsdb-doc diff --git a/python/automake.mk b/python/automake.mk index c4382ec60..767512f17 100644 --- a/python/automake.mk +++ b/python/automake.mk @@ -74,12 +74,12 @@ ovs-install-data-local: $(MKDIR_P) python/ovs sed \ -e '/^##/d' \ - -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \ - -e 's,[@]RUNDIR[@],$(RUNDIR),g' \ - -e 's,[@]LOGDIR[@],$(LOGDIR),g' \ - -e 's,[@]bindir[@],$(bindir),g' \ - -e 's,[@]sysconfdir[@],$(sysconfdir),g' \ - -e 's,[@]DBDIR[@],$(DBDIR),g' \ + -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \ + -e 's,[@]RUNDIR[@],$(RUNDIR),g' \ + -e 's,[@]LOGDIR[@],$(LOGDIR),g' \ + -e 's,[@]bindir[@],$(bindir),g' \ + -e 's,[@]sysconfdir[@],$(sysconfdir),g' \ + -e 's,[@]DBDIR[@],$(DBDIR),g' \ < $(srcdir)/python/ovs/dirs.py.template \ > python/ovs/dirs.py.tmp $(MKDIR_P) $(DESTDIR)$(pkgdatadir)/python/ovs @@ -107,12 +107,12 @@ ALL_LOCAL += $(srcdir)/python/ovs/dirs.py $(srcdir)/python/ovs/dirs.py: python/ovs/dirs.py.template $(AM_V_GEN)sed \ -e '/^##/d' \ - -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \ - -e 's,[@]RUNDIR[@],$(RUNDIR),g' \ - -e 's,[@]LOGDIR[@],$(LOGDIR),g' \ - -e 's,[@]bindir[@],$(bindir),g' \ - -e 's,[@]sysconfdir[@],$(sysconfdir),g' \ - -e 's,[@]DBDIR[@],$(sysconfdir)/openvswitch,g' \ + -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \ + -e 's,[@]RUNDIR[@],$(RUNDIR),g' \ + -e 's,[@]LOGDIR[@],$(LOGDIR),g' \ + -e 's,[@]bindir[@],$(bindir),g' \ + -e 's,[@]sysconfdir[@],$(sysconfdir),g' \ + -e 's,[@]DBDIR[@],$(sysconfdir)/openvswitch,g' \ < $? 
> $@.tmp && \ mv $@.tmp $@ EXTRA_DIST += python/ovs/dirs.py.template diff --git a/python/ovs/.gitignore b/python/ovs/.gitignore index 51030beca..8bbcd824f 100644 --- a/python/ovs/.gitignore +++ b/python/ovs/.gitignore @@ -1,2 +1,2 @@ version.py -dir.py +dirs.py diff --git a/python/setup.py b/python/setup.py index b7252800c..d385d8372 100644 --- a/python/setup.py +++ b/python/setup.py @@ -30,6 +30,15 @@ except IOError: file=sys.stderr) sys.exit(-1) +try: + # Try to open generated ovs/dirs.py. However, in this case we + # don't need to exec() + open("ovs/dirs.py") +except IOError: + print("Ensure dirs.py is created by running make python/ovs/dirs.py", + file=sys.stderr) + sys.exit(-1) + ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError) if sys.platform == 'win32': ext_errors += (IOError, ValueError) -- GitLab From 629283a8eb1eea0b9d82f8549fe48f2b8209934f Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 25 Nov 2020 13:35:48 +0100 Subject: [PATCH 391/432] ci: Don't use 'native' machine for DPDK cache. It's possible that actual HW where CI is running is slightly different between jobs. That makes all unit tests to fail with cached DPDK builds due to 'Illegal instruction' crashes. Changing machine type to 'default' to generate binaries as generic as possible and avoid this kind of issues. Changing the name of a cache version file, so we will not use old 'native' builds that are currently in cache. 
Fixes: 7654a3ed0b38 ("travis: Cache DPDK build.") Acked-by: Kevin Traynor Acked-by: Simon Horman Signed-off-by: Ilya Maximets --- .travis/linux-build.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.travis/linux-build.sh b/.travis/linux-build.sh index 60d8931f3..16102ac94 100755 --- a/.travis/linux-build.sh +++ b/.travis/linux-build.sh @@ -135,6 +135,10 @@ function install_dpdk() sed -i '/CONFIG_RTE_EAL_IGB_UIO=y/s/=y/=n/' build/.config sed -i '/CONFIG_RTE_KNI_KMOD=y/s/=y/=n/' build/.config + # Switching to 'default' machine to make dpdk-dir cache usable on different + # CPUs. We can't be sure that all CI machines are exactly same. + sed -i '/CONFIG_RTE_MACHINE="native"/s/="native"/="default"/' build/.config + make -j4 CC=gcc EXTRA_CFLAGS='-fPIC' EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=$(pwd)/build" echo "Installed DPDK source in $(pwd)" -- GitLab From 6cb2f5a630e32a2c521de95eaf97e0faea6c764c Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 23 Nov 2020 23:34:28 +0100 Subject: [PATCH 392/432] github: Add GitHub Actions workflow. This is an initial version of GitHub Actions support. It mostly mimics our current Travis CI build matrix with slight differences. The main issue is that we don't have ARM support here. A minor difference is that we can not install 32-bit versions of libunwind and libunbound since those are not available in the repository. The higher concurrency level allows all tests to finish in less than 20 minutes, which is 3 times faster than in Travis. The .travis folder is renamed to .ci to highlight that it is used not only for Travis CI. Travis CI support will be reduced to only test ARM builds soon and will be completely removed when travis-ci.org is turned into read-only mode.
What happened to Travis CI: https://mail.openvswitch.org/pipermail/ovs-dev/2020-November/377773.html Acked-by: Simon Horman Signed-off-by: Ilya Maximets --- {.travis => .ci}/linux-build.sh | 0 {.travis => .ci}/linux-prepare.sh | 13 +- {.travis => .ci}/osx-build.sh | 0 {.travis => .ci}/osx-prepare.sh | 0 .github/workflows/build-and-test.yml | 203 ++++++++++++++++++ .travis.yml | 4 +- .../contributing/submitting-patches.rst | 9 +- Makefile.am | 9 +- NEWS | 2 + README.rst | 2 + 10 files changed, 227 insertions(+), 15 deletions(-) rename {.travis => .ci}/linux-build.sh (100%) rename {.travis => .ci}/linux-prepare.sh (72%) rename {.travis => .ci}/osx-build.sh (100%) rename {.travis => .ci}/osx-prepare.sh (100%) create mode 100644 .github/workflows/build-and-test.yml diff --git a/.travis/linux-build.sh b/.ci/linux-build.sh similarity index 100% rename from .travis/linux-build.sh rename to .ci/linux-build.sh diff --git a/.travis/linux-prepare.sh b/.ci/linux-prepare.sh similarity index 72% rename from .travis/linux-prepare.sh rename to .ci/linux-prepare.sh index 71eb347e8..fea905a83 100755 --- a/.travis/linux-prepare.sh +++ b/.ci/linux-prepare.sh @@ -25,10 +25,15 @@ pip3 install --user --upgrade docutils if [ "$M32" ]; then # Installing 32-bit libraries. - # 32-bit and 64-bit libunwind can not be installed at the same time. - # This will remove the 64-bit libunwind and install 32-bit version. - sudo apt-get install -y \ - libunwind-dev:i386 libunbound-dev:i386 gcc-multilib + pkgs="gcc-multilib" + if [ -z "$GITHUB_WORKFLOW" ]; then + # 32-bit and 64-bit libunwind can not be installed at the same time. + # This will remove the 64-bit libunwind and install 32-bit version. + # GitHub Actions doesn't have 32-bit versions of these libs. 
+ pkgs=$pkgs" libunwind-dev:i386 libunbound-dev:i386" + fi + + sudo apt-get install -y $pkgs fi # IPv6 is supported by kernel but disabled in TravisCI images: diff --git a/.travis/osx-build.sh b/.ci/osx-build.sh similarity index 100% rename from .travis/osx-build.sh rename to .ci/osx-build.sh diff --git a/.travis/osx-prepare.sh b/.ci/osx-prepare.sh similarity index 100% rename from .travis/osx-prepare.sh rename to .ci/osx-prepare.sh diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml new file mode 100644 index 000000000..847fd3150 --- /dev/null +++ b/.github/workflows/build-and-test.yml @@ -0,0 +1,203 @@ +name: Build and Test + +on: [push, pull_request] + +jobs: + build-linux: + env: + dependencies: | + automake libtool gcc bc libjemalloc1 libjemalloc-dev \ + libssl-dev llvm-dev libelf-dev libnuma-dev libpcap-dev \ + python3-openssl python3-pip python3-sphinx \ + selinux-policy-dev + deb_dependencies: | + linux-headers-$(uname -r) build-essential fakeroot devscripts equivs + AFXDP: ${{ matrix.afxdp }} + CC: ${{ matrix.compiler }} + DEB_PACKAGE: ${{ matrix.deb_package }} + DPDK: ${{ matrix.dpdk }} + DPDK_SHARED: ${{ matrix.dpdk_shared }} + KERNEL: ${{ matrix.kernel }} + KERNEL_LIST: ${{ matrix.kernel_list }} + LIBS: ${{ matrix.libs }} + M32: ${{ matrix.m32 }} + OPTS: ${{ matrix.opts }} + TESTSUITE: ${{ matrix.testsuite }} + + name: linux ${{ join(matrix.*, ' ') }} + runs-on: ubuntu-18.04 + timeout-minutes: 30 + + strategy: + fail-fast: false + matrix: + include: + - compiler: gcc + opts: --disable-ssl + - compiler: clang + opts: --disable-ssl + + - compiler: gcc + testsuite: test + kernel: 3.16 + - compiler: clang + testsuite: test + kernel: 3.16 + + - compiler: gcc + testsuite: test + opts: --enable-shared + - compiler: clang + testsuite: test + opts: --enable-shared + + - compiler: gcc + testsuite: test + dpdk: dpdk + - compiler: clang + testsuite: test + dpdk: dpdk + + - compiler: gcc + testsuite: test + libs: -ljemalloc + - 
compiler: clang + testsuite: test + libs: -ljemalloc + + - compiler: gcc + kernel_list: 5.8 5.5 5.4 4.19 + - compiler: clang + kernel_list: 5.8 5.5 5.4 4.19 + + - compiler: gcc + kernel_list: 4.14 4.9 4.4 3.16 + - compiler: clang + kernel_list: 4.14 4.9 4.4 3.16 + + - compiler: gcc + afxdp: afxdp + kernel: 5.3 + - compiler: clang + afxdp: afxdp + kernel: 5.3 + + - compiler: gcc + dpdk: dpdk + opts: --enable-shared + - compiler: clang + dpdk: dpdk + opts: --enable-shared + + - compiler: gcc + dpdk_shared: dpdk-shared + - compiler: clang + dpdk_shared: dpdk-shared + + - compiler: gcc + dpdk_shared: dpdk-shared + opts: --enable-shared + - compiler: clang + dpdk_shared: dpdk-shared + opts: --enable-shared + + - compiler: gcc + m32: m32 + opts: --disable-ssl + + - compiler: gcc + deb_package: deb + + steps: + - name: checkout + uses: actions/checkout@v2 + + - name: create ci signature file for the dpdk cache key + if: matrix.dpdk != '' || matrix.dpdk_shared != '' + # This will collect most of DPDK related lines, so hash will be different + # if something changed in a way we're building DPDK including DPDK_VER. + # This also allows us to use cache from any branch as long as version + # and a way we're building DPDK stays the same. 
+ run: | + grep -irE 'RTE_|DPDK|meson|ninja' -r .ci/ > dpdk-ci-signature + cat dpdk-ci-signature + + - name: cache + if: matrix.dpdk != '' || matrix.dpdk_shared != '' + uses: actions/cache@v2 + env: + matrix_key: ${{ matrix.dpdk }}${{ matrix.dpdk_shared }} + ci_key: ${{ hashFiles('dpdk-ci-signature') }} + with: + path: dpdk-dir + key: ${{ env.matrix_key }}-${{ env.ci_key }} + + - name: install common dependencies + if: matrix.deb_package == '' + run: sudo apt install -y ${{ env.dependencies }} + - name: install dependencies for debian packages + if: matrix.deb_package != '' + run: sudo apt install -y ${{ env.deb_dependencies }} + - name: install libunbound libunwind + if: matrix.m32 == '' + run: sudo apt install -y libunbound-dev libunwind-dev + + - name: prepare + run: ./.ci/linux-prepare.sh + + - name: build + run: PATH="$PATH:$HOME/bin" ./.ci/linux-build.sh + + - name: upload deb packages + if: matrix.deb_package != '' + uses: actions/upload-artifact@v2 + with: + name: deb-packages + path: '/home/runner/work/ovs/*.deb' + + - name: copy logs on failure + if: failure() || cancelled() + run: | + # upload-artifact@v2 throws exceptions if it tries to upload socket + # files and we could have some socket files in testsuite.dir. + # Also, upload-artifact@v2 doesn't work well enough with wildcards. + # So, we're just archiving everything here to avoid any issues. 
+ mkdir logs + cp config.log ./logs/ + cp -r ./*/_build/sub/tests/testsuite.* ./logs/ || true + tar -czvf logs.tgz logs/ + + - name: upload logs on failure + if: failure() || cancelled() + uses: actions/upload-artifact@v2 + with: + name: logs-linux-${{ join(matrix.*, '-') }} + path: logs.tgz + + build-osx: + env: + CC: clang + OPTS: --disable-ssl + + name: osx clang --disable-ssl + runs-on: macos-latest + timeout-minutes: 30 + + strategy: + fail-fast: false + + steps: + - name: checkout + uses: actions/checkout@v2 + - name: install dependencies + run: brew install automake libtool + - name: prepare + run: ./.ci/osx-prepare.sh + - name: build + run: PATH="$PATH:$HOME/bin" ./.ci/osx-build.sh + - name: upload logs on failure + if: failure() + uses: actions/upload-artifact@v2 + with: + name: logs-osx-clang---disable-ssl + path: config.log diff --git a/.travis.yml b/.travis.yml index 9fd8bbe01..34ef16aa7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,7 @@ addons: - libunbound-dev - libunwind-dev -before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh +before_install: ./.ci/${TRAVIS_OS_NAME}-prepare.sh before_script: export PATH=$PATH:$HOME/bin @@ -76,7 +76,7 @@ matrix: - devscripts - equivs -script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS +script: ./.ci/${TRAVIS_OS_NAME}-build.sh $OPTS notifications: email: diff --git a/Documentation/internals/contributing/submitting-patches.rst b/Documentation/internals/contributing/submitting-patches.rst index 5a314cc60..4a6780371 100644 --- a/Documentation/internals/contributing/submitting-patches.rst +++ b/Documentation/internals/contributing/submitting-patches.rst @@ -68,11 +68,10 @@ Testing is also important: feature. A bug fix patch should preferably add a test that would fail if the bug recurs. -If you are using GitHub, then you may utilize the travis-ci.org CI build system -by linking your GitHub repository to it. This will run some of the above tests -automatically when you push changes to your repository. 
See the "Continuous -Integration with Travis-CI" in :doc:`/topics/testing` for details on how to set -it up. +If you are using GitHub, then you may utilize the travis-ci.org and the GitHub +Actions CI build systems. They will run some of the above tests automatically +when you push changes to your repository. See the "Continuous Integration with +Travis-CI" in :doc:`/topics/testing` for details on how to set it up. Email Subject ------------- diff --git a/Makefile.am b/Makefile.am index a3fbb15e2..691a005ad 100644 --- a/Makefile.am +++ b/Makefile.am @@ -76,12 +76,13 @@ EXTRA_DIST = \ MAINTAINERS.rst \ README.rst \ NOTICE \ + .ci/linux-build.sh \ + .ci/linux-prepare.sh \ + .ci/osx-build.sh \ + .ci/osx-prepare.sh \ .cirrus.yml \ + .github/workflows/build-and-test.yml \ .travis.yml \ - .travis/linux-build.sh \ - .travis/linux-prepare.sh \ - .travis/osx-build.sh \ - .travis/osx-prepare.sh \ appveyor.yml \ boot.sh \ poc/builders/Vagrantfile \ diff --git a/NEWS b/NEWS index 185555848..7e291a180 100644 --- a/NEWS +++ b/NEWS @@ -25,6 +25,8 @@ Post-v2.14.0 "secondary", respectively, for OpenFlow connection roles. * The term "slave" has been replaced by "member", for bonds, LACP, and OpenFlow bundle actions. + - Support for GitHub Actions based continuous integration builds has been + added. v2.14.0 - 17 Aug 2020 diff --git a/README.rst b/README.rst index e06ddf267..319f70515 100644 --- a/README.rst +++ b/README.rst @@ -6,6 +6,8 @@ Open vSwitch ============ +.. image:: https://github.com/openvswitch/ovs/workflows/Build%20and%20Test/badge.svg + :target: https://github.com/openvswitch/ovs/actions .. image:: https://travis-ci.org/openvswitch/ovs.png :target: https://travis-ci.org/openvswitch/ovs .. 
image:: https://ci.appveyor.com/api/projects/status/github/openvswitch/ovs?branch=master&svg=true&retina=true -- GitLab From a8ee6bf72852d2469dcddf26918f7fce53f32960 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 25 Nov 2020 12:20:04 +0100 Subject: [PATCH 393/432] travis: Keep only arm64 builds. All other builds are covered by GitHub Actions now. This should decrease time our jobs waiting in a queue due to reduced capacity of travis-ci.org. Acked-by: Simon Horman Signed-off-by: Ilya Maximets --- .travis.yml | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/.travis.yml b/.travis.yml index 34ef16aa7..acf3c10fb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,4 @@ language: c -compiler: - - gcc - - clang os: - linux @@ -32,25 +29,8 @@ before_install: ./.ci/${TRAVIS_OS_NAME}-prepare.sh before_script: export PATH=$PATH:$HOME/bin -env: - - OPTS="--disable-ssl" - - TESTSUITE=1 KERNEL=3.16 - - TESTSUITE=1 OPTS="--enable-shared" - - TESTSUITE=1 DPDK=1 - - TESTSUITE=1 LIBS=-ljemalloc - - KERNEL_LIST="5.8 5.5 5.4 4.19" - - KERNEL_LIST="4.14 4.9 4.4 3.16" - - AFXDP=1 KERNEL=5.3 - - M32=1 OPTS="--disable-ssl" - - DPDK=1 OPTS="--enable-shared" - - DPDK_SHARED=1 - - DPDK_SHARED=1 OPTS="--enable-shared" - matrix: include: - - os: osx - compiler: clang - env: OPTS="--disable-ssl" - arch: arm64 compiler: gcc env: TESTSUITE=1 DPDK=1 @@ -66,15 +46,6 @@ matrix: - arch: arm64 compiler: clang env: OPTS="--disable-ssl" - - env: DEB_PACKAGE=1 - addons: - apt: - packages: - - linux-headers-$(uname -r) - - build-essential - - fakeroot - - devscripts - - equivs script: ./.ci/${TRAVIS_OS_NAME}-build.sh $OPTS -- GitLab From dc497e36fc06a72e9f66674ab947d5b3d98b7f5f Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 18 Nov 2020 22:18:58 +0100 Subject: [PATCH 394/432] checkpatch: Add check for a whitespace after cast. Coding style says: "Put a space between the ``()`` used in a cast and the expression whose type is cast: ``(void *) 0``.". 
This style rule is frequently overlooked. Let's check for it. Signed-off-by: Ilya Maximets Acked-by: Ian Stokes --- tests/checkpatch.at | 17 +++++++++++++++++ utilities/checkpatch.py | 13 +++++++++++++ 2 files changed, 30 insertions(+) diff --git a/tests/checkpatch.at b/tests/checkpatch.at index 6c7394772..a51e46e7a 100755 --- a/tests/checkpatch.at +++ b/tests/checkpatch.at @@ -326,3 +326,20 @@ try_checkpatch \ " AT_CLEANUP + +AT_SETUP([checkpatch - whitespace around cast]) +try_checkpatch \ + "COMMON_PATCH_HEADER + + (int) a; + " + +try_checkpatch \ + "COMMON_PATCH_HEADER + + (int)a; + " \ + "ERROR: Inappropriate spacing around cast + #8 FILE: A.c:1: + (int)a; +" + +AT_CLEANUP diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py index ed231fa6f..bc6bfae15 100755 --- a/utilities/checkpatch.py +++ b/utilities/checkpatch.py @@ -167,6 +167,7 @@ __regex_is_for_if_single_line_bracket = \ __regex_ends_with_bracket = \ re.compile(r'[^\s]\) {(\s+/\*[\s\Sa-zA-Z0-9\.,\?\*/+-]*)?$') __regex_ptr_declaration_missing_whitespace = re.compile(r'[a-zA-Z0-9]\*[^*]') +__regex_cast_missing_whitespace = re.compile(r'\)[a-zA-Z0-9]') __regex_is_comment_line = re.compile(r'^\s*(/\*|\*\s)') __regex_has_comment = re.compile(r'.*(/\*|\*\s)') __regex_has_c99_comment = re.compile(r'.*//.*$') @@ -286,6 +287,12 @@ def pointer_whitespace_check(line): return __regex_ptr_declaration_missing_whitespace.search(line) is not None +def cast_whitespace_check(line): + """Return TRUE if there is no space between the '()' used in a cast and + the expression whose type is cast, i.e.: '(void *)foo'""" + return __regex_cast_missing_whitespace.search(line) is not None + + def line_length_check(line): """Return TRUE if the line length is too long""" if len(line) > 79: @@ -551,6 +558,12 @@ checks = [ 'print': lambda: print_error("Inappropriate spacing in pointer declaration")}, + {'regex': r'(\.c|\.h)(\.in)?$', 'match_name': None, + 'prereq': lambda x: not is_comment_line(x), + 'check': lambda x: 
cast_whitespace_check(x), + 'print': + lambda: print_error("Inappropriate spacing around cast")}, + {'regex': r'(\.c|\.h)(\.in)?$', 'match_name': None, 'prereq': lambda x: not is_comment_line(x), 'check': lambda x: trailing_operator(x), -- GitLab From cd9e88eeb588e382e5c25ed81ed02d222617cc49 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 18 Nov 2020 22:05:59 +0100 Subject: [PATCH 395/432] perf-counter: Split numbers in the output. While trying to benchmark big functions, values could be longer than 12 digits. In this case all of them are printed without spaces. It's hard to read. Fixes: 619c3a42dc1e ("lib: add a hardware performance counter access library") Signed-off-by: Ilya Maximets Acked-by: Greg Rose --- lib/perf-counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/perf-counter.c b/lib/perf-counter.c index 402fabe17..e4eca58d0 100644 --- a/lib/perf-counter.c +++ b/lib/perf-counter.c @@ -111,7 +111,7 @@ perf_counter_to_ds(struct ds *ds, struct perf_counter *pfc) ratio = 0.0; } - ds_put_format(ds, "%-40s%12"PRIu64"%12"PRIu64"%12.1f\n", + ds_put_format(ds, "%-40s %12"PRIu64" %12"PRIu64" %12.1f\n", pfc->name, pfc->n_events, pfc->total_count, ratio); } -- GitLab From 37423e4dc8bd0496f562421d922fbb7eb8290b11 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 16 Nov 2020 20:08:22 +0100 Subject: [PATCH 396/432] tests: Add overflow test for the sha1 library.
This is a unit test for the overflow detection issue fixed by commit a1d2c5f5d9ed ("sha1: Fix algorithm for data bigger than 512 megabytes.") Signed-off-by: Ilya Maximets Acked-by: Paolo Valerio Tested-by: Paolo Valerio --- tests/library.at | 3 ++- tests/test-sha1.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/tests/library.at b/tests/library.at index ac4ea4abf..1702b7556 100644 --- a/tests/library.at +++ b/tests/library.at @@ -53,7 +53,8 @@ AT_CHECK([ovstest test-packets]) AT_CLEANUP AT_SETUP([SHA-1]) -AT_CHECK([ovstest test-sha1], [0], [......... +AT_KEYWORDS([sha1]) +AT_CHECK([ovstest test-sha1], [0], [.......... ]) AT_CLEANUP diff --git a/tests/test-sha1.c b/tests/test-sha1.c index b7279db6a..cc80888a7 100644 --- a/tests/test-sha1.c +++ b/tests/test-sha1.c @@ -137,6 +137,42 @@ test_big_vector(void) free(vec.data); } +static void +test_huge_vector(void) +{ + enum { SIZE = 1000000000 }; + struct test_vector vec = { + NULL, SIZE, + /* Computed by the sha1sum utility for a file with 10^9 symbols 'a'. */ + { 0xD0, 0xF3, 0xE4, 0xF2, 0xF3, 0x1C, 0x66, 0x5A, 0xBB, 0xD8, + 0xF5, 0x18, 0xE8, 0x48, 0xD5, 0xCB, 0x80, 0xCA, 0x78, 0xF7 } + }; + int chunk = random_range(SIZE / 10000); + uint8_t md[SHA1_DIGEST_SIZE]; + struct sha1_ctx sha1; + size_t i, sz; + + /* It's not user-friendly to allocate 1GB of memory for a unit test, + * so we're allocating only a small chunk and re-using it. */ + vec.data = xmalloc(chunk); + for (i = 0; i < chunk; i++) { + vec.data[i] = 'a'; + } + + sha1_init(&sha1); + for (sz = 0; sz < SIZE; sz += chunk) { + int n = sz + chunk < SIZE ? 
chunk : SIZE - sz; + + sha1_update(&sha1, vec.data, n); + } + sha1_final(&sha1, md); + ovs_assert(!memcmp(md, vec.output, SHA1_DIGEST_SIZE)); + + free(vec.data); + putchar('.'); + fflush(stdout); +} + static void test_shar1_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) { @@ -147,6 +183,7 @@ test_shar1_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) } test_big_vector(); + test_huge_vector(); putchar('\n'); } -- GitLab From f0d23f67954cce22e9465b566f13ff8c4aec168b Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 23 Nov 2020 09:37:47 +0100 Subject: [PATCH 397/432] ovsdb-idl: Fix iteration over tracked rows with no actual data. When idl removes orphan rows, those rows are inserted into the 'track_list'. This allows iterators such as *_FOR_EACH_TRACKED () to return orphan rows that never had any data to the IDL user. In this case, it is difficult for the user to understand whether it is a row with no data (there was no "insert" / "modify" for this row) or it is a row with zero data (columns were cleared by DB transaction). The main problem with this condition is that rows without data will have NULL pointers instead of references that should be there according to the database schema. For example, ovn-controller might crash: ERROR: AddressSanitizer: SEGV on unknown address 0x000000000100 (pc 0x00000055e9b2 bp 0x7ffef6180880 sp 0x7ffef6180860 T0) The signal is caused by a READ memory access. Hint: address points to the zero page. 
#0 0x55e9b1 in handle_deleted_lport /controller/binding.c #1 0x55e903 in handle_deleted_vif_lport /controller/binding.c:2072:5 #2 0x55e059 in binding_handle_port_binding_changes /controller/binding.c:2155:23 #3 0x5a6395 in runtime_data_sb_port_binding_handler /controller/ovn-controller.c:1454:10 #4 0x5e15b3 in engine_compute /lib/inc-proc-eng.c:306:18 #5 0x5e0faf in engine_run_node /lib/inc-proc-eng.c:352:14 #6 0x5e0e04 in engine_run /lib/inc-proc-eng.c:377:9 #7 0x5a03de in main /controller/ovn-controller.c #8 0x7f4fd9c991a2 in __libc_start_main (/lib64/libc.so.6+0x271a2) #9 0x483f0d in _start (/controller/ovn-controller+0x483f0d) It doesn't make much sense to return non-real rows to the user, so it's best to exclude them from iteration. Test included. Without the fix, provided test will print empty orphan rows that was never received by idl as tracked changes. Fixes: 932104f483ef ("ovsdb-idl: Add support for change tracking.") Signed-off-by: Ilya Maximets Acked-by: Dumitru Ceara --- lib/ovsdb-idl.c | 22 ++++++++++----- tests/idltest.ovsschema | 15 ++++++++++ tests/ovsdb-idl.at | 55 +++++++++++++++++++++++++++++++++++++ tests/test-ovsdb.c | 61 +++++++++++++++++++++++++++++++++++++---- 4 files changed, 140 insertions(+), 13 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 6334061b4..23648ff6b 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -1892,29 +1892,37 @@ ovsdb_idl_track_is_set(struct ovsdb_idl_table *table) } /* Returns the first tracked row in table with class 'table_class' - * for the specified 'idl'. Returns NULL if there are no tracked rows */ + * for the specified 'idl'. Returns NULL if there are no tracked rows. + * Pure orphan rows, i.e. rows that never had any datum, are skipped. 
*/ const struct ovsdb_idl_row * ovsdb_idl_track_get_first(const struct ovsdb_idl *idl, const struct ovsdb_idl_table_class *table_class) { struct ovsdb_idl_table *table = ovsdb_idl_db_table_from_class(&idl->data, table_class); + struct ovsdb_idl_row *row; - if (!ovs_list_is_empty(&table->track_list)) { - return CONTAINER_OF(ovs_list_front(&table->track_list), struct ovsdb_idl_row, track_node); + LIST_FOR_EACH (row, track_node, &table->track_list) { + if (!ovsdb_idl_row_is_orphan(row) || row->tracked_old_datum) { + return row; + } } return NULL; } /* Returns the next tracked row in table after the specified 'row' - * (in no particular order). Returns NULL if there are no tracked rows */ + * (in no particular order). Returns NULL if there are no tracked rows. + * Pure orphan rows, i.e. rows that never had any datum, are skipped.*/ const struct ovsdb_idl_row * ovsdb_idl_track_get_next(const struct ovsdb_idl_row *row) { - if (row->track_node.next != &row->table->track_list) { - return CONTAINER_OF(row->track_node.next, struct ovsdb_idl_row, track_node); - } + struct ovsdb_idl_table *table = row->table; + LIST_FOR_EACH_CONTINUE (row, track_node, &table->track_list) { + if (!ovsdb_idl_row_is_orphan(row) || row->tracked_old_datum) { + return row; + } + } return NULL; } diff --git a/tests/idltest.ovsschema b/tests/idltest.ovsschema index e04755ea0..3ddb612b0 100644 --- a/tests/idltest.ovsschema +++ b/tests/idltest.ovsschema @@ -195,6 +195,21 @@ }, "isRoot": true }, + "simple6": { + "columns" : { + "name": {"type": "string"}, + "weak_ref": { + "type": { + "key": {"type": "uuid", + "refTable": "simple", + "refType": "weak"}, + "min": 0, + "max": "unlimited" + } + } + }, + "isRoot": true + }, "singleton" : { "columns" : { "name" : { diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index cacc82d82..406a57627 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -967,6 +967,7 @@ AT_CHECK([grep ovsdb_idl stderr | sort], [0], [dnl test-ovsdb|ovsdb_idl|idltest database 
lacks indexed table (database needs upgrade?) test-ovsdb|ovsdb_idl|idltest database lacks link2 table (database needs upgrade?) test-ovsdb|ovsdb_idl|idltest database lacks simple5 table (database needs upgrade?) +test-ovsdb|ovsdb_idl|idltest database lacks simple6 table (database needs upgrade?) test-ovsdb|ovsdb_idl|idltest database lacks singleton table (database needs upgrade?) test-ovsdb|ovsdb_idl|link1 table in idltest database lacks l2 column (database needs upgrade?) ]) @@ -1171,6 +1172,59 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated], 003: done ]]) +dnl This test creates database with weak references and checks that orphan +dnl rows created for weak references are not available for iteration via +dnl list of tracked changes. +OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, orphan weak references], + [['["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}, + "uuid-name": "weak_row0"}, + {"op": "insert", + "table": "simple", + "row": {"s": "row1_s"}, + "uuid-name": "weak_row1"}, + {"op": "insert", + "table": "simple", + "row": {"s": "row2_s"}, + "uuid-name": "weak_row2"}, + {"op": "insert", + "table": "simple6", + "row": {"name": "first_row", + "weak_ref": ["set", + [["named-uuid", "weak_row0"], + ["named-uuid", "weak_row1"], + ["named-uuid", "weak_row2"]] + ]}}]']], + [['condition simple []' \ + 'condition simple [["s","==","row1_s"]]' \ + '["idltest", + {"op": "update", + "table": "simple6", + "where": [], + "row": {"name": "new_name"}}]' \ + '["idltest", + {"op": "delete", + "table": "simple6", + "where": []}]']], + [[000: change conditions +001: inserted row: uuid=<0> +001: name=first_row weak_ref=[] uuid=<0> +001: updated columns: name weak_ref +002: change conditions +003: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +003: inserted row: uuid=<2> +003: name=first_row weak_ref=[<2>] uuid=<0> +003: updated columns: s +004: {"error":null,"result":[{"count":1}]} +005: 
name=new_name weak_ref=[<2>] uuid=<0> +005: updated columns: name +006: {"error":null,"result":[{"count":1}]} +007: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +008: done +]]) + OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops], [], [['["idltest", @@ -1246,6 +1300,7 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops], 010: updated columns: s 011: {"error":null,"result":[{"count":1}]} 012: deleted row: uuid=<1> +012: i=0 r=123.5 b=true s=newstring u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1> 013: reconnect 014: i=-1 r=125 b=false s=newstring u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6> 014: i=1 r=123.5 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0> diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index aade40f3f..31513c537 100644 --- a/tests/test-ovsdb.c +++ b/tests/test-ovsdb.c @@ -1904,6 +1904,26 @@ print_idl_row_updated_link2(const struct idltest_link2 *l2, int step) } } +static void +print_idl_row_updated_simple6(const struct idltest_simple6 *s6, int step) +{ + size_t i; + bool updated = false; + + for (i = 0; i < IDLTEST_SIMPLE6_N_COLUMNS; i++) { + if (idltest_simple6_is_updated(s6, i)) { + if (!updated) { + printf("%03d: updated columns:", step); + updated = true; + } + printf(" %s", idltest_simple6_columns[i].name); + } + } + if (updated) { + printf("\n"); + } +} + static void print_idl_row_updated_singleton(const struct idltest_singleton *sng, int step) { @@ -1991,6 +2011,22 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step) print_idl_row_updated_link2(l2, step); } +static void +print_idl_row_simple6(const struct idltest_simple6 *s6, int step) +{ + int i; + + printf("%03d: name=%s ", step, s6->name); + printf("weak_ref=["); + for (i = 0; i < s6->n_weak_ref; i++) { + printf("%s"UUID_FMT, i ? 
" " : "", + UUID_ARGS(&s6->weak_ref[i]->header_.uuid)); + } + + printf("] uuid="UUID_FMT"\n", UUID_ARGS(&s6->header_.uuid)); + print_idl_row_updated_simple6(s6, step); +} + static void print_idl_row_singleton(const struct idltest_singleton *sng, int step) { @@ -2032,21 +2068,20 @@ print_idl(struct ovsdb_idl *idl, int step) static void print_idl_track(struct ovsdb_idl *idl, int step) { + const struct idltest_simple6 *s6; const struct idltest_simple *s; const struct idltest_link1 *l1; const struct idltest_link2 *l2; int n = 0; IDLTEST_SIMPLE_FOR_EACH_TRACKED (s, idl) { + print_idl_row_simple(s, step); if (idltest_simple_is_deleted(s)) { printf("%03d: deleted row: uuid="UUID_FMT"\n", step, UUID_ARGS(&s->header_.uuid)); - } else { - print_idl_row_simple(s, step); - if (idltest_simple_is_new(s)) { - printf("%03d: inserted row: uuid="UUID_FMT"\n", step, - UUID_ARGS(&s->header_.uuid)); - } + } else if (idltest_simple_is_new(s)) { + printf("%03d: inserted row: uuid="UUID_FMT"\n", step, + UUID_ARGS(&s->header_.uuid)); } n++; } @@ -2077,6 +2112,18 @@ print_idl_track(struct ovsdb_idl *idl, int step) } n++; } + IDLTEST_SIMPLE6_FOR_EACH_TRACKED (s6, idl) { + print_idl_row_simple6(s6, step); + if (idltest_simple6_is_deleted(s6)) { + printf("%03d: deleted row: uuid="UUID_FMT"\n", step, + UUID_ARGS(&s6->header_.uuid)); + } else if (idltest_simple6_is_new(s6)) { + printf("%03d: inserted row: uuid="UUID_FMT"\n", step, + UUID_ARGS(&s6->header_.uuid)); + } + n++; + } + if (!n) { printf("%03d: empty\n", step); } @@ -2298,6 +2345,8 @@ find_table_class(const char *name) return &idltest_table_link1; } else if (!strcmp(name, "link2")) { return &idltest_table_link2; + } else if (!strcmp(name, "simple6")) { + return &idltest_table_simple6; } return NULL; } -- GitLab From 82367043e3e8984d9f94f54e7fe123f4b7ac9dd8 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Wed, 21 Oct 2020 09:49:39 -0700 Subject: [PATCH 398/432] compat: rcu: Add support for consolidated-RCU reader checking 
Upstream commit: commit 28875945ba98d1b47a8a706812b6494d165bb0a0 Author: Joel Fernandes (Google) Date: Tue Jul 16 18:12:22 2019 -0400 rcu: Add support for consolidated-RCU reader checking This commit adds RCU-reader checks to list_for_each_entry_rcu() and hlist_for_each_entry_rcu(). These checks are optional, and are indicated by a lockdep expression passed to a new optional argument to these two macros. If this optional lockdep expression is omitted, these two macros act as before, checking for an RCU read-side critical section. Signed-off-by: Joel Fernandes (Google) [ paulmck: Update to eliminate return within macro and update comment. ] Signed-off-by: Paul E. McKenney Backport portion of upstream commit for hlist_for_each_entry_rcu() macro so that it can be used in following bug fix. Cc: Joel Fernandes (Google) Signed-off-by: Greg Rose Signed-off-by: Ilya Maximets --- datapath/linux/compat/include/linux/rculist.h | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/datapath/linux/compat/include/linux/rculist.h b/datapath/linux/compat/include/linux/rculist.h index 8df8ad8a2..40fd5e171 100644 --- a/datapath/linux/compat/include/linux/rculist.h +++ b/datapath/linux/compat/include/linux/rculist.h @@ -9,9 +9,28 @@ #define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) #endif +/* + * Check during list traversal that we are within an RCU reader + */ + +#define check_arg_count_one(dummy) + +#ifdef CONFIG_PROVE_RCU_LIST +#define __list_check_rcu(dummy, cond, extra...) \ + ({ \ + check_arg_count_one(extra); \ + RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(), \ + "RCU-list traversed in non-reader section!"); \ + }) +#else +#define __list_check_rcu(dummy, cond, extra...) 
\ + ({ check_arg_count_one(extra); }) +#endif + #undef hlist_for_each_entry_rcu -#define hlist_for_each_entry_rcu(pos, head, member) \ - for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\ +#define hlist_for_each_entry_rcu(pos, head, member, cond...) \ + for (__list_check_rcu(dummy, ## cond, 0), \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ -- GitLab From fa57e9e45257f32b80c135c18ae821ac3c43a738 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Wed, 21 Oct 2020 09:49:40 -0700 Subject: [PATCH 399/432] datapath: ovs_ct_exit to be done under ovs_lock Upstream commit: commit 27de77cec985233bdf6546437b9761853265c505 Author: Tonghao Zhang Date: Fri Apr 17 02:57:31 2020 +0800 net: openvswitch: ovs_ct_exit to be done under ovs_lock syzbot wrote: | ============================= | WARNING: suspicious RCU usage | 5.7.0-rc1+ #45 Not tainted | ----------------------------- | net/openvswitch/conntrack.c:1898 RCU-list traversed in non-reader section!! | | other info that might help us debug this: | rcu_scheduler_active = 2, debug_locks = 1 | ... | | stack backtrace: | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c8995f-prebuilt.qemu.org 04/01/2014 | Workqueue: netns cleanup_net | Call Trace: | ... | ovs_ct_exit | ovs_exit_net | ops_exit_list.isra.7 | cleanup_net | process_one_work | worker_thread To avoid that warning, invoke the ovs_ct_exit under ovs_lock and add lockdep_ovsl_is_held as optional lockdep expression. Link: https://lore.kernel.org/lkml/000000000000e642a905a0cbee6e@google.com Fixes: 11efd5cb04a1 ("openvswitch: Support conntrack zone limit") Cc: Pravin B Shelar Cc: Yi-Hung Wei Reported-by: syzbot+7ef50afd3a211f879112@syzkaller.appspotmail.com Signed-off-by: Tonghao Zhang Acked-by: Pravin B Shelar Signed-off-by: David S. 
Miller Cc: Tonghao Zhang Fixes: cb2a5486a3a3 ("datapath: conntrack: Support conntrack zone limit") Signed-off-by: Greg Rose Acked-by: Tonghao Zhang Signed-off-by: Ilya Maximets --- datapath/conntrack.c | 3 ++- datapath/datapath.c | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/datapath/conntrack.c b/datapath/conntrack.c index c7a318baf..50b4d7bd6 100644 --- a/datapath/conntrack.c +++ b/datapath/conntrack.c @@ -1984,7 +1984,8 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net) struct hlist_head *head = &info->limits[i]; struct ovs_ct_limit *ct_limit; - hlist_for_each_entry_rcu(ct_limit, head, hlist_node) + hlist_for_each_entry_rcu(ct_limit, head, hlist_node, + lockdep_ovsl_is_held()) kfree_rcu(ct_limit, rcu); } kfree(ovs_net->ct_limit_info->limits); diff --git a/datapath/datapath.c b/datapath/datapath.c index 8e9b9a706..b88d16107 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -2572,8 +2572,10 @@ static void __net_exit ovs_exit_net(struct net *dnet) ovs_netns_frags6_exit(dnet); ovs_netns_frags_exit(dnet); - ovs_ct_exit(dnet); ovs_lock(); + + ovs_ct_exit(dnet); + list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node) __dp_destroy(dp); -- GitLab From 2407099c9fb404eda1ec59a7437c3712fbe74dfd Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Mon, 30 Nov 2020 17:41:14 +0100 Subject: [PATCH 400/432] ovsdb-idl: Fix memleak when reinserting tracked orphan rows. Considering the following updates processed by an IDL client: 1. Delete row R1 from table A while R1 is also referenced by row R2 from table B: - because row R2 still refers to row R1, this will create an orphan R1 but also sets row->tracked_old_datum to report to the IDL client that the row has been deleted. 2. Insert row R1 to table A. - because orphan R1 already existed in the IDL, it will be reused. - R1 still has row->tracked_old_datum set (and may also be on the table->track_list). 3. Delete row R2 from table B and row R1 from table A. 
- row->tracked_old_datum is set again but the previous tracked_old_datum was never freed. IDL clients use the deleted old_datum values so when multiple delete operations are received for a row, always track the first one as that will match the contents of the row the IDL client knew about. Running the newly added test case with valgrind, without the fix, produces the following report: ==23113== 327 (240 direct, 87 indirect) bytes in 1 blocks are definitely lost in loss record 43 of 43 ==23113== at 0x4C29F73: malloc (vg_replace_malloc.c:309) ==23113== by 0x476761: xmalloc (util.c:138) ==23113== by 0x45D8B3: ovsdb_idl_insert_row (ovsdb-idl.c:3431) ==23113== by 0x45B7F9: ovsdb_idl_process_update2 (ovsdb-idl.c:2670) ==23113== by 0x45AFCF: ovsdb_idl_db_parse_update__ (ovsdb-idl.c:2479) ==23113== by 0x45B262: ovsdb_idl_db_parse_update (ovsdb-idl.c:2542) ==23113== by 0x45ABBE: ovsdb_idl_db_parse_update_rpc (ovsdb-idl.c:2358) ==23113== by 0x4576DD: ovsdb_idl_process_msg (ovsdb-idl.c:865) ==23113== by 0x457973: ovsdb_idl_run (ovsdb-idl.c:944) ==23113== by 0x40B7B9: do_idl (test-ovsdb.c:2523) ==23113== by 0x44425D: ovs_cmdl_run_command__ (command-line.c:247) ==23113== by 0x44430E: ovs_cmdl_run_command (command-line.c:278) ==23113== by 0x404BA6: main (test-ovsdb.c:76) Fixes: 72aeb243a52a ("ovsdb-idl: Tracking - preserve data for deleted rows.") Signed-off-by: Dumitru Ceara Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 2 +- tests/ovsdb-idl.at | 52 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 23648ff6b..6afae2d22 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -3227,7 +3227,7 @@ ovsdb_idl_row_clear_old(struct ovsdb_idl_row *row) { ovs_assert(row->old_datum == row->new_datum); if (!ovsdb_idl_row_is_orphan(row)) { - if (ovsdb_idl_track_is_set(row->table)) { + if (ovsdb_idl_track_is_set(row->table) && !row->tracked_old_datum) { 
row->tracked_old_datum = row->old_datum; } else { const struct ovsdb_idl_table_class *class = row->table->class_; diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index 406a57627..4b4791a7d 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -1225,6 +1225,58 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, orphan weak refer 008: done ]]) +dnl This test creates database with weak references and checks that the +dnl content of orphaned rows created for weak references after monitor +dnl condition change are not leaked when the row is reinserted and deleted. +OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, orphan rows, conditional], + [['["idltest", + {"op": "insert", + "table": "simple", + "row": {"s": "row0_s"}, + "uuid-name": "weak_row0"}, + {"op": "insert", + "table": "simple", + "row": {"s": "row1_s"}, + "uuid-name": "weak_row1"}, + {"op": "insert", + "table": "simple6", + "row": {"name": "first_row", + "weak_ref": ["set", + [["named-uuid", "weak_row0"]] + ]}}]']], + [['condition simple []' \ + 'condition simple [["s","==","row0_s"]]' \ + 'condition simple [["s","==","row1_s"]]' \ + 'condition simple [["s","==","row0_s"]]' \ + '["idltest", + {"op": "delete", + "table": "simple6", + "where": []}]']], + [[000: change conditions +001: inserted row: uuid=<0> +001: name=first_row weak_ref=[] uuid=<0> +001: updated columns: name weak_ref +002: change conditions +003: i=0 r=0 b=false s=row0_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +003: inserted row: uuid=<2> +003: name=first_row weak_ref=[<2>] uuid=<0> +003: updated columns: s +004: change conditions +005: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +005: inserted row: uuid=<3> +005: updated columns: s +006: change conditions +007: deleted row: uuid=<3> +007: i=0 r=0 b=false s=row0_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +007: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3> +007: inserted row: uuid=<2> +007: 
name=first_row weak_ref=[<2>] uuid=<0> +007: updated columns: s +008: {"error":null,"result":[{"count":1}]} +009: i=0 r=0 b=false s=row0_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2> +010: done +]]) + OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops], [], [['["idltest", -- GitLab From 08e130abb1b0942d011f4dceff5282c2fa5d3c82 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Mon, 30 Nov 2020 17:41:29 +0100 Subject: [PATCH 401/432] ovsdb-idl: Fix memleak when deleting orphan rows. Pure IDL orphan rows, i.e., for which no "insert" operation was seen, which are part of tables with change tracking enabled should also be freed when the table track_list is flushed. Reported-by: Ilya Maximets Fixes: 72aeb243a52a ("ovsdb-idl: Tracking - preserve data for deleted rows.") Signed-off-by: Dumitru Ceara Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 6afae2d22..e0c9833aa 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -1977,16 +1977,18 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db) ovs_list_remove(&row->track_node); ovs_list_init(&row->track_node); - if (ovsdb_idl_row_is_orphan(row) && row->tracked_old_datum) { + if (ovsdb_idl_row_is_orphan(row)) { ovsdb_idl_row_unparse(row); - const struct ovsdb_idl_table_class *class = - row->table->class_; - for (size_t c = 0; c < class->n_columns; c++) { - ovsdb_datum_destroy(&row->tracked_old_datum[c], - &class->columns[c].type); + if (row->tracked_old_datum) { + const struct ovsdb_idl_table_class *class = + row->table->class_; + for (size_t c = 0; c < class->n_columns; c++) { + ovsdb_datum_destroy(&row->tracked_old_datum[c], + &class->columns[c].type); + } + free(row->tracked_old_datum); + row->tracked_old_datum = NULL; } - free(row->tracked_old_datum); - row->tracked_old_datum = NULL; free(row); } } -- GitLab From 91a6a4580267027a051aebcdd88df850dc887dc9 Mon 
Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Mon, 30 Nov 2020 17:41:41 +0100 Subject: [PATCH 402/432] ovsdb-idl: Fix use-after-free when deleting orphaned rows. It's possible that the IDL client processes multiple jsonrpc updates in a single ovsdb_idl_run(). Considering the following updates processed in a single IDL run: 1. Update row R1 from table A while R1 is also referenced by row R2 from table B: - this adds R1 to table A's track_list. 2. Delete row R1 from table A while R1 is also referenced by row R2 from table B: - because row R2 still refers to row R1, this will create an orphan R1. - at this point R1 is still in table A's hmap. When the IDL client calls ovsdb_idl_track_clear() after it has finished processing the tracked changes, row R1 gets freed leaving a dangling pointer in table A's hmap. To fix this we don't free rows in ovsdb_idl_track_clear() if they are orphan and still referenced by other rows, i.e., the row's 'dst_arcs' list is not empty. Later, when all arc sources (e.g., R2) are deleted, the orphan R1 will be cleaned up as well. The only exception is when the whole contents of the IDL are flushed, in ovsdb_idl_db_clear(), in which case it's safe to free all rows. 
Reported-by: Ilya Maximets Fixes: 932104f483ef ("ovsdb-idl: Add support for change tracking.") Signed-off-by: Dumitru Ceara Acked-by: Han Zhou Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index e0c9833aa..e61635de0 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -221,7 +221,7 @@ struct ovsdb_idl_db { struct uuid last_id; }; -static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *); +static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *, bool flush_all); static void ovsdb_idl_db_add_column(struct ovsdb_idl_db *, const struct ovsdb_idl_column *); static void ovsdb_idl_db_omit(struct ovsdb_idl_db *, @@ -660,7 +660,7 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db) ovsdb_idl_row_destroy_postprocess(db); db->cond_seqno = 0; - ovsdb_idl_db_track_clear(db); + ovsdb_idl_db_track_clear(db, true); if (changed) { db->change_seqno++; @@ -1955,7 +1955,7 @@ ovsdb_idl_track_is_updated(const struct ovsdb_idl_row *row, * loop when it is ready to do ovsdb_idl_run() again. */ static void -ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db) +ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db, bool flush_all) { size_t i; @@ -1989,7 +1989,20 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db) free(row->tracked_old_datum); row->tracked_old_datum = NULL; } - free(row); + + /* Rows that were reused as orphan after being processed + * for deletion are still in the table hmap and will be + * cleaned up when their src arcs are removed. These rows + * will not be reported anymore as "deleted" to IDL + * clients. + * + * The exception is when 'flush_all' is explicitly set to + * 'true' which usually happens when the complete IDL + * contents are being flushed. 
+ */ + if (flush_all || ovs_list_is_empty(&row->dst_arcs)) { + free(row); + } } } } @@ -2004,7 +2017,7 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db) void ovsdb_idl_track_clear(struct ovsdb_idl *idl) { - ovsdb_idl_db_track_clear(&idl->data); + ovsdb_idl_db_track_clear(&idl->data, false); } static void -- GitLab From 71d0c0d8b4d8bf01cb5367aabc5ceadba1f8ce61 Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 24 Nov 2020 11:01:09 +0800 Subject: [PATCH 403/432] lib/tc: fix parse act pedit for tos rewrite Check overlap between current pedit key, which is always 4 bytes (range [off, off + 3]), and a map entry in flower_pedit_map sf = ROUND_DOWN(mf, 4) (range [sf|mf, (mf + sz - 1)|ef]). So for the tos rewrite the off + 3(3) is greater than mf, and should be less than ef(4) but not mf+sz(2). Signed-off-by: wenxu Signed-off-by: Simon Horman --- lib/tc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/tc.c b/lib/tc.c index 8761304c9..c2de78bfe 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -1003,6 +1003,7 @@ nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) int flower_off = m->flower_offset; int sz = m->size; int mf = m->offset; + int ef = ROUND_UP(mf, 4); if (m->htype != type) { continue; @@ -1010,9 +1011,10 @@ nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower) /* check overlap between current pedit key, which is always * 4 bytes (range [off, off + 3]), and a map entry in - * flower_pedit_map (range [mf, mf + sz - 1]) */ + * flower_pedit_map sf = ROUND_DOWN(mf, 4) + * (range [sf|mf, (mf + sz - 1)|ef]) */ if ((keys->off >= mf && keys->off < mf + sz) - || (keys->off + 3 >= mf && keys->off + 3 < mf + sz)) { + || (keys->off + 3 >= mf && keys->off + 3 < ef)) { int diff = flower_off + (keys->off - mf); ovs_be32 *dst = (void *) (rewrite_key + diff); ovs_be32 *dst_m = (void *) (rewrite_mask + diff); -- GitLab From 35454eba79a42a7374bb2572f1348abe4cc92318 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 26 Nov 2020 
01:43:57 +0100 Subject: [PATCH 404/432] ovsdb-cluster.at: Fix infinite loop in torture tests. For some reason, while running cluster torture tests in GitHub Actions workflow, failure of 'echo' command doesn't fail the loop and subshell never exits, but keeps infinitely printing errors after breaking from the loop on the right side of the pipeline: testsuite: line 8591: echo: write error: Broken pipe Presumably, that is caused by some shell configuration option, but I have no idea which one and I'm not able to reproduce locally with shell configuration options provided in GitHub documentation. Let's just add an explicit 'exit' on 'echo' failure. This will guarantee exit from the loop and the subshell regardless of configuration. Fixes: 0f03ae3754ec ("ovsdb: Improve timing in cluster torture test.") Acked-by: Simon Horman Signed-off-by: Ilya Maximets --- tests/ovsdb-cluster.at | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at index e0758e954..92aa42709 100644 --- a/tests/ovsdb-cluster.at +++ b/tests/ovsdb-cluster.at @@ -701,7 +701,7 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral # Use file instead of var because code inside "while" runs in a subshell. echo 0 > phase i=0 - (while :; do echo; sleep 0.1; done) | while read REPLY; do + (while :; do echo || exit 0; sleep 0.1; done) | while read REPLY; do printf "t=%2d s:" $i done=0 for j in $(seq 0 $(expr $n1 - 1)); do -- GitLab From 97dbef6de9d8ac72a20d4ccbb3bcdba3e308f6b9 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Fri, 4 Dec 2020 15:54:41 +0100 Subject: [PATCH 405/432] ovsdb-idl: Fix expected condition seqno when changes are pending. Commit 17f22fe46142 tried to address this but only covered some of the cases. The correct way to report the expected seqno is to take into account if there already is a condition change that was requested to the server but not acked yet. 
In that case, the new condition change request will be sent only after the already requested one is acked. That is, expected condition seqno when conditions are up to date is db->cond_seqno + 2 in this case. Fixes: 17f22fe46142 ("ovsdb-idl: Return correct seqno from ovsdb_idl_db_set_condition().") Suggested-by: Ilya Maximets Signed-off-by: Dumitru Ceara Signed-off-by: Ilya Maximets --- lib/ovsdb-idl.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index e61635de0..efaa08a1e 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -1564,6 +1564,7 @@ ovsdb_idl_db_set_condition(struct ovsdb_idl_db *db, { struct ovsdb_idl_condition *table_cond; struct ovsdb_idl_table *table = ovsdb_idl_db_table_from_class(db, tc); + unsigned int curr_seqno = db->cond_seqno; /* Compare the new condition to the last known condition which can be * either "new" (not sent yet), "requested" or "acked", in this order. @@ -1581,14 +1582,11 @@ ovsdb_idl_db_set_condition(struct ovsdb_idl_db *db, ovsdb_idl_condition_clone(&table->new_cond, condition); db->cond_changed = true; poll_immediate_wake(); - return db->cond_seqno + 1; - } else if (table_cond != table->ack_cond) { - /* 'condition' was already set but has not been "acked" yet. The IDL - * will be up to date when db->cond_seqno gets incremented. */ - return db->cond_seqno + 1; } - return db->cond_seqno; + /* Conditions will be up to date when we receive replies for already + * requested and new conditions, if any. */ + return curr_seqno + (table->new_cond ? 1 : 0) + (table->req_cond ? 1 : 0); } /* Sets the replication condition for 'tc' in 'idl' to 'condition' and -- GitLab From af06184705072804a4c1374f9c824c9e4c241c26 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Sun, 6 Dec 2020 10:16:45 +0200 Subject: [PATCH 406/432] dpif-netlink: Count the number of offloaded rules Add a counter for the offloaded rules, and display it in the command of "ovs-appctl upcall/show". 
Signed-off-by: Jianbo Liu Reviewed-by: Roi Dayan Signed-off-by: Simon Horman --- lib/dpif-netlink.c | 9 +++++++++ lib/dpif.h | 1 + ofproto/ofproto-dpif-upcall.c | 6 ++++++ 3 files changed, 16 insertions(+) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 2f881e4fa..6858ba612 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -208,6 +208,7 @@ struct dpif_netlink { /* Change notification. */ struct nl_sock *port_notifier; /* vport multicast group subscriber. */ bool refresh_channels; + struct atomic_count n_offloaded_flows; }; static void report_loss(struct dpif_netlink *, struct dpif_channel *, @@ -653,6 +654,7 @@ dpif_netlink_run(struct dpif *dpif_) static int dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats) { + struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); struct dpif_netlink_dp dp; struct ofpbuf *buf; int error; @@ -678,6 +680,7 @@ dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats) } ofpbuf_delete(buf); } + stats->n_offloaded_flows = atomic_count_get(&dpif->n_offloaded_flows); return error; } @@ -2189,6 +2192,9 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op) } err = parse_flow_put(dpif, put); + if (!err && (put->flags & DPIF_FP_CREATE)) { + atomic_count_inc(&dpif->n_offloaded_flows); + } log_flow_put_message(&dpif->dpif, &this_module, put, 0); break; } @@ -2203,6 +2209,9 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op) dpif_normalize_type(dpif_type(&dpif->dpif)), del->ufid, del->stats); + if (!err) { + atomic_count_dec(&dpif->n_offloaded_flows); + } log_flow_del_message(&dpif->dpif, &this_module, del, 0); break; } diff --git a/lib/dpif.h b/lib/dpif.h index cb047dbe2..7ef148c6d 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -429,6 +429,7 @@ struct dpif_dp_stats { uint64_t n_missed; /* Number of flow table misses. */ uint64_t n_lost; /* Number of misses not sent to userspace. */ uint64_t n_flows; /* Number of flows present. 
*/ + uint64_t n_offloaded_flows; /* Number of offloaded flows present. */ uint64_t n_mask_hit; /* Number of mega flow masks visited for flow table matches. */ uint32_t n_masks; /* Number of mega flow masks. */ diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index e022fde27..19b92dfe0 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -175,6 +175,7 @@ struct udpif { /* n_flows_mutex prevents multiple threads updating these concurrently. */ atomic_uint n_flows; /* Number of flows in the datapath. */ + atomic_uint n_offloaded_flows; /* Number of the offloaded flows. */ atomic_llong n_flows_timestamp; /* Last time n_flows was updated. */ struct ovs_mutex n_flows_mutex; @@ -730,6 +731,8 @@ udpif_get_n_flows(struct udpif *udpif) dpif_get_dp_stats(udpif->dpif, &stats); flow_count = stats.n_flows; atomic_store_relaxed(&udpif->n_flows, flow_count); + atomic_store_relaxed(&udpif->n_offloaded_flows, + stats.n_offloaded_flows); ovs_mutex_unlock(&udpif->n_flows_mutex); } else { atomic_read_relaxed(&udpif->n_flows, &flow_count); @@ -2904,6 +2907,7 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, struct udpif *udpif; LIST_FOR_EACH (udpif, list_node, &all_udpifs) { + unsigned int n_offloaded_flows; unsigned int flow_limit; bool ufid_enabled; size_t i; @@ -2915,6 +2919,8 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, ds_put_format(&ds, " flows : (current %lu)" " (avg %u) (max %u) (limit %u)\n", udpif_get_n_flows(udpif), udpif->avg_n_flows, udpif->max_n_flows, flow_limit); + atomic_read_relaxed(&udpif->n_offloaded_flows, &n_offloaded_flows); + ds_put_format(&ds, " offloaded flows : %u\n", n_offloaded_flows); ds_put_format(&ds, " dump duration : %lldms\n", udpif->dump_duration); ds_put_format(&ds, " ufid enabled : "); if (ufid_enabled) { -- GitLab From 252e1e5764439085e32f07695b45848a079ba4df Mon Sep 17 00:00:00 2001 From: Ian Stokes Date: Tue, 15 Dec 2020 16:41:28 +0000 Subject: 
[PATCH 407/432] dpdk: Update to use DPDK v20.11. This commit adds support for DPDK v20.11, it includes the following changes. 1. travis: Remove explicit DPDK kmods configuration. 2. sparse: Fix build with 20.05 DPDK tracepoints. 3. netdev-dpdk: Remove experimental API flag. http://patchwork.ozlabs.org/project/openvswitch/list/?series=173216&state=* 4. sparse: Update to DPDK 20.05 trace point header. http://patchwork.ozlabs.org/project/openvswitch/list/?series=179604&state=* 5. sparse: Fix build with DPDK 20.08. http://patchwork.ozlabs.org/project/openvswitch/list/?series=200181&state=* 6. build: Add support for DPDK meson build. http://patchwork.ozlabs.org/project/openvswitch/list/?series=199138&state=* 7. netdev-dpdk: Remove usage of RTE_ETH_DEV_CLOSE_REMOVE flag. http://patchwork.ozlabs.org/project/openvswitch/list/?series=207850&state=* 8. netdev-dpdk: Fix build with 20.11-rc1. http://patchwork.ozlabs.org/project/openvswitch/list/?series=209006&state=* 9. sparse: Fix __ATOMIC_* redefinition errors http://patchwork.ozlabs.org/project/openvswitch/list/?series=209452&state=* 10. build: Remove DPDK make build references. http://patchwork.ozlabs.org/project/openvswitch/list/?series=216682&state=* For credit all authors of the original commits to 'dpdk-latest' with the above changes have been added as co-authors for this commit. 
Signed-off-by: David Marchand Co-authored-by: David Marchand Signed-off-by: Sunil Pai G Co-authored-by: Sunil Pai G Signed-off-by: Eli Britstein Co-authored-by: Eli Britstein Tested-by: Harry van Haaren Tested-by: Govindharajan, Hariprasad Tested-by: Tonghao Zhang Acked-by: Ilya Maximets Signed-off-by: Ian Stokes --- .ci/linux-build.sh | 48 +++++++---- .ci/linux-prepare.sh | 1 + .github/workflows/build-and-test.yml | 5 +- .travis.yml | 3 + Documentation/faq/releases.rst | 2 +- Documentation/intro/install/afxdp.rst | 2 +- Documentation/intro/install/dpdk.rst | 68 ++++++++------- Documentation/topics/dpdk/phy.rst | 22 +++-- Documentation/topics/dpdk/vdev.rst | 2 +- Documentation/topics/dpdk/vhost-user.rst | 20 +---- Documentation/topics/testing.rst | 2 +- Documentation/topics/userspace-tso.rst | 2 +- NEWS | 1 + acinclude.m4 | 101 ++++++++++++----------- include/sparse/automake.mk | 2 + include/sparse/rte_mbuf.h | 29 +++++++ include/sparse/rte_trace_point.h | 28 +++++++ lib/dpdk.c | 2 +- lib/netdev-dpdk.c | 20 +---- 19 files changed, 216 insertions(+), 144 deletions(-) create mode 100644 include/sparse/rte_mbuf.h create mode 100644 include/sparse/rte_trace_point.h diff --git a/.ci/linux-build.sh b/.ci/linux-build.sh index 16102ac94..3e5136fd4 100755 --- a/.ci/linux-build.sh +++ b/.ci/linux-build.sh @@ -87,17 +87,29 @@ function install_dpdk() { local DPDK_VER=$1 local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version" + local DPDK_OPTS="" + local DPDK_LIB="" if [ -z "$TRAVIS_ARCH" ] || [ "$TRAVIS_ARCH" == "amd64" ]; then - TARGET="x86_64-native-linuxapp-gcc" + DPDK_LIB=$(pwd)/dpdk-dir/build/lib/x86_64-linux-gnu elif [ "$TRAVIS_ARCH" == "aarch64" ]; then - TARGET="arm64-armv8a-linuxapp-gcc" + DPDK_LIB=$(pwd)/dpdk-dir/build/lib/aarch64-linux-gnu else echo "Target is unknown" exit 1 fi + if [ "$DPDK_SHARED" ]; then + EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared" + export LD_LIBRARY_PATH=$DPDK_LIB/:$LD_LIBRARY_PATH + else + EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static" + fi + + 
# Export the following path for pkg-config to find the .pc file. + export PKG_CONFIG_PATH=$DPDK_LIB/pkgconfig/:$PKG_CONFIG_PATH + if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then # Avoid using cache for git tree build. rm -rf dpdk-dir @@ -110,7 +122,8 @@ function install_dpdk() if [ -f "${VERSION_FILE}" ]; then VER=$(cat ${VERSION_FILE}) if [ "${VER}" = "${DPDK_VER}" ]; then - EXTRA_OPTS="${EXTRA_OPTS} --with-dpdk=$(pwd)/dpdk-dir/build" + # Update the library paths. + sudo ldconfig echo "Found cached DPDK ${VER} build in $(pwd)/dpdk-dir" return fi @@ -124,23 +137,24 @@ function install_dpdk() pushd dpdk-dir fi - make config CC=gcc T=$TARGET + # Switching to 'default' machine to make dpdk-dir cache usable on + # different CPUs. We can't be sure that all CI machines are exactly same. + DPDK_OPTS="$DPDK_OPTS -Dmachine=default" - if [ "$DPDK_SHARED" ]; then - sed -i '/CONFIG_RTE_BUILD_SHARED_LIB=n/s/=n/=y/' build/.config - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/$TARGET/lib - fi + # Disable building DPDK unit tests. Not needed for OVS build or tests. + DPDK_OPTS="$DPDK_OPTS -Dtests=false" + + # Install DPDK using prefix. + DPDK_OPTS="$DPDK_OPTS --prefix=$(pwd)/build" + + CC=gcc meson $DPDK_OPTS build + ninja -C build + ninja -C build install - # Disable building DPDK kernel modules. Not needed for OVS build or tests. - sed -i '/CONFIG_RTE_EAL_IGB_UIO=y/s/=y/=n/' build/.config - sed -i '/CONFIG_RTE_KNI_KMOD=y/s/=y/=n/' build/.config + # Update the library paths. + sudo ldconfig - # Switching to 'default' machine to make dpdk-dir cache usable on different - # CPUs. We can't be sure that all CI machines are exactly same. 
- sed -i '/CONFIG_RTE_MACHINE="native"/s/="native"/="default"/' build/.config - make -j4 CC=gcc EXTRA_CFLAGS='-fPIC' - EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=$(pwd)/build" echo "Installed DPDK source in $(pwd)" popd echo "${DPDK_VER}" > ${VERSION_FILE} @@ -187,7 +201,7 @@ fi if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then if [ -z "$DPDK_VER" ]; then - DPDK_VER="19.11.2" + DPDK_VER="20.11" fi install_dpdk $DPDK_VER if [ "$CC" = "clang" ]; then diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh index fea905a83..69a40011f 100755 --- a/.ci/linux-prepare.sh +++ b/.ci/linux-prepare.sh @@ -22,6 +22,7 @@ cd .. pip3 install --disable-pip-version-check --user flake8 hacking pip3 install --user --upgrade docutils +pip3 install --user 'meson==0.47.1' if [ "$M32" ]; then # Installing 32-bit libraries. diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 847fd3150..c83066138 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -8,7 +8,8 @@ jobs: dependencies: | automake libtool gcc bc libjemalloc1 libjemalloc-dev \ libssl-dev llvm-dev libelf-dev libnuma-dev libpcap-dev \ - python3-openssl python3-pip python3-sphinx \ + ninja-build python3-openssl python3-pip \ + python3-setuptools python3-sphinx python3-wheel \ selinux-policy-dev deb_dependencies: | linux-headers-$(uname -r) build-essential fakeroot devscripts equivs @@ -146,7 +147,7 @@ jobs: run: ./.ci/linux-prepare.sh - name: build - run: PATH="$PATH:$HOME/bin" ./.ci/linux-build.sh + run: PATH="$PATH:$HOME/bin:$HOME/.local/bin" ./.ci/linux-build.sh - name: upload deb packages if: matrix.deb_package != '' diff --git a/.travis.yml b/.travis.yml index acf3c10fb..51d051108 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,9 @@ addons: - selinux-policy-dev - libunbound-dev - libunwind-dev + - python3-setuptools + - python3-wheel + - ninja-build before_install: ./.ci/${TRAVIS_OS_NAME}-prepare.sh diff --git a/Documentation/faq/releases.rst 
b/Documentation/faq/releases.rst index 3623e3f40..a52df2205 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -214,7 +214,7 @@ Q: Are all the DPDK releases that OVS versions work with maintained? The latest information about DPDK stable and LTS releases can be found at `DPDK stable`_. -.. _DPDK stable: http://dpdk.org/doc/guides/contributing/stable.html +.. _DPDK stable: http://doc.dpdk.org/guides-20.11/contributing/stable.html Q: I get an error like this when I configure Open vSwitch: diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst index 3c8f78825..aad0aeebe 100644 --- a/Documentation/intro/install/afxdp.rst +++ b/Documentation/intro/install/afxdp.rst @@ -396,7 +396,7 @@ PVP using vhostuser device -------------------------- First, build OVS with DPDK and AFXDP:: - ./configure --enable-afxdp --with-dpdk= + ./configure --enable-afxdp --with-dpdk=shared|static make -j4 && make install Create a vhost-user port from OVS:: diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst index fe11571d2..a595417ce 100644 --- a/Documentation/intro/install/dpdk.rst +++ b/Documentation/intro/install/dpdk.rst @@ -42,7 +42,7 @@ Build requirements In addition to the requirements described in :doc:`general`, building Open vSwitch with DPDK will require the following: -- DPDK 19.11.2 +- DPDK 20.11 - A `DPDK supported NIC`_ @@ -59,8 +59,10 @@ vSwitch with DPDK will require the following: Detailed system requirements can be found at `DPDK requirements`_. -.. _DPDK supported NIC: http://dpdk.org/doc/nics -.. _DPDK requirements: http://dpdk.org/doc/guides/linux_gsg/sys_reqs.html +.. _DPDK supported NIC: https://doc.dpdk.org/guides-20.11/nics/index.html +.. _DPDK requirements: https://doc.dpdk.org/guides-20.11/linux_gsg/sys_reqs.html + +.. _dpdk-install: Installing ---------- @@ -71,38 +73,44 @@ Install DPDK #. 
Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``:: $ cd /usr/src/ - $ wget https://fast.dpdk.org/rel/dpdk-19.11.2.tar.xz - $ tar xf dpdk-19.11.2.tar.xz - $ export DPDK_DIR=/usr/src/dpdk-stable-19.11.2 + $ wget https://fast.dpdk.org/rel/dpdk-20.11.tar.xz + $ tar xf dpdk-20.11.tar.xz + $ export DPDK_DIR=/usr/src/dpdk-20.11 $ cd $DPDK_DIR -#. (Optional) Configure DPDK as a shared library +#. Configure and install DPDK using Meson - DPDK can be built as either a static library or a shared library. By - default, it is configured for the former. If you wish to use the latter, set - ``CONFIG_RTE_BUILD_SHARED_LIB=y`` in ``$DPDK_DIR/config/common_base``. + Build and install the DPDK library:: - .. note:: + $ export DPDK_BUILD=$DPDK_DIR/build + $ meson build + $ ninja -C build + $ sudo ninja -C build install + $ sudo ldconfig - Minor performance loss is expected when using OVS with a shared DPDK - library compared to a static DPDK library. + Detailed information can be found at `DPDK documentation`_. -#. Configure and install DPDK +#. (Optional) Configure and export the DPDK shared library location - Build and install the DPDK library:: + Since DPDK is built both as static and shared library by default, no extra + configuration is required for the build. - $ export DPDK_TARGET=x86_64-native-linuxapp-gcc - $ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET - $ make install T=$DPDK_TARGET DESTDIR=install + Exporting the path to library is not necessary if the DPDK libraries are + system installed. For libraries installed using a prefix, export the path + to this library and also update the $PKG_CONFIG_PATH for use + before building OVS:: -#. (Optional) Export the DPDK shared library location + $ export LD_LIBRARY_PATH=/path/to/installed/DPDK/libraries + $ export PKG_CONFIG_PATH=/path/to/installed/".pc" file/for/DPDK - If DPDK was built as a shared library, export the path to this library for - use when building OVS:: + .. 
note:: - $ export LD_LIBRARY_PATH=$DPDK_DIR/x86_64-native-linuxapp-gcc/lib + Minor performance loss is expected when using OVS with a shared DPDK + library compared to a static DPDK library. .. _DPDK sources: http://dpdk.org/rel +.. _DPDK documentation: + https://doc.dpdk.org/guides-20.11/linux_gsg/build_dpdk.html Install OVS ~~~~~~~~~~~ @@ -121,16 +129,16 @@ has to be configured to build against the DPDK library (``--with-dpdk``). #. Bootstrap, if required, as described in :ref:`general-bootstrapping` -#. Configure the package using the ``--with-dpdk`` flag:: +#. Configure the package using the ``--with-dpdk`` flag: + + If OVS must consume DPDK static libraries + (also equivalent to ``--with-dpdk=yes`` ):: - $ ./configure --with-dpdk=$DPDK_BUILD + $ ./configure --with-dpdk=static - where ``DPDK_BUILD`` is the path to the built DPDK library. This can be - skipped if DPDK library is installed in its default location. + If OVS must consume DPDK shared libraries:: - If no path is provided to ``--with-dpdk``, but a pkg-config configuration - for libdpdk is available the include paths will be generated via an - equivalent ``pkg-config --cflags libdpdk``. + $ ./configure --with-dpdk=shared .. note:: While ``--with-dpdk`` is required, you can pass any other configuration @@ -703,7 +711,7 @@ Limitations release notes`_. .. _DPDK release notes: - https://doc.dpdk.org/guides-19.11/rel_notes/release_19_11.html + https://doc.dpdk.org/guides-20.11/rel_notes/release_20_11.html - Upper bound MTU: DPDK device drivers differ in how the L2 frame for a given MTU value is calculated e.g. i40e driver includes 2 x vlan headers in diff --git a/Documentation/topics/dpdk/phy.rst b/Documentation/topics/dpdk/phy.rst index 7ee3eacff..986dbd38e 100644 --- a/Documentation/topics/dpdk/phy.rst +++ b/Documentation/topics/dpdk/phy.rst @@ -117,7 +117,7 @@ tool:: For more information, refer to the `DPDK documentation `__. -.. _dpdk-drivers: http://dpdk.org/doc/guides/linux_gsg/linux_drivers.html +.. 
_dpdk-drivers: https://doc.dpdk.org/guides-20.11/linux_gsg/linux_drivers.html .. _dpdk-phy-multiqueue: @@ -218,18 +218,24 @@ If the log is not seen then the port can be detached like so:: Hotplugging with IGB_UIO ~~~~~~~~~~~~~~~~~~~~~~~~ -As of DPDK 19.11, default igb_uio hotplugging behavior changes from +.. important:: + + As of DPDK v20.11 IGB_UIO has been deprecated and is no longer built as + part of the default DPDK library. Below is intended for those who wish + to use IGB_UIO outside of the standard DPDK build from v20.11 onwards. + +As of DPDK v19.11, default igb_uio hotplugging behavior changed from previous DPDK versions. -With DPDK 19.11, if no device is bound to igb_uio when OVS is launched then -the IOVA mode may be set to virtual addressing for DPDK. This is incompatible -for hotplugging with igb_uio. +From DPDK v19.11 onwards, if no device is bound to igb_uio when OVS is +launched then the IOVA mode may be set to virtual addressing for DPDK. +This is incompatible for hotplugging with igb_uio. To hotplug a port with igb_uio in this case, DPDK must be configured to use physical addressing for IOVA mode. For more information regarding IOVA modes in DPDK please refer to the `DPDK IOVA Mode Detection`__. -__ https://doc.dpdk.org/guides-19.11/prog_guide/env_abstraction_layer.html#iova-mode-detection +__ https://doc.dpdk.org/guides-20.11/prog_guide/env_abstraction_layer.html#iova-mode-detection To configure OVS DPDK to use physical addressing for IOVA:: @@ -261,7 +267,7 @@ Representors are multi devices created on top of one PF. For more information, refer to the `DPDK documentation`__. -__ https://doc.dpdk.org/guides-19.11/prog_guide/switch_representation.html +__ https://doc.dpdk.org/guides-20.11/prog_guide/switch_representation.html Prior to port representors there was a one-to-one relationship between the PF and the eth device. 
With port representors the relationship becomes one PF to @@ -395,7 +401,7 @@ in the ``options`` column of the ``Interface`` table. kernel netdevice, and be inherited from it when Open vSwitch is restarted, even if the options described in this section are unset from Open vSwitch. -.. _bifurcated-drivers: http://doc.dpdk.org/guides/linux_gsg/linux_drivers.html#bifurcated-driver +.. _bifurcated-drivers: https://doc.dpdk.org/guides-20.11/linux_gsg/linux_drivers.html#bifurcated-driver - Configure the VF MAC address:: diff --git a/Documentation/topics/dpdk/vdev.rst b/Documentation/topics/dpdk/vdev.rst index 1c0df7f4b..7bd48165e 100644 --- a/Documentation/topics/dpdk/vdev.rst +++ b/Documentation/topics/dpdk/vdev.rst @@ -63,4 +63,4 @@ run:: More information on the different types of virtual DPDK PMDs can be found in the `DPDK documentation`__. -__ http://dpdk.org/doc/guides/nics/overview.html +__ https://doc.dpdk.org/guides-20.11/nics/overview.html diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst index 75d3fc958..bcd51e65c 100644 --- a/Documentation/topics/dpdk/vhost-user.rst +++ b/Documentation/topics/dpdk/vhost-user.rst @@ -389,23 +389,7 @@ application in the VM. To begin, instantiate a guest as described in :ref:`dpdk-vhost-user` or :ref:`dpdk-vhost-user-client`. Once started, connect to the VM, download the -DPDK sources to VM and build DPDK:: - - $ cd /root/dpdk/ - $ wget https://fast.dpdk.org/rel/dpdk-19.11.2.tar.xz - $ tar xf dpdk-19.11.2.tar.xz - $ export DPDK_DIR=/root/dpdk/dpdk-stable-19.11.2 - $ export DPDK_TARGET=x86_64-native-linuxapp-gcc - $ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET - $ cd $DPDK_DIR - $ make install T=$DPDK_TARGET DESTDIR=install - -Build the test-pmd application:: - - $ cd app/test-pmd - $ export RTE_SDK=$DPDK_DIR - $ export RTE_TARGET=$DPDK_TARGET - $ make +DPDK sources to VM and build DPDK as described in :ref:`dpdk-install`. 
Setup huge pages and DPDK devices using UIO:: @@ -555,4 +539,4 @@ shown with:: Further information can be found in the `DPDK documentation -`__ +`__ diff --git a/Documentation/topics/testing.rst b/Documentation/topics/testing.rst index b9fa94dda..951fe9e85 100644 --- a/Documentation/topics/testing.rst +++ b/Documentation/topics/testing.rst @@ -353,7 +353,7 @@ All tests are skipped if no hugepages are configured. User must look into the DP manual to figure out how to `Configure hugepages`_. The phy test will skip if no compatible physical device is available. -.. _Configure hugepages: https://doc.dpdk.org/guides-19.11/linux_gsg/sys_reqs.html +.. _Configure hugepages: https://doc.dpdk.org/guides-20.11/linux_gsg/sys_reqs.html All the features documented under `Unit Tests`_ are available for the DPDK datapath testsuite. diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst index 14a7c6fb3..bd64e7ed3 100644 --- a/Documentation/topics/userspace-tso.rst +++ b/Documentation/topics/userspace-tso.rst @@ -46,7 +46,7 @@ datasheet for compatibility. Secondly, the NIC must have an associated DPDK Poll Mode Driver (PMD) which supports `TSO`. For a list of features per PMD, refer to the `DPDK documentation`__. -__ https://doc.dpdk.org/guides-19.11/nics/overview.html +__ https://doc.dpdk.org/guides-20.11/nics/overview.html Enabling TSO ~~~~~~~~~~~~ diff --git a/NEWS b/NEWS index 7e291a180..1a39cc661 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,7 @@ Post-v2.14.0 Use the 'cluster/set-backlog-threshold' command to change limits. - DPDK: * Removed support for vhost-user dequeue zero-copy. + * Add support for DPDK 20.11. - Userspace datapath: * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which restricts a flow dump to a single PMD thread if set. 
diff --git a/acinclude.m4 b/acinclude.m4 index ddf4b71e1..60871f67a 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -334,8 +334,9 @@ dnl dnl Configure DPDK source tree AC_DEFUN([OVS_CHECK_DPDK], [ AC_ARG_WITH([dpdk], - [AC_HELP_STRING([--with-dpdk=/path/to/dpdk], - [Specify the DPDK build directory])], + [AC_HELP_STRING([--with-dpdk=static|shared|yes], + [Specify "static" or "shared" depending on the + DPDK libraries to use])], [have_dpdk=true]) AC_MSG_CHECKING([whether dpdk is enabled]) @@ -345,35 +346,45 @@ AC_DEFUN([OVS_CHECK_DPDK], [ else AC_MSG_RESULT([yes]) case "$with_dpdk" in - yes) - DPDK_AUTO_DISCOVER="true" - PKG_CHECK_MODULES_STATIC([DPDK], [libdpdk], [ - DPDK_INCLUDE="$DPDK_CFLAGS" - DPDK_LIB="$DPDK_LIBS"], [ - DPDK_INCLUDE="-I/usr/local/include/dpdk -I/usr/include/dpdk" - DPDK_LIB="-ldpdk"]) - ;; - *) - DPDK_AUTO_DISCOVER="false" - DPDK_INCLUDE_PATH="$with_dpdk/include" - # If 'with_dpdk' is passed install directory, point to headers - # installed in $DESTDIR/$prefix/include/dpdk - if test -e "$DPDK_INCLUDE_PATH/rte_config.h"; then - DPDK_INCLUDE="-I$DPDK_INCLUDE_PATH" - elif test -e "$DPDK_INCLUDE_PATH/dpdk/rte_config.h"; then - DPDK_INCLUDE="-I$DPDK_INCLUDE_PATH/dpdk" - fi - DPDK_LIB_DIR="$with_dpdk/lib" - DPDK_LIB="-ldpdk" - ;; + "shared") + PKG_CHECK_MODULES([DPDK], [libdpdk], [ + DPDK_INCLUDE="$DPDK_CFLAGS" + DPDK_LIB="$DPDK_LIBS"], [ + DPDK_INCLUDE="-I/usr/local/include/dpdk -I/usr/include/dpdk" + DPDK_LIB="-ldpdk"]) + ;; + "static" | "yes") + PKG_CHECK_MODULES_STATIC([DPDK], [libdpdk], [ + DPDK_INCLUDE="$DPDK_CFLAGS" + DPDK_LIB="$DPDK_LIBS"], [ + DPDK_INCLUDE="-I/usr/local/include/dpdk -I/usr/include/dpdk" + DPDK_LIB="-ldpdk"]) + + dnl Statically linked private DPDK objects of form + dnl -l:file.a must be positioned between + dnl --whole-archive ... --no-whole-archive linker parameters. + dnl Old pkg-config versions misplace --no-whole-archive parameter + dnl and put it next to --whole-archive. 
+ AC_MSG_CHECKING([for faulty pkg-config version]) + echo "$DPDK_LIB" | grep -q 'whole-archive.*l:lib.*no-whole-archive' + status=$? + case $status in + 0) + AC_MSG_RESULT([no]) + ;; + 1) + AC_MSG_RESULT([yes]) + AC_MSG_ERROR([Please upgrade pkg-config]) + ;; + *) + AC_MSG_ERROR([grep exited with status $status]) + ;; + esac esac ovs_save_CFLAGS="$CFLAGS" ovs_save_LDFLAGS="$LDFLAGS" CFLAGS="$CFLAGS $DPDK_INCLUDE" - if test "$DPDK_AUTO_DISCOVER" = "false"; then - LDFLAGS="$LDFLAGS -L${DPDK_LIB_DIR}" - fi AC_CHECK_HEADERS([rte_config.h], [], [ AC_MSG_ERROR([unable to find rte_config.h in $with_dpdk]) @@ -422,20 +433,18 @@ AC_DEFUN([OVS_CHECK_DPDK], [ [AC_MSG_RESULT([yes]) DPDKLIB_FOUND=true], [AC_MSG_RESULT([no]) - if test "$DPDK_AUTO_DISCOVER" = "true"; then - AC_MSG_ERROR(m4_normalize([ - Could not find DPDK library in default search path, Use --with-dpdk - to specify the DPDK library installed in non-standard location])) - else - AC_MSG_ERROR([Could not find DPDK libraries in $DPDK_LIB_DIR]) - fi + AC_MSG_ERROR(m4_normalize([ + Could not find DPDK library in default search path, update + PKG_CONFIG_PATH for pkg-config to find the .pc file in + non-standard location])) ]) CFLAGS="$ovs_save_CFLAGS" LDFLAGS="$ovs_save_LDFLAGS" - if test "$DPDK_AUTO_DISCOVER" = "false"; then - OVS_LDFLAGS="$OVS_LDFLAGS -L$DPDK_LIB_DIR" - fi + # Stripping out possible instruction set specific configuration that DPDK + # forces in pkg-config since this could override user-specified options. + # It's enough to have -mssse3 to build with DPDK headers. + DPDK_INCLUDE=$(echo "$DPDK_INCLUDE" | sed 's/-march=[[^ ]]*//g') OVS_CFLAGS="$OVS_CFLAGS $DPDK_INCLUDE" OVS_ENABLE_OPTION([-mssse3]) @@ -444,17 +453,15 @@ AC_DEFUN([OVS_CHECK_DPDK], [ # This happens because the rest of the DPDK code doesn't use any symbol in # the pmd driver objects, and the drivers register themselves using an # __attribute__((constructor)) function. 
- # - # These options are specified inside a single -Wl directive to prevent - # autotools from reordering them. - # - # OTOH newer versions of dpdk pkg-config (generated with Meson) - # will already have flagged just the right set of libs with - # --whole-archive - in those cases do not wrap it once more. - case "$DPDK_LIB" in - *whole-archive*) DPDK_vswitchd_LDFLAGS=$DPDK_LIB;; - *) DPDK_vswitchd_LDFLAGS=-Wl,--whole-archive,$DPDK_LIB,--no-whole-archive - esac + # Wrap the DPDK libraries inside a single -Wl directive + # after comma separation to prevent autotools from reordering them. + DPDK_vswitchd_LDFLAGS=$(echo "$DPDK_LIB"| tr -s ' ' ',' | sed 's/-Wl,//g') + # Replace -pthread with -lpthread for LD and remove the last extra comma. + DPDK_vswitchd_LDFLAGS=$(echo "$DPDK_vswitchd_LDFLAGS"| sed 's/,$//' | \ + sed 's/-pthread/-lpthread/g') + # Prepend "-Wl,". + DPDK_vswitchd_LDFLAGS="-Wl,$DPDK_vswitchd_LDFLAGS" + AC_SUBST([DPDK_vswitchd_LDFLAGS]) AC_DEFINE([DPDK_NETDEV], [1], [System uses the DPDK module.]) fi diff --git a/include/sparse/automake.mk b/include/sparse/automake.mk index 974ad3fe5..e96637119 100644 --- a/include/sparse/automake.mk +++ b/include/sparse/automake.mk @@ -11,7 +11,9 @@ noinst_HEADERS += \ include/sparse/netpacket/packet.h \ include/sparse/pthread.h \ include/sparse/rte_atomic.h \ + include/sparse/rte_mbuf.h \ include/sparse/rte_memcpy.h \ + include/sparse/rte_trace_point.h \ include/sparse/sys/socket.h \ include/sparse/sys/sysmacros.h \ include/sparse/sys/types.h \ diff --git a/include/sparse/rte_mbuf.h b/include/sparse/rte_mbuf.h new file mode 100644 index 000000000..981cdb441 --- /dev/null +++ b/include/sparse/rte_mbuf.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2020 Intel, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CHECKER__ +#error "Use this header only with sparse. It is not a correct implementation." +#endif + +/* sparse doesn't know about gcc atomic builtins. */ +#ifndef __ATOMIC_ACQ_REL +#define __ATOMIC_ACQ_REL 0 +#define __ATOMIC_RELAXED 1 +#define __atomic_add_fetch(p, val, memorder) (*(p) = *(p) + (val)) +#define __atomic_store_n(p, val, memorder) (*(p) = (val)) +#endif + +/* Get actual definitions for us to annotate and build on. */ +#include_next diff --git a/include/sparse/rte_trace_point.h b/include/sparse/rte_trace_point.h new file mode 100644 index 000000000..803923275 --- /dev/null +++ b/include/sparse/rte_trace_point.h @@ -0,0 +1,28 @@ +/* Copyright 2020, Red Hat, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CHECKER__ +#error "Use this header only with sparse. It is not a correct implementation." +#endif + +/* sparse doesn't know about gcc atomic builtins. 
*/ +#ifndef __ATOMIC_ACQUIRE +#define __ATOMIC_ACQUIRE 0 +#define __atomic_load_n(p, memorder) *(p) +#endif + +/* Get actual definitions for us to annotate and + * build on. */ +#include_next diff --git a/lib/dpdk.c b/lib/dpdk.c index 2f235a742..319540394 100644 --- a/lib/dpdk.c +++ b/lib/dpdk.c @@ -443,7 +443,7 @@ dpdk_init__(const struct smap *ovs_other_config) /** * NOTE: This is an unsophisticated mechanism for determining the DPDK - * lcore for the DPDK Master. + * main core. */ if (auto_determine) { const struct ovs_numa_info_core *core; diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 75dffefb8..2640a421a 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -26,12 +26,6 @@ #include #include -/* Include rte_compat.h first to allow experimental API's needed for the - * rte_meter.h rfc4115 functions. Once they are no longer marked as - * experimental the #define and rte_compat.h include can be removed. - */ -#define ALLOW_EXPERIMENTAL_API -#include #include #include #include @@ -1312,7 +1306,7 @@ static int vhost_common_construct(struct netdev *netdev) OVS_REQUIRES(dpdk_mutex) { - int socket_id = rte_lcore_to_socket_id(rte_get_master_lcore()); + int socket_id = rte_lcore_to_socket_id(rte_get_main_lcore()); struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); dev->vhost_rxq_enabled = dpdk_rte_mzalloc(OVS_VHOST_MAX_QUEUE_NUM * @@ -1463,7 +1457,6 @@ netdev_dpdk_destruct(struct netdev *netdev) struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); struct rte_device *rte_dev; struct rte_eth_dev *eth_dev; - bool remove_on_close; ovs_mutex_lock(&dpdk_mutex); @@ -1475,20 +1468,15 @@ netdev_dpdk_destruct(struct netdev *netdev) * FIXME: avoid direct access to DPDK internal array rte_eth_devices. */ eth_dev = &rte_eth_devices[dev->port_id]; - remove_on_close = - eth_dev->data && - (eth_dev->data->dev_flags & RTE_ETH_DEV_CLOSE_REMOVE); rte_dev = eth_dev->device; /* Remove the eth device. 
*/ rte_eth_dev_close(dev->port_id); - /* Remove this rte device and all its eth devices if flag - * RTE_ETH_DEV_CLOSE_REMOVE is not supported (which means representors - * are not supported), or if all the eth devices belonging to the rte - * device are closed. + /* Remove this rte device and all its eth devices if all the eth + * devices belonging to the rte device are closed. */ - if (!remove_on_close || !netdev_dpdk_get_num_ports(rte_dev)) { + if (!netdev_dpdk_get_num_ports(rte_dev)) { int ret = rte_dev_remove(rte_dev); if (ret < 0) { -- GitLab From 4241d652e465827d5111b24092b27bff630980e8 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 18 Nov 2020 16:39:46 -0800 Subject: [PATCH 408/432] jsonrpc: Avoid disconnecting prematurely due to long poll intervals. Open vSwitch has a few different jsonrpc-based protocols that depend on jsonrpc_session to make sure that the connection is up and working. In turn, jsonrpc_session uses the "reconnect" state machine to send probes if nothing is received. This works fine in normal circumstances. In unusual circumstances, though, it can happen that the program is busy and doesn't even try to receive anything for a long time. Then the timer can time out without a good reason; if it had tried to receive something, it would have. There's a solution that the clients of jsonrpc_session could adopt. Instead of first calling jsonrpc_session_run(), which is what calls into "reconnect" to deal with timing out, and then calling into jsonrpc_session_recv(), which is what tries to receive something, they could use the opposite order. That would make sure that the timeout was always based on a recent attempt to receive something. Great. The actual code in OVS that uses jsonrpc_session, though, tends to use the opposite order, and there are enough users and this is a subtle enough issue that it could get flipped back around even if we fixed it now. So this commit takes a different approach. 
Instead of fixing this in the users of jsonrpc_session, we fix it in the users of reconnect: make them tell when they've tried to receive something (or disable this particular feature). This commit fixes the problem that way. It's kind of hard to reproduce but I'm pretty sure that I've seen it a number of times in testing. Signed-off-by: Ben Pfaff Acked-by: Ilya Maximets --- lib/jsonrpc.c | 5 ++++- lib/reconnect.c | 25 +++++++++++++++++++++++-- lib/reconnect.h | 1 + tests/test-reconnect.c | 1 + 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c index 08aaff061..8c5126ffc 100644 --- a/lib/jsonrpc.c +++ b/lib/jsonrpc.c @@ -1155,13 +1155,16 @@ jsonrpc_session_recv(struct jsonrpc_session *s) received_bytes = jsonrpc_get_received_bytes(s->rpc); jsonrpc_recv(s->rpc, &msg); + + long long int now = time_msec(); + reconnect_receive_attempted(s->reconnect, now); if (received_bytes != jsonrpc_get_received_bytes(s->rpc)) { /* Data was successfully received. * * Previously we only counted receiving a full message as activity, * but with large messages or a slow connection that policy could * time out the session mid-message. 
*/ - reconnect_activity(s->reconnect, time_msec()); + reconnect_activity(s->reconnect, now); } if (msg) { diff --git a/lib/reconnect.c b/lib/reconnect.c index c89abab88..a929ddfd2 100644 --- a/lib/reconnect.c +++ b/lib/reconnect.c @@ -61,6 +61,7 @@ struct reconnect { long long int last_activity; long long int last_connected; long long int last_disconnected; + long long int last_receive_attempt; unsigned int max_tries; unsigned int backoff_free_tries; @@ -109,6 +110,7 @@ reconnect_create(long long int now) fsm->last_activity = now; fsm->last_connected = LLONG_MAX; fsm->last_disconnected = LLONG_MAX; + fsm->last_receive_attempt = now; fsm->max_tries = UINT_MAX; fsm->creation_time = now; @@ -501,6 +503,19 @@ reconnect_activity(struct reconnect *fsm, long long int now) fsm->last_activity = now; } +/* Tell 'fsm' that some attempt to receive data on the connection was made at + * 'now'. The FSM only allows probe interval timer to expire when some attempt + * to receive data on the connection was received after the time when it should + * have expired. This helps in the case where there's a long delay in the poll + * loop and then reconnect_run() executes before the code to try to receive + * anything from the remote runs. (To disable this feature, just call + * reconnect_receive_attempted(fsm, LLONG_MAX).) 
*/ +void +reconnect_receive_attempted(struct reconnect *fsm, long long int now) +{ + fsm->last_receive_attempt = now; +} + static void reconnect_transition__(struct reconnect *fsm, long long int now, enum state state) @@ -541,13 +556,19 @@ reconnect_deadline__(const struct reconnect *fsm) case S_ACTIVE: if (fsm->probe_interval) { long long int base = MAX(fsm->last_activity, fsm->state_entered); - return base + fsm->probe_interval; + long long int expiration = base + fsm->probe_interval; + if (fsm->last_receive_attempt >= expiration) { + return expiration; + } } return LLONG_MAX; case S_IDLE: if (fsm->probe_interval) { - return fsm->state_entered + fsm->probe_interval; + long long int expiration = fsm->state_entered + fsm->probe_interval; + if (fsm->last_receive_attempt >= expiration) { + return expiration; + } } return LLONG_MAX; diff --git a/lib/reconnect.h b/lib/reconnect.h index 9f2d469e2..40cc569c4 100644 --- a/lib/reconnect.h +++ b/lib/reconnect.h @@ -83,6 +83,7 @@ void reconnect_connected(struct reconnect *, long long int now); void reconnect_connect_failed(struct reconnect *, long long int now, int error); void reconnect_activity(struct reconnect *, long long int now); +void reconnect_receive_attempted(struct reconnect *, long long int now); enum reconnect_action { RECONNECT_CONNECT = 1, diff --git a/tests/test-reconnect.c b/tests/test-reconnect.c index 5a14e7fe5..bf0463e25 100644 --- a/tests/test-reconnect.c +++ b/tests/test-reconnect.c @@ -48,6 +48,7 @@ test_reconnect_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED) now = 1000; reconnect = reconnect_create(now); + reconnect_receive_attempted(reconnect, LLONG_MAX); reconnect_set_name(reconnect, "remote"); reconnect_get_stats(reconnect, now, &prev); printf("### t=%d ###\n", now); -- GitLab From 75439c4bdc2372e9e9d3344424102ffeb46c5280 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 20 Nov 2020 10:55:59 -0800 Subject: [PATCH 409/432] ovsdb-idl: Avoid redundant clearing and parsing of received data. 
ovsdb_idl_db_parse_monitor_reply() clears the IDL and parses the received data. There's no need to do it again afterward. Signed-off-by: Ben Pfaff Fixes: 1b1d2e6daa56 ("ovsdb: Introduce experimental support for clustered databases.") Acked-by: Ilya Maximets --- lib/ovsdb-idl.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index efaa08a1e..fb638c499 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -819,9 +819,6 @@ ovsdb_idl_process_response(struct ovsdb_idl *idl, struct jsonrpc_msg *msg) ovsdb_idl_db_parse_monitor_reply(&idl->data, msg->result, OVSDB_IDL_MM_MONITOR); idl->data.change_seqno++; - ovsdb_idl_clear(idl); - ovsdb_idl_db_parse_update(&idl->data, msg->result, - OVSDB_IDL_MM_MONITOR); break; case IDL_S_MONITORING: -- GitLab From de914f4ee58e8222098f6a5e2553c9add336397c Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 20 Nov 2020 23:17:41 -0800 Subject: [PATCH 410/432] ovsdb-idl: Fix memory leak sending messages without a session. When there's no open session, we still have to free the messages that we make but cannot send. I'm not confident that these fix actual bugs, because it seems possible that these code paths can only be hit when the session is nonnull. 
Signed-off-by: Ben Pfaff Acked-by: Ilya Maximets --- lib/ovsdb-idl.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index fb638c499..02a49b324 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -710,6 +710,8 @@ ovsdb_idl_send_request(struct ovsdb_idl *idl, struct jsonrpc_msg *request) idl->request_id = json_clone(request->id); if (idl->session) { jsonrpc_session_send(idl->session, request); + } else { + jsonrpc_msg_destroy(request); } } @@ -4489,8 +4491,10 @@ ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn) if (!any_updates) { txn->status = TXN_UNCHANGED; json_destroy(operations); - } else if (txn->db->idl->session - && !jsonrpc_session_send( + } else if (!txn->db->idl->session) { + txn->status = TXN_TRY_AGAIN; + json_destroy(operations); + } else if (!jsonrpc_session_send( txn->db->idl->session, jsonrpc_create_request( "transact", operations, &txn->request_id))) { @@ -5198,6 +5202,8 @@ ovsdb_idl_set_lock(struct ovsdb_idl *idl, const char *lock_name) } if (idl->session) { jsonrpc_session_send(idl->session, msg); + } else { + jsonrpc_msg_destroy(msg); } } } -- GitLab From ba67afcf2b95bb3d2a31d0968246b5382f3b2f5c Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 1 Dec 2020 16:54:10 -0800 Subject: [PATCH 411/432] ovsdb-idl: Remove prototype for function that is not defined or used. 
Signed-off-by: Ben Pfaff Acked-by: Ilya Maximets --- lib/ovsdb-idl.h | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h index a1a577664..789337df9 100644 --- a/lib/ovsdb-idl.h +++ b/lib/ovsdb-idl.h @@ -76,7 +76,6 @@ void ovsdb_idl_set_lock(struct ovsdb_idl *, const char *lock_name); bool ovsdb_idl_has_lock(const struct ovsdb_idl *); bool ovsdb_idl_is_lock_contended(const struct ovsdb_idl *); -const struct uuid * ovsdb_idl_get_monitor_id(const struct ovsdb_idl *); unsigned int ovsdb_idl_get_seqno(const struct ovsdb_idl *); bool ovsdb_idl_has_ever_connected(const struct ovsdb_idl *); void ovsdb_idl_enable_reconnect(struct ovsdb_idl *); -- GitLab From 47da7fa5a0ae80d86f896dd13ff6c4abc1f2a640 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 1 Dec 2020 16:54:45 -0800 Subject: [PATCH 412/432] ovsdb-idl: Improve prototypes. Adding parameter names makes these prototypes clearer. Signed-off-by: Ben Pfaff Acked-by: Ilya Maximets --- lib/ovsdb-idl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h index 789337df9..05bb48d66 100644 --- a/lib/ovsdb-idl.h +++ b/lib/ovsdb-idl.h @@ -62,8 +62,8 @@ struct ovsdb_idl *ovsdb_idl_create(const char *remote, bool retry); struct ovsdb_idl *ovsdb_idl_create_unconnected( const struct ovsdb_idl_class *, bool monitor_everything_by_default); -void ovsdb_idl_set_remote(struct ovsdb_idl *, const char *, bool); -void ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *, bool); +void ovsdb_idl_set_remote(struct ovsdb_idl *, const char *remote, bool retry); +void ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *, bool shuffle); void ovsdb_idl_reset_min_index(struct ovsdb_idl *); void ovsdb_idl_destroy(struct ovsdb_idl *); -- GitLab From 26531558744096ac9cba5c43b0c1571ee8b8d691 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 1 Dec 2020 16:58:24 -0800 Subject: [PATCH 413/432] ovsdb-idl: Add comment. 
Signed-off-by: Ben Pfaff Acked-by: Ilya Maximets --- lib/ovsdb-idl.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 02a49b324..ee2fbfa3f 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -617,6 +617,14 @@ ovsdb_idl_destroy(struct ovsdb_idl *idl) } } +/* By default, or if 'leader_only' is true, when 'idl' connects to a clustered + * database, the IDL will avoid servers other than the cluster leader. This + * ensures that any data that it reads and reports is up-to-date. If + * 'leader_only' is false, the IDL will accept any server in the cluster, which + * means that for read-only transactions it can report and act on stale data + * (transactions that modify the database are always serialized even with false + * 'leader_only'). Refer to Understanding Cluster Consistency in ovsdb(7) for + * more information. */ void ovsdb_idl_set_leader_only(struct ovsdb_idl *idl, bool leader_only) { -- GitLab From 02f76fb42ae93ecf17dcf012908733dd2d3df2cd Mon Sep 17 00:00:00 2001 From: David Marchand Date: Sat, 19 Dec 2020 09:40:30 +0100 Subject: [PATCH 414/432] github: Fix Ubuntu package installation. Before trying to install a package, APT cache must be updated to avoid asking for an unavailable version of a package. 
Fixes: 6cb2f5a630e3 ("github: Add GitHub Actions workflow.") Signed-off-by: David Marchand Signed-off-by: Ilya Maximets --- .github/workflows/build-and-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index c83066138..b29c300c5 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -133,6 +133,8 @@ jobs: path: dpdk-dir key: ${{ env.matrix_key }}-${{ env.ci_key }} + - name: update APT cache + run: sudo apt update - name: install common dependencies if: matrix.deb_package == '' run: sudo apt install -y ${{ env.dependencies }} -- GitLab From ced0d8fb9267139226e5e4157eac9475c3d4086c Mon Sep 17 00:00:00 2001 From: XiaoXiong Ding Date: Wed, 30 Sep 2020 14:44:00 +0800 Subject: [PATCH 415/432] ofproto-dpif-xlate: Stop forwarding MLD reports to group ports. According to RFC 4541 section 2.1.1, a snooping switch should forward membership reports only to ports with routers attached. The current code violates the RFC by forwarding membership reports to group ports as well. The same issue doesn't exist with IPv4.
Fixes: 06994f879c ("mcast-snooping: Add Multicast Listener Discovery support") Signed-off-by: XiaoXiong Ding Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- ofproto/ofproto-dpif-xlate.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 11aa20754..4ea776052 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -3100,6 +3100,7 @@ xlate_normal(struct xlate_ctx *ctx) xlate_report(ctx, OFT_DETAIL, "MLD query, flooding"); xlate_normal_flood(ctx, in_xbundle, &xvlan); } + return; } else { if (is_ip_local_multicast(flow, wc)) { /* RFC4541: section 2.1.2, item 2: Packets with a dst IP -- GitLab From a27d70a8984879bc0a66afc2d7c35149659be24d Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Mon, 12 Oct 2020 07:38:38 -0400 Subject: [PATCH 416/432] conntrack: add generic IP protocol support Currently, userspace conntrack only tracks TCP, UDP, and ICMP, and all other IP protocols are discarded, and the +inv state is returned. This is not in line with the kernel conntrack, where a packet is treated as generic L3 if no L4 information can be extracted. The change below mimics the behavior of the kernel. Signed-off-by: Eelco Chaudron Acked-by: Flavio Leitner Signed-off-by: Ilya Maximets --- NEWS | 3 +++ lib/conntrack-private.h | 3 +++ lib/conntrack.c | 29 +++++++++++++++++++---------- tests/system-traffic.at | 29 +++++++++++++++++++++++++++++ 4 files changed, 54 insertions(+), 10 deletions(-) diff --git a/NEWS b/NEWS index 1a39cc661..e62d815fa 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,9 @@ Post-v2.14.0 restricts a flow dump to a single PMD thread if set. * New 'options:dpdk-vf-mac' field for DPDK interface of VF ports, that allows configuring the MAC address of a VF representor. + * Add generic IP protocol support to conntrack. With this change, all + none UDP, TCP, and ICMP traffic will be treated as general L3 + traffic, i.e. using 3 tupples.
- The environment variable OVS_UNBOUND_CONF, if set, is now used as the DNS resolver's (unbound) configuration file. - Linux datapath: diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h index 789af82ff..3895bc688 100644 --- a/lib/conntrack-private.h +++ b/lib/conntrack-private.h @@ -59,6 +59,9 @@ struct conn_key { uint8_t nw_proto; }; +/* Verify that nw_proto stays uint8_t as it's used to index into l4_protos[] */ +BUILD_ASSERT_DECL(MEMBER_SIZEOF(struct conn_key, nw_proto) == sizeof(uint8_t)); + /* This is used for alg expectations; an expectation is a * context created in preparation for establishing a data * connection. The expectation is created by the control diff --git a/lib/conntrack.c b/lib/conntrack.c index 930ed0be6..bba38f9f5 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -146,12 +146,7 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx, static void expectation_clean(struct conntrack *ct, const struct conn_key *parent_key); -static struct ct_l4_proto *l4_protos[] = { - [IPPROTO_TCP] = &ct_proto_tcp, - [IPPROTO_UDP] = &ct_proto_other, - [IPPROTO_ICMP] = &ct_proto_icmp4, - [IPPROTO_ICMPV6] = &ct_proto_icmp6, -}; +static struct ct_l4_proto *l4_protos[UINT8_MAX + 1]; static void handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx, @@ -293,6 +288,7 @@ ct_print_conn_info(const struct conn *c, const char *log_msg, struct conntrack * conntrack_init(void) { + static struct ovsthread_once setup_l4_once = OVSTHREAD_ONCE_INITIALIZER; struct conntrack *ct = xzalloc(sizeof *ct); ovs_rwlock_init(&ct->resources_lock); @@ -320,6 +316,18 @@ conntrack_init(void) ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct); ct->ipf = ipf_init(); + /* Initialize the l4 protocols. */ + if (ovsthread_once_start(&setup_l4_once)) { + for (int i = 0; i < ARRAY_SIZE(l4_protos); i++) { + l4_protos[i] = &ct_proto_other; + } + /* IPPROTO_UDP uses ct_proto_other, so no need to initialize it. 
*/ + l4_protos[IPPROTO_TCP] = &ct_proto_tcp; + l4_protos[IPPROTO_ICMP] = &ct_proto_icmp4; + l4_protos[IPPROTO_ICMPV6] = &ct_proto_icmp6; + + ovsthread_once_done(&setup_l4_once); + } return ct; } @@ -1982,9 +1990,10 @@ extract_l4(struct conn_key *key, const void *data, size_t size, bool *related, return (!related || check_l4_icmp6(key, data, size, l3, validate_checksum)) && extract_l4_icmp6(key, data, size, related); - } else { - return false; } + + /* For all other protocols we do not have L4 keys, so keep them zero. */ + return true; } static bool @@ -2267,8 +2276,8 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn, conn->nat_info->nat_action & NAT_ACTION_SRC_PORT ? true : false; union ct_addr first_addr = ct_addr; - bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP && - conn->key.nw_proto != IPPROTO_ICMPV6; + bool pat_enabled = conn->key.nw_proto == IPPROTO_TCP || + conn->key.nw_proto == IPPROTO_UDP; while (true) { if (conn->nat_info->nat_action & NAT_ACTION_SRC) { diff --git a/tests/system-traffic.at b/tests/system-traffic.at index 14f349b5b..d2a4dbffe 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -2333,6 +2333,35 @@ NXST_FLOW reply: OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP +AT_SETUP([conntrack - generic IP protocol]) +CHECK_CONNTRACK() +OVS_TRAFFIC_VSWITCHD_START() +AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg ofproto_dpif_upcall:dbg]) + +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24") +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24") + +AT_DATA([flows.txt], [dnl +table=0, priority=1,action=drop +table=0, priority=10,arp,action=normal +table=0, priority=100,ip,action=ct(table=1) +table=1, priority=100,in_port=1,ip,ct_state=+trk+new,action=ct(commit) +table=1, priority=100,in_port=1,ct_state=+trk+est,action=normal +]) + +AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) + +AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 
packet=01005e00001200005e000101080045c0002800000000ff7019cdc0a8001ee0000012210164010001ba52c0a800010000000000000000000000000000 actions=resubmit(,0)"]) + +AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=192\.168\.0\.30,"], [], [dnl +112,orig=(src=192.168.0.30,dst=224.0.0.18,sport=0,dport=0),reply=(src=224.0.0.18,dst=192.168.0.30,sport=0,dport=0) +]) + +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + AT_SETUP([conntrack - ICMP related]) AT_SKIP_IF([test $HAVE_NC = no]) CHECK_CONNTRACK() -- GitLab From e8451e1443e7c677190da9ddce5dbd4dfffc3590 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 25 Nov 2020 11:12:59 +0100 Subject: [PATCH 417/432] raft: Add some debugging information to cluster/status command. Introduce the following info useful for cluster debugging to cluster/status command: - time elapsed from last start/complete election - election trigger (e.g. timeout) - number of disconnections - time elapsed since the last raft message was received Acked-by: Dumitru Ceara Signed-off-by: Lorenzo Bianconi Signed-off-by: Ilya Maximets --- ovsdb/raft-private.h | 2 ++ ovsdb/raft.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h index 76b097b89..a69e37e5c 100644 --- a/ovsdb/raft-private.h +++ b/ovsdb/raft-private.h @@ -90,6 +90,8 @@ struct raft_server { /* For use in adding and removing servers: */ struct uuid requester_sid; /* Nonzero if requested via RPC. */ struct unixctl_conn *requester_conn; /* Only if requested via unixctl. */ + + long long int last_msg_ts; /* Last received msg timestamp in ms. */ }; void raft_server_destroy(struct raft_server *); diff --git a/ovsdb/raft.c b/ovsdb/raft.c index 760dfca6d..ea91d1fdb 100644 --- a/ovsdb/raft.c +++ b/ovsdb/raft.c @@ -264,6 +264,12 @@ struct raft { long long int election_base; /* Time of last heartbeat from leader. */ long long int election_timeout; /* Time at which we start an election.
*/ + long long int election_start; /* Start election time. */ + long long int election_won; /* Time of election completion. */ + bool leadership_transfer; /* Was the leadership transferred? */ + + unsigned int n_disconnections; + /* Used for joining a cluster. */ bool joining; /* Attempting to join the cluster? */ struct sset remote_addresses; /* Addresses to try to find other servers. */ @@ -1708,6 +1714,10 @@ raft_start_election(struct raft *raft, bool leadership_transfer) raft->n_votes = 0; + raft->election_start = time_msec(); + raft->election_won = 0; + raft->leadership_transfer = leadership_transfer; + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); if (!VLOG_DROP_INFO(&rl)) { long long int now = time_msec(); @@ -1857,6 +1867,7 @@ raft_run(struct raft *raft) struct raft_conn *next; LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) { if (!raft_conn_should_stay_open(raft, conn)) { + raft->n_disconnections++; raft_conn_close(conn); } } @@ -2597,6 +2608,7 @@ raft_become_leader(struct raft *raft) ovs_assert(raft->role != RAFT_LEADER); raft->role = RAFT_LEADER; + raft->election_won = time_msec(); raft_set_leader(raft, &raft->sid); raft_reset_election_timer(raft); raft_reset_ping_timer(raft); @@ -3714,6 +3726,7 @@ raft_handle_add_server_request(struct raft *raft, s->requester_sid = rq->common.sid; s->requester_conn = NULL; s->phase = RAFT_PHASE_CATCHUP; + s->last_msg_ts = time_msec(); /* Start sending the log. 
If this is the first time we've tried to add * this server, then this will quickly degenerate into an InstallSnapshot @@ -4273,6 +4286,11 @@ raft_handle_execute_command_reply( static void raft_handle_rpc(struct raft *raft, const union raft_rpc *rpc) { + struct raft_server *s = raft_find_server(raft, &rpc->common.sid); + if (s) { + s->last_msg_ts = time_msec(); + } + uint64_t term = raft_rpc_get_term(rpc); if (term && !raft_should_suppress_disruptive_server(raft, rpc) @@ -4485,6 +4503,17 @@ raft_unixctl_status(struct unixctl_conn *conn, raft_put_sid("Vote", &raft->vote, raft, &s); ds_put_char(&s, '\n'); + if (raft->election_start) { + ds_put_format(&s, + "Last Election started %"PRIu64" ms ago, reason: %s\n", + (uint64_t) (time_msec() - raft->election_start), + raft->leadership_transfer + ? "leadership_transfer" : "timeout"); + } + if (raft->election_won) { + ds_put_format(&s, "Last Election won: %"PRIu64" ms ago\n", + (uint64_t) (time_msec() - raft->election_won)); + } ds_put_format(&s, "Election timer: %"PRIu64, raft->election_timer); if (raft->role == RAFT_LEADER && raft->election_timer_new) { ds_put_format(&s, " (changing to %"PRIu64")", @@ -4512,6 +4541,8 @@ raft_unixctl_status(struct unixctl_conn *conn, } ds_put_char(&s, '\n'); + ds_put_format(&s, "Disconnections: %u\n", raft->n_disconnections); + ds_put_cstr(&s, "Servers:\n"); struct raft_server *server; HMAP_FOR_EACH (server, hmap_node, &raft->servers) { @@ -4536,6 +4567,10 @@ raft_unixctl_status(struct unixctl_conn *conn, ds_put_format(&s, " next_index=%"PRIu64" match_index=%"PRIu64, server->next_index, server->match_index); } + if (server->last_msg_ts) { + ds_put_format(&s, " last msg %"PRIu64" ms ago", + (uint64_t) (time_msec() - server->last_msg_ts)); + } ds_put_char(&s, '\n'); } -- GitLab From b2b7e388f4d75baaa894e0433d51e09f944f2c5a Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 17 Dec 2020 18:20:05 +0100 Subject: [PATCH 418/432] test-stream: Silence memory leak report. 
AddressSanitizer reports this as a leak. Let's just free the memory before exiting to avoid the noise. 'stream_close()' doesn't update the pointer, so this will not change the return value. Signed-off-by: Ilya Maximets Acked-by: Flavio Leitner Acked-by: Paolo Valerio --- tests/test-stream.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test-stream.c b/tests/test-stream.c index 4af44200e..68ce2c544 100644 --- a/tests/test-stream.c +++ b/tests/test-stream.c @@ -42,5 +42,6 @@ main(int argc, char *argv[]) VLOG_ERR("stream_open_block(%s) failure: %s", argv[1], ovs_strerror(error)); } + stream_close(stream); return (error || !stream) ? 1 : 0; } -- GitLab From 9eeb44aadd63be7a1ef9f7fe5cfef8005935313b Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Thu, 17 Dec 2020 18:22:12 +0100 Subject: [PATCH 419/432] ovsdb-tool: Fix datum leak in the show-log command. Fixes: 4e92542cefb7 ("ovsdb-tool: Make "show-log" convert raw JSON to easier-to-read syntax.") Signed-off-by: Ilya Maximets Acked-by: Dumitru Ceara --- ovsdb/ovsdb-tool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c index 30d0472b2..1b49b6fc8 100644 --- a/ovsdb/ovsdb-tool.c +++ b/ovsdb/ovsdb-tool.c @@ -720,6 +720,7 @@ print_db_changes(struct shash *tables, struct smap *names, ds_init(&s); ovsdb_datum_to_string(&datum, type, &s); value_string = ds_steal_cstr(&s); + ovsdb_datum_destroy(&datum, type); } else { ovsdb_error_destroy(error); } -- GitLab From a35e9ab30215cb1841707704dee0658bb714ec44 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Fri, 18 Dec 2020 18:16:14 +0100 Subject: [PATCH 420/432] NEWS: Move '--offload-stats' entry to correct release. Patch landed to 2.13, not 2.12. 
Fixes: 164413156cf9 ("Add offload packets statistics") Signed-off-by: Ilya Maximets Acked-by: Flavio Leitner --- NEWS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index e62d815fa..2ac7c00d1 100644 --- a/NEWS +++ b/NEWS @@ -126,6 +126,9 @@ v2.13.0 - 14 Feb 2020 - 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and type filter supports new filters: "dpdk" and "partially-offloaded". + - Add new argument '--offload-stats' for command + 'ovs-appctl bridge/dump-flows', + so it can display offloaded packets statistics. v2.12.0 - 03 Sep 2019 --------------------- @@ -200,9 +203,6 @@ v2.12.0 - 03 Sep 2019 * Add support for conntrack zone-based timeout policy. - 'ovs-dpctl dump-flows' is no longer suitable for dumping offloaded flows. 'ovs-appctl dpctl/dump-flows' should be used instead. - - Add new argument '--offload-stats' for command - 'ovs-appctl bridge/dump-flows', - so it can display offloaded packets statistics. - Add L2 GRE tunnel over IPv6 support. v2.11.0 - 19 Feb 2019 -- GitLab From 731378733619086403dee9d88b3bf34cee2544f9 Mon Sep 17 00:00:00 2001 From: Brad Cowie Date: Wed, 12 Sep 2018 11:53:28 +1200 Subject: [PATCH 421/432] Update tutorial for newer versions of Faucet and Open vSwitch. Newer versions of Faucet use a dynamic OpenFlow pipeline based on what features are enabled in the configuration file. Update log output, flow table dumps and explanations to be consistent with newer Faucet versions. Remove mentions of bugs that we have since fixed in Faucet since the tutorial was originally written. Adds documentation on changes to Open vSwitch commands to recommend using a version that is compatible with the features of the tutorial. 
Reported-by: Matthias Ableidinger Reported-at: https://mail.openvswitch.org/pipermail/ovs-discuss/2018-August/047180.html Signed-off-by: Brad Cowie Signed-off-by: Ilya Maximets --- Documentation/tutorials/faucet.rst | 596 +++++++++++++++-------------- 1 file changed, 304 insertions(+), 292 deletions(-) diff --git a/Documentation/tutorials/faucet.rst b/Documentation/tutorials/faucet.rst index b7bfb575b..6aa4d39aa 100644 --- a/Documentation/tutorials/faucet.rst +++ b/Documentation/tutorials/faucet.rst @@ -68,9 +68,9 @@ approaches: $ git clone https://github.com/openvswitch/ovs.git $ cd ovs - The default checkout is the master branch. You can check out a tag - (such as v2.8.0) or a branch (such as origin/branch-2.8), if you - prefer. + The default checkout is the master branch. You will need to use the master + branch for this tutorial, as it includes functionality that the tutorial + requires. 2. If you do not already have an installed copy of Open vSwitch on your system, or if you do not want to use it for the sandbox (the sandbox will not @@ -80,6 +80,13 @@ approaches: $ tutorial/ovs-sandbox + .. note:: + + The default behaviour for some of the commands used in this tutorial + changed in Open vSwitch versions 2.9.x and 2.10.x, which breaks the + tutorial. We recommend following step 3 and building master from + source or using a system Open vSwitch that is version 2.8.x or older. + If it is successful, you will find yourself in a subshell environment, which is the sandbox (you can exit with ``exit`` or Control+D). If so, you're finished and do not need to complete the rest of the steps. If it fails, @@ -90,6 +97,12 @@ approaches: tutorial, there is no need to compile the Linux kernel module, or to use any of the optional libraries such as OpenSSL, DPDK, or libcap-ng.
+ If you are using a Linux system that uses apt and have some ``deb-src`` + repos listed in ``/etc/apt/sources.list``, often an easy way to install + the build dependencies for a package is to use ``build-dep``:: + + $ sudo apt-get build-dep openvswitch + 4. Configure and build Open vSwitch:: $ ./boot.sh @@ -130,7 +143,7 @@ between one and the other. 2. Build a docker container image:: - $ docker build -t faucet/faucet . + $ sudo docker build -t faucet/faucet -f Dockerfile.faucet . This will take a few minutes. @@ -147,7 +160,7 @@ between one and the other. 4. Create a container and start Faucet:: - $ docker run -d --name faucet --restart=always -v $(pwd)/inst/:/etc/faucet/ -v $(pwd)/inst/:/var/log/faucet/ -p 6653:6653 -p 9302:9302 faucet/faucet + $ sudo docker run -d --name faucet --restart=always -v $(pwd)/inst/:/etc/faucet/ -v $(pwd)/inst/:/var/log/faucet/ -p 6653:6653 -p 9302:9302 faucet/faucet 5. Look in ``inst/faucet.log`` to verify that Faucet started. It will probably start with an exception and traceback because we have not @@ -156,17 +169,17 @@ between one and the other. 6. Later on, to make a new or updated Faucet configuration take effect quickly, you can run:: - $ docker exec faucet pkill -HUP -f faucet.faucet + $ sudo docker exec faucet pkill -HUP -f faucet.faucet Another way is to stop and start the Faucet container:: - $ docker restart faucet + $ sudo docker restart faucet You can also stop and delete the container; after this, to start it again, you need to rerun the ``docker run`` command:: - $ docker stop faucet - $ docker rm faucet + $ sudo docker stop faucet + $ sudo docker rm faucet Overview -------- @@ -260,17 +273,16 @@ to be 0x1. This also sets high MAC learning and ARP timeouts. The defaults are 5 minutes and about 8 minutes, which are fine in production but - sometimes too fast for manual experimentation. (Don't use a timeout - bigger than about 65000 seconds because it will crash Faucet.) + sometimes too fast for manual experimentation. 
Now restart Faucet so that the configuration takes effect, e.g.:: - $ docker restart faucet + $ sudo docker restart faucet Assuming that the configuration update is successful, you should now see a new line at the end of ``inst/faucet.log``:: - Jan 06 15:14:35 faucet INFO Add new datapath DPID 1 (0x1) + Sep 10 06:44:10 faucet INFO Add new datapath DPID 1 (0x1) Faucet is now waiting for a switch with datapath ID 0x1 to connect to it over OpenFlow, so our next step is to create a switch with OVS and @@ -319,18 +331,24 @@ information, run ``man ovs-vswitchd.conf.db`` and search for means that, for example, there is never a time when the controller is set but it has not yet been configured as out-of-band. +Faucet requires ports to be in the up state before it will configure them. In +Open vSwitch versions earlier than 2.11.0 dummy ports started in the down state. +You will need to force them to come up with the following ``ovs-appctl`` command +(please skip this step if using a newer version of Open vSwitch):: + + $ ovs-appctl netdev-dummy/set-admin-state up + Now, if you look at ``inst/faucet.log`` again, you should see that Faucet recognized and configured the new switch and its ports:: - Jan 06 15:17:10 faucet INFO DPID 1 (0x1) connected - Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Cold start configuring DP - Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Configuring VLAN 100 vid:100 ports:Port 1,Port 2,Port 3 - Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Configuring VLAN 200 vid:200 ports:Port 4,Port 5 - Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 1 up, configuring - Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 2 up, configuring - Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 3 up, configuring - Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 4 up, configuring - Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 5 up, configuring + Sep 10 06:45:03 faucet.valve INFO DPID 1 (0x1) switch-1 Cold start configuring DP + Sep 10 06:45:03 
faucet.valve INFO DPID 1 (0x1) switch-1 Configuring VLAN 100 vid:100 ports:Port 1,Port 2,Port 3 + Sep 10 06:45:03 faucet.valve INFO DPID 1 (0x1) switch-1 Configuring VLAN 200 vid:200 ports:Port 4,Port 5 + Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 1 (1) up + Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 2 (2) up + Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 3 (3) up + Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 4 (4) up + Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 5 (5) up Over on the Open vSwitch side, you can see a lot of related activity if you take a look in ``sandbox/ovs-vswitchd.log``. For example, here @@ -340,51 +358,48 @@ ports and capabilities:: rconn|INFO|br0<->tcp:127.0.0.1:6653: connecting... vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_HELLO (OF1.4) (xid=0x1): version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05 - vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_HELLO (OF1.3) (xid=0x2f24810a): + vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_HELLO (OF1.3) (xid=0xdb9dab08): version bitmap: 0x01, 0x02, 0x03, 0x04 vconn|DBG|tcp:127.0.0.1:6653: negotiated OpenFlow version 0x04 (we support version 0x05 and earlier, peer supports version 0x04 and earlier) rconn|INFO|br0<->tcp:127.0.0.1:6653: connected - vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_ECHO_REQUEST (OF1.3) (xid=0x2f24810b): 0 bytes of payload - vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_ECHO_REPLY (OF1.3) (xid=0x2f24810b): 0 bytes of payload - vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FEATURES_REQUEST (OF1.3) (xid=0x2f24810c): - vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_FEATURES_REPLY (OF1.3) (xid=0x2f24810c): dpid:0000000000000001 - n_tables:254, n_buffers:0 - capabilities: FLOW_STATS TABLE_STATS PORT_STATS GROUP_STATS QUEUE_STATS - vconn|DBG|tcp:127.0.0.1:6653: received: OFPST_PORT_DESC request (OF1.3) (xid=0x2f24810d): port=ANY - vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPST_PORT_DESC reply 
(OF1.3) (xid=0x2f24810d): + vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FEATURES_REQUEST (OF1.3) (xid=0xdb9dab09): + 00040|vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_FEATURES_REPLY (OF1.3) (xid=0xdb9dab09): dpid:0000000000000001 + n_tables:254, n_buffers:0 + capabilities: FLOW_STATS TABLE_STATS PORT_STATS GROUP_STATS QUEUE_STATS + vconn|DBG|tcp:127.0.0.1:6653: received: OFPST_PORT_DESC request (OF1.3) (xid=0xdb9dab0a): port=ANY + vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPST_PORT_DESC reply (OF1.3) (xid=0xdb9dab0a): 1(p1): addr:aa:55:aa:55:00:14 - config: PORT_DOWN - state: LINK_DOWN + config: 0 + state: LIVE speed: 0 Mbps now, 0 Mbps max 2(p2): addr:aa:55:aa:55:00:15 - config: PORT_DOWN - state: LINK_DOWN + config: 0 + state: LIVE speed: 0 Mbps now, 0 Mbps max 3(p3): addr:aa:55:aa:55:00:16 - config: PORT_DOWN - state: LINK_DOWN + config: 0 + state: LIVE speed: 0 Mbps now, 0 Mbps max 4(p4): addr:aa:55:aa:55:00:17 - config: PORT_DOWN - state: LINK_DOWN + config: 0 + state: LIVE speed: 0 Mbps now, 0 Mbps max 5(p5): addr:aa:55:aa:55:00:18 - config: PORT_DOWN - state: LINK_DOWN + config: 0 + state: LIVE speed: 0 Mbps now, 0 Mbps max - LOCAL(br0): addr:c6:64:ff:59:48:41 - config: PORT_DOWN - state: LINK_DOWN + LOCAL(br0): addr:42:51:a1:c4:97:45 + config: 0 + state: LIVE speed: 0 Mbps now, 0 Mbps max After that, you can see Faucet delete all existing flows and then start adding new ones:: - vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0x2f24810e): DEL table:255 priority=0 actions=drop - vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_BARRIER_REQUEST (OF1.3) (xid=0x2f24810f): - vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_BARRIER_REPLY (OF1.3) (xid=0x2f24810f): - vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0x2f248110): ADD priority=0 cookie:0x5adc15c0 out_port:0 actions=drop - vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0x2f248111): ADD table:1 priority=0 cookie:0x5adc15c0 out_port:0 
actions=drop + vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0xdb9dab0f): DEL table:255 priority=0 actions=drop + vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0xdb9dab10): ADD priority=0 cookie:0x5adc15c0 out_port:0 actions=drop + vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0xdb9dab11): ADD table:1 priority=0 cookie:0x5adc15c0 out_port:0 actions=goto_table:2 + vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0xdb9dab12): ADD table:2 priority=0 cookie:0x5adc15c0 out_port:0 actions=goto_table:3 ... OpenFlow Layer @@ -393,7 +408,8 @@ OpenFlow Layer Let's take a look at the OpenFlow tables that Faucet set up. Before we do that, it's helpful to take a look at ``docs/architecture.rst`` in the Faucet documentation to learn how Faucet structures its flow -tables. In summary, this document says: +tables. In summary, this document says that when all features are enabled +our table layout will be: Table 0 Port-based ACLs @@ -456,38 +472,43 @@ this:: $ dump-flows br0 -First, table 0 has a flow that just jumps to table 1 for each -configured port, and drops other unrecognized packets. Presumably it -will do more if we configured port-based ACLs:: +To reduce resource utilisation on hardware switches, Faucet will try to install +the minimal set of OpenFlow tables to match the features enabled in +``faucet.yaml``. Since we have only enabled switching we will end up +with 4 tables. 
If we inspect the contents of ``inst/faucet.log`` Faucet will +tell us what each table does:: - priority=9099,in_port=p1 actions=goto_table:1 - priority=9099,in_port=p2 actions=goto_table:1 - priority=9099,in_port=p3 actions=goto_table:1 - priority=9099,in_port=p4 actions=goto_table:1 - priority=9099,in_port=p5 actions=goto_table:1 - priority=0 actions=drop + Sep 10 06:44:10 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 0 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('eth_type', False), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: None name: vlan next_tables: ['eth_src'] output: True set_fields: ('vlan_vid',) size: 32 table_id: 0 vlan_port_scale: 1.5 + Sep 10 06:44:10 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 1 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('eth_src', False), ('eth_type', False), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: eth_dst name: eth_src next_tables: ['eth_dst', 'flood'] output: True set_fields: ('vlan_vid', 'eth_dst') size: 32 table_id: 1 vlan_port_scale: 4.1 + Sep 10 06:44:10 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 2 table config dec_ttl: None exact_match: True match_types: (('eth_dst', False), ('vlan_vid', False)) meter: None miss_goto: flood name: eth_dst next_tables: [] output: True set_fields: None size: 41 table_id: 2 vlan_port_scale: 4.1 + Sep 10 06:44:10 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 3 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: None name: flood next_tables: [] output: True set_fields: None size: 32 table_id: 3 vlan_port_scale: 2.1 -Table 1, for ingress VLAN processing, has a bunch of flows that drop -inappropriate packets, such as LLDP and STP:: +Currently, we have: - table=1, priority=9099,dl_dst=01:80:c2:00:00:00 actions=drop - table=1, priority=9099,dl_dst=01:00:0c:cc:cc:cd actions=drop - 
table=1, priority=9099,dl_type=0x88cc actions=drop +Table 0 (vlan) + Ingress VLAN processing + +Table 1 (eth_src) + Ingress L2 processing, MAC learning + +Table 2 (eth_dst) + Egress L2 processing -Table 1 also has some more interesting flows that recognize packets -without a VLAN header on each of our ports -(``vlan_tci=0x0000/0x1fff``), push on the VLAN configured for the -port, and proceed to table 3. Presumably these skip table 2 because -we did not configure any VLAN-based ACLs. There is also a fallback -flow to drop other packets, which in practice means that if any -received packet already has a VLAN header then it will be dropped:: +Table 3 (flood) + Flooding - table=1, priority=9000,in_port=p1,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:3 - table=1, priority=9000,in_port=p2,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:3 - table=1, priority=9000,in_port=p3,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:3 - table=1, priority=9000,in_port=p4,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4296->vlan_vid,goto_table:3 - table=1, priority=9000,in_port=p5,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4296->vlan_vid,goto_table:3 - table=1, priority=0 actions=drop +In Table 0 we see flows that recognize packets without a VLAN header on each of +our ports (``vlan_tci=0x0000/0x1fff``), push on the VLAN configured for the +port, and proceed to table 1.
There is also a fallback flow to drop other +packets, which in practice means that if any received packet already has a +VLAN header then it will be dropped:: + + priority=9000,in_port=p1,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:1 + priority=9000,in_port=p2,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:1 + priority=9000,in_port=p3,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:1 + priority=9000,in_port=p4,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4296->vlan_vid,goto_table:1 + priority=9000,in_port=p5,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4296->vlan_vid,goto_table:1 + priority=0 actions=drop .. note:: @@ -497,82 +518,54 @@ received packet already has a VLAN header then it will be dropped:: since 4196 is 0x1064, this action sets VLAN value 0x64, which in decimal is 100. -Table 2 isn't used because there are no VLAN-based ACLs. It just has -a drop flow:: +Table 1 starts off with a flow that drops some inappropriate packets, +in this case EtherType 0x9000 (Ethernet Configuration Testing Protocol), +which should not be forwarded by a switch:: - table=2, priority=0 actions=drop + table=1, priority=9099,dl_type=0x9000 actions=drop -Table 3 is used for MAC learning but the controller hasn't learned any -MAC yet. It also drops some inappropriate packets such as those that claim -to be from a broadcast source address (why not from all multicast source -addresses, though?). We'll come back here later:: +Table 1 is primarily used for MAC learning but the controller hasn't learned +any MAC addresses yet. It also drops some more inappropriate packets such as +those that claim to be from a broadcast source address (why not from all +multicast source addresses, though?). 
We'll come back here later:: - table=3, priority=9099,dl_src=ff:ff:ff:ff:ff:ff actions=drop - table=3, priority=9001,dl_src=0e:00:00:00:00:01 actions=drop - table=3, priority=0 actions=drop - table=3, priority=9000 actions=CONTROLLER:96,goto_table:7 + table=1, priority=9099,dl_src=ff:ff:ff:ff:ff:ff actions=drop + table=1, priority=9001,dl_src=0e:00:00:00:00:01 actions=drop + table=1, priority=9000,dl_vlan=100 actions=CONTROLLER:96,goto_table:2 + table=1, priority=9000,dl_vlan=200 actions=CONTROLLER:96,goto_table:2 + table=1, priority=0 actions=goto_table:2 -Tables 4, 5, and 6 aren't used because we haven't configured any -routing:: +Table 2 is used to direct packets to learned MACs but Faucet hasn't +learned any MACs yet, so it just sends all the packets along to table 3:: - table=4, priority=0 actions=drop - table=5, priority=0 actions=drop - table=6, priority=0 actions=drop + table=2, priority=0 actions=goto_table:3 -Table 7 is used to direct packets to learned MACs but Faucet hasn't -learned any MACs yet, so it just sends all the packets along to table -8:: +Table 3 does some more dropping of packets we don't want to forward, +in this case STP:: - table=7, priority=0 actions=drop - table=7, priority=9000 actions=goto_table:8 + table=3, priority=9099,dl_dst=01:00:0c:cc:cc:cd actions=drop + table=3, priority=9099,dl_dst=01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 actions=drop -Table 8 implements flooding, broadcast, and multicast. The flows for +Table 3 implements flooding, broadcast, and multicast. 
The flows for broadcast and flood are easy to understand: if the packet came in on a given port and needs to be flooded or broadcast, output it to all the other ports in the same VLAN:: - table=8, priority=9008,in_port=p1,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p2,output:p3 - table=8, priority=9008,in_port=p2,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p1,output:p3 - table=8, priority=9008,in_port=p3,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p1,output:p2 - table=8, priority=9008,in_port=p4,dl_vlan=200,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p5 - table=8, priority=9008,in_port=p5,dl_vlan=200,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p4 - table=8, priority=9000,in_port=p1,dl_vlan=100 actions=pop_vlan,output:p2,output:p3 - table=8, priority=9000,in_port=p2,dl_vlan=100 actions=pop_vlan,output:p1,output:p3 - table=8, priority=9000,in_port=p3,dl_vlan=100 actions=pop_vlan,output:p1,output:p2 - table=8, priority=9000,in_port=p4,dl_vlan=200 actions=pop_vlan,output:p5 - table=8, priority=9000,in_port=p5,dl_vlan=200 actions=pop_vlan,output:p4 - -.. note:: - - These flows could apparently be simpler because OpenFlow says that - ``output:`` is ignored if ```` is the input port. That - means that the first three flows above could apparently be collapsed - into just:: - - table=8, priority=9008,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p1,output:p2,output:p3 - - There might be some reason why this won't work or isn't practical, - but that isn't obvious from looking at the flow table. 
+ table=3, priority=9004,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p1,output:p2,output:p3 + table=3, priority=9004,dl_vlan=200,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p4,output:p5 + table=3, priority=9000,dl_vlan=100 actions=pop_vlan,output:p1,output:p2,output:p3 + table=3, priority=9000,dl_vlan=200 actions=pop_vlan,output:p4,output:p5 There are also some flows for handling some standard forms of multicast, and a fallback drop flow:: - table=8, priority=9006,in_port=p1,dl_vlan=100,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p2,output:p3 - table=8, priority=9006,in_port=p2,dl_vlan=100,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p1,output:p3 - table=8, priority=9006,in_port=p3,dl_vlan=100,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p1,output:p2 - table=8, priority=9006,in_port=p4,dl_vlan=200,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p5 - table=8, priority=9006,in_port=p5,dl_vlan=200,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p4 - table=8, priority=9002,in_port=p1,dl_vlan=100,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p2,output:p3 - table=8, priority=9002,in_port=p2,dl_vlan=100,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p3 - table=8, priority=9002,in_port=p3,dl_vlan=100,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p2 - table=8, priority=9004,in_port=p1,dl_vlan=100,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p2,output:p3 - table=8, priority=9004,in_port=p2,dl_vlan=100,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p3 - table=8, priority=9004,in_port=p3,dl_vlan=100,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p2 - table=8, priority=9002,in_port=p4,dl_vlan=200,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p5 - table=8, 
priority=9002,in_port=p5,dl_vlan=200,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p4 - table=8, priority=9004,in_port=p4,dl_vlan=200,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p5 - table=8, priority=9004,in_port=p5,dl_vlan=200,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p4 - table=8, priority=0 actions=drop + table=3, priority=9003,dl_vlan=100,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p1,output:p2,output:p3 + table=3, priority=9003,dl_vlan=200,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p4,output:p5 + table=3, priority=9001,dl_vlan=100,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p2,output:p3 + table=3, priority=9002,dl_vlan=100,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p2,output:p3 + table=3, priority=9001,dl_vlan=200,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p4,output:p5 + table=3, priority=9002,dl_vlan=200,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p4,output:p5 + table=3, priority=0 actions=drop Tracing ~~~~~~~ @@ -602,25 +595,25 @@ trivial example:: bridge("br0") ------------- - 0. in_port=1, priority 9099, cookie 0x5adc15c0 - goto_table:1 - 1. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 + 0. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 push_vlan:0x8100 set_field:4196->vlan_vid - goto_table:3 - 3. priority 9000, cookie 0x5adc15c0 + goto_table:1 + 1. dl_vlan=100, priority 9000, cookie 0x5adc15c0 CONTROLLER:96 - goto_table:7 - 7. priority 9000, cookie 0x5adc15c0 - goto_table:8 - 8. in_port=1,dl_vlan=100, priority 9000, cookie 0x5adc15c0 + goto_table:2 + 2. priority 0, cookie 0x5adc15c0 + goto_table:3 + 3. 
dl_vlan=100, priority 9000, cookie 0x5adc15c0 pop_vlan + output:1 + >> skipping output to input port output:2 output:3 Final flow: unchanged Megaflow: recirc_id=0,eth,in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x0000 - Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,flags=1,recirc_id=1,rule_cookie=0x5adc15c0,controller_id=0,max_len=96)),pop_vlan,2,3 + Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,dont_send=1,continuation=0,recirc_id=1,rule_cookie=0x5adc15c0,controller_id=0,max_len=96)),pop_vlan,2,3 The first line of output, beginning with ``Flow:``, just repeats our request in a more verbose form, including the L2 fields that were @@ -628,10 +621,10 @@ zeroed. Each of the numbered items under ``bridge("br0")`` shows what would happen to our hypothetical packet in the table with the given number. -For example, we see in table 1 that the packet matches a flow that +For example, we see in table 0 that the packet matches a flow that push on a VLAN header, set the VLAN ID to 100, and goes on to further -processing in table 3. In table 3, the packet gets sent to the -controller to allow MAC learning to take place, and then table 8 +processing in table 1. In table 1, the packet gets sent to the +controller to allow MAC learning to take place, and then table 3 floods the packet to the other ports in the same VLAN. Summary information follows the numbered tables. The packet hasn't @@ -662,7 +655,7 @@ here. But, take a look at ``inst/faucet.log`` now. 
It should now include a line at the end that says that it learned about our MAC 00:11:11:00:00:00, like this:: - Jan 06 15:56:02 faucet.valve INFO DPID 1 (0x1) L2 learned 00:11:11:00:00:00 (L2 type 0x0000, L3 src None) on Port 1 on VLAN 100 (1 hosts total + Sep 10 08:16:28 faucet.valve INFO DPID 1 (0x1) switch-1 L2 learned 00:11:11:00:00:00 (L2 type 0x0000, L3 src None, L3 dst None) Port 1 VLAN 100 (1 hosts total) Now compare the flow tables that we saved to the current ones:: @@ -671,8 +664,8 @@ Now compare the flow tables that we saved to the current ones:: The result should look like this, showing new flows for the learned MACs:: - +table=3 priority=9098,in_port=1,dl_vlan=100,dl_src=00:11:11:00:00:00 hard_timeout=3601 actions=goto_table:7 - +table=7 priority=9099,dl_vlan=100,dl_dst=00:11:11:00:00:00 idle_timeout=3601 actions=pop_vlan,output:1 + +table=1 priority=9098,in_port=1,dl_vlan=100,dl_src=00:11:11:00:00:00 hard_timeout=3605 actions=goto_table:2 + +table=2 priority=9099,dl_vlan=100,dl_dst=00:11:11:00:00:00 idle_timeout=3605 actions=pop_vlan,output:1 To demonstrate the usefulness of the learned MAC, try tracing (with side effects) a packet arriving on ``p2`` (or ``p3``) and destined to @@ -686,31 +679,29 @@ address:: bridge("br0") ------------- - 0. in_port=2, priority 9099, cookie 0x5adc15c0 - goto_table:1 - 1. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 + 0. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 push_vlan:0x8100 set_field:4196->vlan_vid - goto_table:3 - 3. priority 9000, cookie 0x5adc15c0 + goto_table:1 + 1. dl_vlan=100, priority 9000, cookie 0x5adc15c0 CONTROLLER:96 - goto_table:7 - 7. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0 + goto_table:2 + 2. 
dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0 pop_vlan output:1 If you check ``inst/faucet.log``, you can see that ``p2``'s MAC has been learned too:: - Jan 06 15:58:09 faucet.valve INFO DPID 1 (0x1) L2 learned 00:22:22:00:00:00 (L2 type 0x0000, L3 src None) on Port 2 on VLAN 100 (2 hosts total) + Sep 10 08:17:45 faucet.valve INFO DPID 1 (0x1) switch-1 L2 learned 00:22:22:00:00:00 (L2 type 0x0000, L3 src None, L3 dst None) Port 2 VLAN 100 (2 hosts total) Similarly for ``diff-flows``:: $ diff-flows flows1 br0 - +table=3 priority=9098,in_port=1,dl_vlan=100,dl_src=00:11:11:00:00:00 hard_timeout=3601 actions=goto_table:7 - +table=3 priority=9098,in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00 hard_timeout=3604 actions=goto_table:7 - +table=7 priority=9099,dl_vlan=100,dl_dst=00:11:11:00:00:00 idle_timeout=3601 actions=pop_vlan,output:1 - +table=7 priority=9099,dl_vlan=100,dl_dst=00:22:22:00:00:00 idle_timeout=3604 actions=pop_vlan,output:2 + +table=1 priority=9098,in_port=1,dl_vlan=100,dl_src=00:11:11:00:00:00 hard_timeout=3605 actions=goto_table:2 + +table=1 priority=9098,in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00 hard_timeout=3599 actions=goto_table:2 + +table=2 priority=9099,dl_vlan=100,dl_dst=00:11:11:00:00:00 idle_timeout=3605 actions=pop_vlan,output:1 + +table=2 priority=9099,dl_vlan=100,dl_dst=00:22:22:00:00:00 idle_timeout=3599 actions=pop_vlan,output:2 Then, if you re-run either of the ``ofproto/trace`` commands (with or without ``-generate``), you can see that the packets go back and forth @@ -721,15 +712,13 @@ without any further MAC learning, e.g.:: bridge("br0") ------------- - 0. in_port=2, priority 9099, cookie 0x5adc15c0 - goto_table:1 - 1. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 + 0. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 push_vlan:0x8100 set_field:4196->vlan_vid - goto_table:3 - 3. 
in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00, priority 9098, cookie 0x5adc15c0 - goto_table:7 - 7. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0 + goto_table:1 + 1. in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00, priority 9098, cookie 0x5adc15c0 + goto_table:2 + 2. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0 pop_vlan output:1 @@ -812,15 +801,13 @@ at the most recent ``ofproto/trace`` output:: bridge("br0") ------------- - 0. in_port=2, priority 9099, cookie 0x5adc15c0 - goto_table:1 - 1. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 + 0. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 push_vlan:0x8100 set_field:4196->vlan_vid - goto_table:3 - 3. in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00, priority 9098, cookie 0x5adc15c0 - goto_table:7 - 7. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0 + goto_table:1 + 1. in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00, priority 9098, cookie 0x5adc15c0 + goto_table:2 + 2. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0 pop_vlan output:1 @@ -844,17 +831,17 @@ megaflow entry includes: visited: ``in_port`` - In tables 0, 1, and 3. + In tables 0 and 1. ``vlan_tci`` - In tables 1, 3, and 7 (``vlan_tci`` includes the VLAN ID and PCP + In tables 0, 1, and 2 (``vlan_tci`` includes the VLAN ID and PCP fields and``dl_vlan`` is just the VLAN ID). ``dl_src`` - In table 3 + In table 1. ``dl_dst`` - In table 7. + In table 2. * All of the fields matched by flows that had to be ruled out to ensure that the ones that actually matched were the highest priority @@ -865,12 +852,12 @@ The last one is important. Notice how the megaflow matches on ``dl_type`` (the Ethernet type). 
One reason is because of this flow in OpenFlow table 1 (which shows up in ``dump-flows`` output):: - table=1, priority=9099,dl_type=0x88cc actions=drop + table=1, priority=9099,dl_type=0x9000 actions=drop This flow has higher priority than the flow in table 1 that actually matched. This means that, to put it in the megaflow cache, ``ovs-vswitchd`` has to add a match on ``dl_type`` to ensure that the -cache entry doesn't match LLDP packets (with Ethertype 0x88cc). +cache entry doesn't match ECTP packets (with Ethertype 0x9000). .. note:: @@ -935,59 +922,81 @@ each VLAN and define a router between them. The ``dps`` section is unchanged:: router-1: vlans: [100, 200] -Then we restart Faucet:: +Then we can tell Faucet to reload its configuration:: - $ docker restart faucet - -.. note:: - - One should be able to tell Faucet to re-read its configuration file - without restarting it. I sometimes saw anomalous behavior when I - did this, although I didn't characterize it well enough to make a - quality bug report. I found restarting the container to be - reliable. 
+ $ sudo docker exec faucet pkill -HUP -f faucet.faucet OpenFlow Layer ~~~~~~~~~~~~~~ -Back in the OVS sandbox, let's see how the flow table has changed, with:: +Now that we have an additional feature enabled (routing) we will notice some +additional OpenFlow tables if we check ``inst/faucet.log``:: - $ diff-flows flows1 br0 + Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 0 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('eth_type', False), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: None name: vlan next_tables: ['eth_src'] output: True set_fields: ('vlan_vid',) size: 32 table_id: 0 vlan_port_scale: 1.5 + Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 1 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('eth_src', False), ('eth_type', False), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: eth_dst name: eth_src next_tables: ['ipv4_fib', 'vip', 'eth_dst', 'flood'] output: True set_fields: ('vlan_vid', 'eth_dst') size: 32 table_id: 1 vlan_port_scale: 4.1 + Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 2 table config dec_ttl: True exact_match: None match_types: (('eth_type', False), ('ipv4_dst', True), ('vlan_vid', False)) meter: None miss_goto: None name: ipv4_fib next_tables: ['vip', 'eth_dst', 'flood'] output: True set_fields: ('eth_dst', 'eth_src', 'vlan_vid') size: 32 table_id: 2 vlan_port_scale: 3.1 + Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 3 table config dec_ttl: None exact_match: None match_types: (('arp_tpa', False), ('eth_dst', False), ('eth_type', False), ('icmpv6_type', False), ('ip_proto', False)) meter: None miss_goto: None name: vip next_tables: ['eth_dst', 'flood'] output: True set_fields: None size: 32 table_id: 3 vlan_port_scale: None + Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 4 table config dec_ttl: None exact_match: True match_types: (('eth_dst', 
False), ('vlan_vid', False)) meter: None miss_goto: flood name: eth_dst next_tables: [] output: True set_fields: None size: 41 table_id: 4 vlan_port_scale: 4.1 + Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 5 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: None name: flood next_tables: [] output: True set_fields: None size: 32 table_id: 5 vlan_port_scale: 2.1 + +So now we have an additional FIB and VIP table: + +Table 0 (vlan) + Ingress VLAN processing + +Table 1 (eth_src) + Ingress L2 processing, MAC learning + +Table 2 (ipv4_fib) + L3 forwarding for IPv4 -First, table 3 has new flows to direct ARP packets to table 6 (the +Table 3 (vip) + Virtual IP processing, e.g. for router IP addresses implemented by Faucet + +Table 4 (eth_dst) + Egress L2 processing + +Table 5 (flood) + Flooding + +Back in the OVS sandbox, let's see what new flow rules have been added, with:: + + $ diff-flows flows1 br0 | grep + + +First, table 1 has new flows to direct ARP packets to table 3 (the virtual IP processing table), presumably to handle ARP for the router IPs. 
New flows also send IP packets destined to a particular Ethernet -address to table 4 (the L3 forwarding table); we can make the educated +address to table 2 (the L3 forwarding table); we can make the educated guess that the Ethernet address is the one used by the Faucet router:: - +table=3 priority=9131,arp,dl_vlan=100 actions=goto_table:6 - +table=3 priority=9131,arp,dl_vlan=200 actions=goto_table:6 - +table=3 priority=9099,ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01 actions=goto_table:4 - +table=3 priority=9099,ip,dl_vlan=200,dl_dst=0e:00:00:00:00:01 actions=goto_table:4 - -The new flows in table 4 appear to be verifying that the packets are -indeed addressed to a network or IP address that Faucet knows how to -route:: - - +table=4 priority=9131,ip,dl_vlan=100,nw_dst=10.100.0.254 actions=goto_table:6 - +table=4 priority=9131,ip,dl_vlan=200,nw_dst=10.200.0.254 actions=goto_table:6 - +table=4 priority=9123,ip,dl_vlan=100,nw_dst=10.100.0.0/24 actions=goto_table:6 - +table=4 priority=9123,ip,dl_vlan=200,nw_dst=10.100.0.0/24 actions=goto_table:6 - +table=4 priority=9123,ip,dl_vlan=100,nw_dst=10.200.0.0/24 actions=goto_table:6 - +table=4 priority=9123,ip,dl_vlan=200,nw_dst=10.200.0.0/24 actions=goto_table:6 - -Table 6 has a few different things going on. It sends ARP requests -for the router IPs to the controller; presumably the controller will -generate replies and send them back to the requester. 
It switches -other ARP packets, either broadcasting them if they have a broadcast + +table=1 priority=9131,arp,dl_vlan=100 actions=goto_table:3 + +table=1 priority=9131,arp,dl_vlan=200 actions=goto_table:3 + +table=1 priority=9099,ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01 actions=goto_table:2 + +table=1 priority=9099,ip,dl_vlan=200,dl_dst=0e:00:00:00:00:01 actions=goto_table:2 + +In the new ``ipv4_fib`` table (table 2) there appear to be flows for verifying +that the packets are indeed addressed to a network or IP address that Faucet +knows how to route:: + + +table=2 priority=9131,ip,dl_vlan=100,nw_dst=10.100.0.254 actions=goto_table:3 + +table=2 priority=9131,ip,dl_vlan=200,nw_dst=10.200.0.254 actions=goto_table:3 + +table=2 priority=9123,ip,dl_vlan=200,nw_dst=10.100.0.0/24 actions=goto_table:3 + +table=2 priority=9123,ip,dl_vlan=100,nw_dst=10.100.0.0/24 actions=goto_table:3 + +table=2 priority=9123,ip,dl_vlan=200,nw_dst=10.200.0.0/24 actions=goto_table:3 + +table=2 priority=9123,ip,dl_vlan=100,nw_dst=10.200.0.0/24 actions=goto_table:3 + +In our new ``vip`` table (table 3) there are a few different things going on. +It sends ARP requests for the router IPs to the controller; presumably the +controller will generate replies and send them back to the requester. +It switches other ARP packets, either broadcasting them if they have a broadcast destination or attempting to unicast them otherwise. 
It sends all other IP packets to the controller:: - +table=6 priority=9133,arp,arp_tpa=10.100.0.254 actions=CONTROLLER:128 - +table=6 priority=9133,arp,arp_tpa=10.200.0.254 actions=CONTROLLER:128 - +table=6 priority=9132,arp,dl_dst=ff:ff:ff:ff:ff:ff actions=goto_table:8 - +table=6 priority=9131,arp actions=goto_table:7 - +table=6 priority=9130,ip actions=CONTROLLER:128 + +table=3 priority=9133,arp,arp_tpa=10.100.0.254 actions=CONTROLLER:128 + +table=3 priority=9133,arp,arp_tpa=10.200.0.254 actions=CONTROLLER:128 + +table=3 priority=9132,arp,dl_dst=ff:ff:ff:ff:ff:ff actions=goto_table:4 + +table=3 priority=9131,arp actions=goto_table:4 + +table=3 priority=9130,ip actions=CONTROLLER:128 Performance is clearly going to be poor if every packet that needs to be routed has to go to the controller, but it's unlikely that's the @@ -1039,27 +1048,27 @@ The important part of the output is where it shows that the packet was recognized as an ARP request destined to the router gateway and therefore sent to the controller:: - 6. arp,arp_tpa=10.100.0.254, priority 9133, cookie 0x5adc15c0 - CONTROLLER:128 + 3. 
arp,arp_tpa=10.100.0.254, priority 9133, cookie 0x5adc15c0 + CONTROLLER:128 The Faucet log shows that Faucet learned the host's MAC address, its MAC-to-IP mapping, and responded to the ARP request:: - Jan 06 16:12:23 faucet.valve INFO DPID 1 (0x1) Adding new route 10.100.0.1/32 via 10.100.0.1 (00:01:02:03:04:05) on VLAN 100 - Jan 06 16:12:23 faucet.valve INFO DPID 1 (0x1) Responded to ARP request for 10.100.0.254 from 10.100.0.1 (00:01:02:03:04:05) on VLAN 100 - Jan 06 16:12:23 faucet.valve INFO DPID 1 (0x1) L2 learned 00:01:02:03:04:05 (L2 type 0x0806, L3 src 10.100.0.1) on Port 1 on VLAN 100 (1 hosts total) + Sep 10 08:52:46 faucet.valve INFO DPID 1 (0x1) switch-1 Adding new route 10.100.0.1/32 via 10.100.0.1 (00:01:02:03:04:05) on VLAN 100 + Sep 10 08:52:46 faucet.valve INFO DPID 1 (0x1) switch-1 Resolve response to 10.100.0.254 from 00:01:02:03:04:05 (L2 type 0x0806, L3 src 10.100.0.1, L3 dst 10.100.0.254) Port 1 VLAN 100 + Sep 10 08:52:46 faucet.valve INFO DPID 1 (0x1) switch-1 L2 learned 00:01:02:03:04:05 (L2 type 0x0806, L3 src 10.100.0.1, L3 dst 10.100.0.254) Port 1 VLAN 100 (1 hosts total) We can also look at the changes to the flow tables:: $ diff-flows flows2 br0 - +table=3 priority=9098,in_port=1,dl_vlan=100,dl_src=00:01:02:03:04:05 hard_timeout=3600 actions=goto_table:7 - +table=4 priority=9131,ip,dl_vlan=100,nw_dst=10.100.0.1 actions=set_field:4196->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:01:02:03:04:05->eth_dst,dec_ttl,goto_table:7 - +table=4 priority=9131,ip,dl_vlan=200,nw_dst=10.100.0.1 actions=set_field:4196->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:01:02:03:04:05->eth_dst,dec_ttl,goto_table:7 - +table=7 priority=9099,dl_vlan=100,dl_dst=00:01:02:03:04:05 idle_timeout=3600 actions=pop_vlan,output:1 + +table=1 priority=9098,in_port=1,dl_vlan=100,dl_src=00:01:02:03:04:05 hard_timeout=3605 actions=goto_table:4 + +table=2 priority=9131,ip,dl_vlan=200,nw_dst=10.100.0.1 
actions=set_field:4196->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:01:02:03:04:05->eth_dst,dec_ttl,goto_table:4 + +table=2 priority=9131,ip,dl_vlan=100,nw_dst=10.100.0.1 actions=set_field:4196->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:01:02:03:04:05->eth_dst,dec_ttl,goto_table:4 + +table=4 priority=9099,dl_vlan=100,dl_dst=00:01:02:03:04:05 idle_timeout=3605 actions=pop_vlan,output:1 -The new flows include one in table 3 and one in table 7 for the +The new flows include one in table 1 and one in table 4 for the learned MAC, which have the same forms we saw before. The new flows -in table 4 are different. They matches packets directed to 10.100.0.1 +in table 2 are different. They match packets directed to 10.100.0.1 (in two VLANs) and forward them to the host by updating the Ethernet source and destination addresses appropriately, decrementing the TTL, and skipping ahead to unicast output in table 7. This means that @@ -1083,7 +1092,7 @@ And dump the reply packet:: $ /usr/sbin/tcpdump -evvvr sandbox/p1.pcap reading from file sandbox/p1.pcap, link-type EN10MB (Ethernet) - 16:14:47.670727 0e:00:00:00:00:01 (oui Unknown) > 00:01:02:03:04:05 (oui Unknown), ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Reply 10.100.0.254 is-at 0e:00:00:00:00:01 (oui Unknown), length 46 + 20:55:13.186932 0e:00:00:00:00:01 (oui Unknown) > 00:01:02:03:04:05 (oui Unknown), ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Reply 10.100.0.254 is-at 0e:00:00:00:00:01 (oui Unknown), length 46 We clearly see the ARP reply, which tells us that the Faucet router's Ethernet address is 0e:00:00:00:00:01 (as we guessed before from the @@ -1105,26 +1114,24 @@ this:: bridge("br0") ------------- - 0. in_port=1, priority 9099, cookie 0x5adc15c0 - goto_table:1 - 1. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 + 0.
in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 push_vlan:0x8100 set_field:4196->vlan_vid + goto_table:1 + 1. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0 + goto_table:2 + 2. ip,dl_vlan=100,nw_dst=10.200.0.0/24, priority 9123, cookie 0x5adc15c0 goto_table:3 - 3. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0 - goto_table:4 - 4. ip,dl_vlan=100,nw_dst=10.200.0.0/24, priority 9123, cookie 0x5adc15c0 - goto_table:6 - 6. ip, priority 9130, cookie 0x5adc15c0 + 3. ip, priority 9130, cookie 0x5adc15c0 CONTROLLER:128 Final flow: udp,in_port=1,dl_vlan=100,dl_vlan_pcp=0,vlan_tci1=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=0 Megaflow: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_dst=10.200.0.0/25,nw_frag=no - Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,flags=0,recirc_id=6,rule_cookie=0x5adc15c0,controller_id=0,max_len=128)) + Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=6,rule_cookie=0x5adc15c0,controller_id=0,max_len=128)) Observe that the packet gets recognized as destined to the router, in -table 3, and then as properly destined to the 10.200.0.0/24 network, -in table 4. In table 6, however, it gets sent to the controller. +table 1, and then as properly destined to the 10.200.0.0/24 network, +in table 2. In table 3, however, it gets sent to the controller. Presumably, this is because Faucet has not yet resolved an Ethernet address for the destination host 10.200.0.1. It probably sent out an ARP request. Let's take a look in the next step. 
@@ -1140,13 +1147,13 @@ Let's make sure:: $ /usr/sbin/tcpdump -evvvr sandbox/p4.pcap reading from file sandbox/p4.pcap, link-type EN10MB (Ethernet) - 16:17:43.174006 0e:00:00:00:00:01 (oui Unknown) > Broadcast, ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Request who-has 10.200.0.1 tell 10.200.0.254, length 46 + 20:57:31.116097 0e:00:00:00:00:01 (oui Unknown) > Broadcast, ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Request who-has 10.200.0.1 tell 10.200.0.254, length 46 and:: $ /usr/sbin/tcpdump -evvvr sandbox/p5.pcap reading from file sandbox/p5.pcap, link-type EN10MB (Ethernet) - 16:17:43.174268 0e:00:00:00:00:01 (oui Unknown) > Broadcast, ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Request who-has 10.200.0.1 tell 10.200.0.254, length 46 + 20:58:04.129735 0e:00:00:00:00:01 (oui Unknown) > Broadcast, ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Request who-has 10.200.0.1 tell 10.200.0.254, length 46 For good measure, let's make sure that it wasn't sent to ``p3``:: @@ -1164,37 +1171,34 @@ reply:: bridge("br0") ------------- - 0. in_port=4, priority 9099, cookie 0x5adc15c0 - goto_table:1 - 1. in_port=4,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 + 0. in_port=4,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 push_vlan:0x8100 set_field:4296->vlan_vid + goto_table:1 + 1. arp,dl_vlan=200, priority 9131, cookie 0x5adc15c0 goto_table:3 - 3. arp,dl_vlan=200, priority 9131, cookie 0x5adc15c0 - goto_table:6 - 6. arp,arp_tpa=10.200.0.254, priority 9133, cookie 0x5adc15c0 + 3. 
arp,arp_tpa=10.200.0.254, priority 9133, cookie 0x5adc15c0 CONTROLLER:128 Final flow: arp,in_port=4,dl_vlan=200,dl_vlan_pcp=0,vlan_tci1=0x0000,dl_src=00:10:20:30:40:50,dl_dst=0e:00:00:00:00:01,arp_spa=10.200.0.1,arp_tpa=10.200.0.254,arp_op=2,arp_sha=00:10:20:30:40:50,arp_tha=0e:00:00:00:00:01 - Megaflow: recirc_id=0,eth,arp,in_port=4,vlan_tci=0x0000/0x1fff,dl_dst=0e:00:00:00:00:01,arp_tpa=10.200.0.254 - Datapath actions: push_vlan(vid=200,pcp=0),userspace(pid=0,controller(reason=1,flags=0,recirc_id=7,rule_cookie=0x5adc15c0,controller_id=0,max_len=128)) + Megaflow: recirc_id=0,eth,arp,in_port=4,vlan_tci=0x0000/0x1fff,arp_tpa=10.200.0.254 + Datapath actions: push_vlan(vid=200,pcp=0),userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=7,rule_cookie=0x5adc15c0,controller_id=0,max_len=128)) It shows up in ``inst/faucet.log``:: - Jan 06 03:20:11 faucet.valve INFO DPID 1 (0x1) Adding new route 10.200.0.1/32 via 10.200.0.1 (00:10:20:30:40:50) on VLAN 200 - Jan 06 03:20:11 faucet.valve INFO DPID 1 (0x1) ARP response 10.200.0.1 (00:10:20:30:40:50) on VLAN 200 - Jan 06 03:20:11 faucet.valve INFO DPID 1 (0x1) L2 learned 00:10:20:30:40:50 (L2 type 0x0806, L3 src 10.200.0.1) on Port 4 on VLAN 200 (1 hosts total) + Sep 10 08:59:02 faucet.valve INFO DPID 1 (0x1) switch-1 Adding new route 10.200.0.1/32 via 10.200.0.1 (00:10:20:30:40:50) on VLAN 200 + Sep 10 08:59:02 faucet.valve INFO DPID 1 (0x1) switch-1 Received advert for 10.200.0.1 from 00:10:20:30:40:50 (L2 type 0x0806, L3 src 10.200.0.1, L3 dst 10.200.0.254) Port 4 VLAN 200 + Sep 10 08:59:02 faucet.valve INFO DPID 1 (0x1) switch-1 L2 learned 00:10:20:30:40:50 (L2 type 0x0806, L3 src 10.200.0.1, L3 dst 10.200.0.254) Port 4 VLAN 200 (1 hosts total) and in the OVS flow tables:: $ diff-flows flows2 br0 - +table=3 priority=9098,in_port=4,dl_vlan=200,dl_src=00:10:20:30:40:50 hard_timeout=3601 actions=goto_table:7 + +table=1 priority=9098,in_port=4,dl_vlan=200,dl_src=00:10:20:30:40:50 hard_timeout=3598 
actions=goto_table:4 ... - +table=4 priority=9131,ip,dl_vlan=200,nw_dst=10.200.0.1 actions=set_field:4296->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:10:20:30:40:50->eth_dst,dec_ttl,goto_table:7 - +table=4 priority=9131,ip,dl_vlan=100,nw_dst=10.200.0.1 actions=set_field:4296->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:10:20:30:40:50->eth_dst,dec_ttl,goto_table:7 + +table=2 priority=9131,ip,dl_vlan=200,nw_dst=10.200.0.1 actions=set_field:4296->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:10:20:30:40:50->eth_dst,dec_ttl,goto_table:4 + +table=2 priority=9131,ip,dl_vlan=100,nw_dst=10.200.0.1 actions=set_field:4296->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:10:20:30:40:50->eth_dst,dec_ttl,goto_table:4 ... - +table=4 priority=9123,ip,dl_vlan=100,nw_dst=10.200.0.0/24 actions=goto_table:6 - +table=7 priority=9099,dl_vlan=200,dl_dst=00:10:20:30:40:50 idle_timeout=3601 actions=pop_vlan,output:4 + +table=4 priority=9099,dl_vlan=200,dl_dst=00:10:20:30:40:50 idle_timeout=3598 actions=pop_vlan,output:4 Step 6: IP Packet Delivery ++++++++++++++++++++++++++ @@ -1213,25 +1217,23 @@ for the original sending host to re-send the packet. We can do that by re-running the trace:: $ ovs-appctl ofproto/trace br0 in_port=p1,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,udp,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_ttl=64 -generate - Flow: udp,in_port=1,vlan_tci=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=0 + Flow: udp,in_port=1,vlan_tci=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=0 bridge("br0") ------------- - 0. in_port=1, priority 9099, cookie 0x5adc15c0 - goto_table:1 - 1. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 + 0. 
in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 push_vlan:0x8100 set_field:4196->vlan_vid - goto_table:3 - 3. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0 - goto_table:4 - 4. ip,dl_vlan=100,nw_dst=10.200.0.1, priority 9131, cookie 0x5adc15c0 + goto_table:1 + 1. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0 + goto_table:2 + 2. ip,dl_vlan=100,nw_dst=10.200.0.1, priority 9131, cookie 0x5adc15c0 set_field:4296->vlan_vid set_field:0e:00:00:00:00:01->eth_src set_field:00:10:20:30:40:50->eth_dst dec_ttl - goto_table:7 - 7. dl_vlan=200,dl_dst=00:10:20:30:40:50, priority 9099, cookie 0x5adc15c0 + goto_table:4 + 4. dl_vlan=200,dl_dst=00:10:20:30:40:50, priority 9099, cookie 0x5adc15c0 pop_vlan output:4 @@ -1325,18 +1327,27 @@ the ways that OVS tries to optimize megaflows. Update actions: allow: 1 -Then restart Faucet:: +Then reload Faucet:: - $ docker restart faucet + $ sudo docker exec faucet pkill -HUP -f faucet.faucet -On port 1, this new configuration blocks all traffic to TCP port 8080 -and allows all other traffic. The resulting change in the flow table -shows this clearly too:: +We will now find Faucet has added a new table to the start of the pipeline +for processing port ACLs. Let's take a look at our new table 0 with +``dump-flows br0``:: - $ diff-flows flows2 br0 - -priority=9099,in_port=1 actions=goto_table:1 - +priority=9098,in_port=1 actions=goto_table:1 - +priority=9099,tcp,in_port=1,tp_dst=8080 actions=drop + priority=9099,tcp,in_port=p1,tp_dst=8080 actions=drop + priority=9098,in_port=p1 actions=goto_table:1 + priority=9099,in_port=p2 actions=goto_table:1 + priority=9099,in_port=p3 actions=goto_table:1 + priority=9099,in_port=p4 actions=goto_table:1 + priority=9099,in_port=p5 actions=goto_table:1 + priority=0 actions=drop + +We now have a flow that just jumps to table 1 (vlan) for each configured port, +and a low priority rule to drop other unrecognized packets. 
We also see a flow +rule for dropping TCP port 8080 traffic on port 1. If we compare this rule to +the ACL we configured, we can clearly see how Faucet has converted this ACL to +fit into the OpenFlow pipeline. The most interesting question here is performance. If you recall the earlier discussion, when a packet through the flow table encounters a @@ -1357,6 +1368,7 @@ Let's see what happens, by sending a packet to port 80 (instead of 8080):: $ ovs-appctl ofproto/trace br0 in_port=p1,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,tcp,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_ttl=64,tp_dst=80 -generate + src=10.100.0.1,nw_dst=10.200.0.1,nw_ttl=64,tp_dst=80 -generate Flow: tcp,in_port=1,vlan_tci=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0 bridge("br0") @@ -1366,17 +1378,17 @@ Let's see what happens, by sending a packet to port 80 (instead of 1. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0 push_vlan:0x8100 set_field:4196->vlan_vid + goto_table:2 + 2. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0 goto_table:3 - 3. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0 + 3. ip,dl_vlan=100,nw_dst=10.200.0.0/24, priority 9123, cookie 0x5adc15c0 goto_table:4 - 4. ip,dl_vlan=100,nw_dst=10.200.0.0/24, priority 9123, cookie 0x5adc15c0 - goto_table:6 - 6. ip, priority 9130, cookie 0x5adc15c0 + 4. 
ip, priority 9130, cookie 0x5adc15c0 CONTROLLER:128 Final flow: tcp,in_port=1,dl_vlan=100,dl_vlan_pcp=0,vlan_tci1=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0 - Megaflow: recirc_id=0,eth,tcp,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_dst=10.200.0.1,nw_frag=no,tp_dst=0x0/0xf000 - Datapath actions: push_vlan(vid=100,pcp=0) + Megaflow: recirc_id=0,eth,tcp,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_dst=10.200.0.0/25,nw_frag=no,tp_dst=0x0/0xf000 + Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=8,rule_cookie=0x5adc15c0,controller_id=0,max_len=128)) Take a look at the Megaflow line and in particular the match on ``tp_dst``, which says ``tp_dst=0x0/0xf000``. What this means is that @@ -1406,8 +1418,8 @@ Finishing Up ------------ When you're done, you probably want to exit the sandbox session, with -Control+D or ``exit``, and stop the Faucet controller with ``docker -stop faucet; docker rm faucet``. +Control+D or ``exit``, and stop the Faucet controller with ``sudo docker +stop faucet; sudo docker rm faucet``. Further Directions ------------------ -- GitLab From c5b4b0ce95a31f1a2fadc8eecd8027434357b9eb Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Thu, 17 Dec 2020 02:47:32 +0000 Subject: [PATCH 422/432] dpif-netlink: Fix issues of the offloaded flows counter. The n_offloaded_flows counter is saved in dpif, and this is the first one when ofproto is created. When flow operation is done by ovs-appctl commands, such as, dpctl/add-flow, a new dpif is opened, and the n_offloaded_flows in it can't be used. So, instead of using counter, the number of offloaded flows is queried from each netdev, then sum them up. To achieve this, a new API is added in netdev_flow_api to get how many flows assigned to a netdev. 
In order to get better performance, this number is calculated directly from tc_to_ufid hmap for netdev-offload-tc, because flow dumping by tc takes much time if there are many flows offloaded. Fixes: af0618470507 ("dpif-netlink: Count the number of offloaded rules") Signed-off-by: Jianbo Liu Signed-off-by: Ilya Maximets --- lib/dpif-netlink.c | 9 --------- lib/dpif.c | 23 +++++++++++++++++++++++ lib/dpif.h | 3 ++- lib/netdev-offload-provider.h | 4 ++++ lib/netdev-offload-tc.c | 19 +++++++++++++++++++ lib/netdev-offload.c | 27 +++++++++++++++++++++++++++ lib/netdev-offload.h | 3 +++ ofproto/ofproto-dpif-upcall.c | 11 +++++------ tests/system-offloads-traffic.at | 4 ++++ 9 files changed, 87 insertions(+), 16 deletions(-) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 6858ba612..2f881e4fa 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -208,7 +208,6 @@ struct dpif_netlink { /* Change notification. */ struct nl_sock *port_notifier; /* vport multicast group subscriber. 
*/ bool refresh_channels; - struct atomic_count n_offloaded_flows; }; static void report_loss(struct dpif_netlink *, struct dpif_channel *, @@ -654,7 +653,6 @@ dpif_netlink_run(struct dpif *dpif_) static int dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats) { - struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); struct dpif_netlink_dp dp; struct ofpbuf *buf; int error; @@ -680,7 +678,6 @@ dpif_netlink_get_stats(const struct dpif *dpif_, struct dpif_dp_stats *stats) } ofpbuf_delete(buf); } - stats->n_offloaded_flows = atomic_count_get(&dpif->n_offloaded_flows); return error; } @@ -2192,9 +2189,6 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op) } err = parse_flow_put(dpif, put); - if (!err && (put->flags & DPIF_FP_CREATE)) { - atomic_count_inc(&dpif->n_offloaded_flows); - } log_flow_put_message(&dpif->dpif, &this_module, put, 0); break; } @@ -2209,9 +2203,6 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op) dpif_normalize_type(dpif_type(&dpif->dpif)), del->ufid, del->stats); - if (!err) { - atomic_count_dec(&dpif->n_offloaded_flows); - } log_flow_del_message(&dpif->dpif, &this_module, del, 0); break; } diff --git a/lib/dpif.c b/lib/dpif.c index ac2860764..56d0b4a65 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -2018,3 +2018,26 @@ dpif_bond_stats_get(struct dpif *dpif, uint32_t bond_id, ? dpif->dpif_class->bond_stats_get(dpif, bond_id, n_bytes) : EOPNOTSUPP; } + +int +dpif_get_n_offloaded_flows(struct dpif *dpif, uint64_t *n_flows) +{ + const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif)); + struct dpif_port_dump port_dump; + struct dpif_port dpif_port; + int ret, n_devs = 0; + uint64_t nflows; + + *n_flows = 0; + DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { + ret = netdev_ports_get_n_flows(dpif_type_str, dpif_port.port_no, + &nflows); + if (!ret) { + *n_flows += nflows; + } else if (ret == EOPNOTSUPP) { + continue; + } + n_devs++; + } + return n_devs ? 
0 : EOPNOTSUPP; +} diff --git a/lib/dpif.h b/lib/dpif.h index 7ef148c6d..ecda896c7 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -429,7 +429,6 @@ struct dpif_dp_stats { uint64_t n_missed; /* Number of flow table misses. */ uint64_t n_lost; /* Number of misses not sent to userspace. */ uint64_t n_flows; /* Number of flows present. */ - uint64_t n_offloaded_flows; /* Number of offloaded flows present. */ uint64_t n_mask_hit; /* Number of mega flow masks visited for flow table matches. */ uint32_t n_masks; /* Number of mega flow masks. */ @@ -438,6 +437,8 @@ int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *); int dpif_set_features(struct dpif *, uint32_t new_features); +int dpif_get_n_offloaded_flows(struct dpif *dpif, uint64_t *n_flows); + /* Port operations. */ diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h index 0bed7bf61..cf859d1b4 100644 --- a/lib/netdev-offload-provider.h +++ b/lib/netdev-offload-provider.h @@ -83,6 +83,10 @@ struct netdev_flow_api { int (*flow_del)(struct netdev *, const ovs_u128 *ufid, struct dpif_flow_stats *); + /* Get the number of flows offloaded to netdev. + * Return 0 if successful, otherwise returns a positive errno value. */ + int (*flow_get_n_flows)(struct netdev *, uint64_t *n_flows); + /* Initializies the netdev flow api. * Return 0 if successful, otherwise returns a positive errno value. 
*/ int (*init_flow_api)(struct netdev *); diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 2a772a971..717a987d1 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1904,6 +1904,24 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, return error; } +static int +netdev_tc_get_n_flows(struct netdev *netdev, uint64_t *n_flows) +{ + struct ufid_tc_data *data; + uint64_t total = 0; + + ovs_mutex_lock(&ufid_lock); + HMAP_FOR_EACH (data, tc_to_ufid_node, &tc_to_ufid) { + if (data->netdev == netdev) { + total++; + } + } + ovs_mutex_unlock(&ufid_lock); + + *n_flows = total; + return 0; +} + static void probe_multi_mask_per_prio(int ifindex) { @@ -2044,5 +2062,6 @@ const struct netdev_flow_api netdev_offload_tc = { .flow_put = netdev_tc_flow_put, .flow_get = netdev_tc_flow_get, .flow_del = netdev_tc_flow_del, + .flow_get_n_flows = netdev_tc_get_n_flows, .init_flow_api = netdev_tc_init_flow_api, }; diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c index 2da3bc701..6237667c3 100644 --- a/lib/netdev-offload.c +++ b/lib/netdev-offload.c @@ -280,6 +280,17 @@ netdev_flow_del(struct netdev *netdev, const ovs_u128 *ufid, : EOPNOTSUPP; } +int +netdev_flow_get_n_flows(struct netdev *netdev, uint64_t *n_flows) +{ + const struct netdev_flow_api *flow_api = + ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api); + + return (flow_api && flow_api->flow_get_n_flows) + ? 
flow_api->flow_get_n_flows(netdev, n_flows) + : EOPNOTSUPP; +} + int netdev_init_flow_api(struct netdev *netdev) { @@ -602,6 +613,22 @@ netdev_ports_remove(odp_port_t port_no, const char *dpif_type) return ret; } +int +netdev_ports_get_n_flows(const char *dpif_type, odp_port_t port_no, + uint64_t *n_flows) +{ + struct port_to_netdev_data *data; + int ret = EOPNOTSUPP; + + ovs_rwlock_rdlock(&netdev_hmap_rwlock); + data = netdev_ports_lookup(port_no, dpif_type); + if (data) { + ret = netdev_flow_get_n_flows(data->netdev, n_flows); + } + ovs_rwlock_unlock(&netdev_hmap_rwlock); + return ret; +} + odp_port_t netdev_ifindex_to_odp_port(int ifindex) { diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h index 4c0ed2ae8..18b48790f 100644 --- a/lib/netdev-offload.h +++ b/lib/netdev-offload.h @@ -103,6 +103,7 @@ bool netdev_any_oor(void); bool netdev_is_flow_api_enabled(void); void netdev_set_flow_api_enabled(const struct smap *ovs_other_config); bool netdev_is_offload_rebalance_policy_enabled(void); +int netdev_flow_get_n_flows(struct netdev *netdev, uint64_t *n_flows); struct dpif_port; int netdev_ports_insert(struct netdev *, const char *dpif_type, @@ -124,6 +125,8 @@ int netdev_ports_flow_get(const char *dpif_type, struct match *match, struct dpif_flow_stats *stats, struct dpif_flow_attrs *attrs, struct ofpbuf *buf); +int netdev_ports_get_n_flows(const char *dpif_type, + odp_port_t port_no, uint64_t *n_flows); #ifdef __cplusplus } diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index 19b92dfe0..d79f48aa7 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -175,7 +175,6 @@ struct udpif { /* n_flows_mutex prevents multiple threads updating these concurrently. */ atomic_uint n_flows; /* Number of flows in the datapath. */ - atomic_uint n_offloaded_flows; /* Number of the offloaded flows. */ atomic_llong n_flows_timestamp; /* Last time n_flows was updated. 
*/ struct ovs_mutex n_flows_mutex; @@ -731,8 +730,6 @@ udpif_get_n_flows(struct udpif *udpif) dpif_get_dp_stats(udpif->dpif, &stats); flow_count = stats.n_flows; atomic_store_relaxed(&udpif->n_flows, flow_count); - atomic_store_relaxed(&udpif->n_offloaded_flows, - stats.n_offloaded_flows); ovs_mutex_unlock(&udpif->n_flows_mutex); } else { atomic_read_relaxed(&udpif->n_flows, &flow_count); @@ -2904,10 +2901,10 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED) { struct ds ds = DS_EMPTY_INITIALIZER; + uint64_t n_offloaded_flows; struct udpif *udpif; LIST_FOR_EACH (udpif, list_node, &all_udpifs) { - unsigned int n_offloaded_flows; unsigned int flow_limit; bool ufid_enabled; size_t i; @@ -2919,8 +2916,10 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED, ds_put_format(&ds, " flows : (current %lu)" " (avg %u) (max %u) (limit %u)\n", udpif_get_n_flows(udpif), udpif->avg_n_flows, udpif->max_n_flows, flow_limit); - atomic_read_relaxed(&udpif->n_offloaded_flows, &n_offloaded_flows); - ds_put_format(&ds, " offloaded flows : %u\n", n_offloaded_flows); + if (!dpif_get_n_offloaded_flows(udpif->dpif, &n_offloaded_flows)) { + ds_put_format(&ds, " offloaded flows : %"PRIu64"\n", + n_offloaded_flows); + } ds_put_format(&ds, " dump duration : %lldms\n", udpif->dump_duration); ds_put_format(&ds, " ufid enabled : "); if (ufid_enabled) { diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at index 379a8a5e9..4f601ef93 100644 --- a/tests/system-offloads-traffic.at +++ b/tests/system-offloads-traffic.at @@ -32,6 +32,8 @@ in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:882, used: AT_CHECK([ovs-appctl dpctl/dump-flows type=offloaded], [0], []) +AT_CHECK([test $(ovs-appctl upcall/show | grep -c "offloaded flows") -eq 0], [0], [ignore]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP @@ -64,5 +66,7 @@ in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), 
packets:9, bytes:756, used: in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output ]) +AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [ignore]) + OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP -- GitLab From 55f2b065acd477a6810d5279fcace8b42bd594f5 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Mon, 21 Dec 2020 16:01:04 +0100 Subject: [PATCH 423/432] odp-util: Fix netlink message overflow with userdata. Too big userdata could overflow netlink message leading to out-of-bound memory accesses or assertion while formatting nested actions. Fix that by checking the size and returning correct error code. Credit to OSS-Fuzz. Reported-at: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=27640 Fixes: e995e3df57ea ("Allow OVS_USERSPACE_ATTR_USERDATA to be variable length.") Signed-off-by: Ilya Maximets Acked-by: Flavio Leitner --- lib/odp-util.c | 43 +++++++++++++++++++++++++---------- lib/odp-util.h | 11 +++++---- ofproto/ofproto-dpif-upcall.c | 2 +- ofproto/ofproto-dpif-xlate.c | 13 +++++------ tests/odp.at | 37 ++++++++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 25 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index 252a91bfa..d65ebb541 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -1455,14 +1455,20 @@ parse_odp_userspace_action(const char *s, struct ofpbuf *actions) int n1 = -1; if (ovs_scan(&s[n], ",tunnel_out_port=%"SCNi32")%n", &tunnel_out_port, &n1)) { - odp_put_userspace_action(pid, user_data, user_data_size, - tunnel_out_port, include_actions, actions); - res = n + n1; + res = odp_put_userspace_action(pid, user_data, user_data_size, + tunnel_out_port, include_actions, + actions, NULL); + if (!res) { + res = n + n1; + } goto out; } else if (s[n] == ')') { - odp_put_userspace_action(pid, user_data, user_data_size, - ODPP_NONE, include_actions, actions); - res = n + 1; + res = odp_put_userspace_action(pid, user_data, user_data_size, + ODPP_NONE, 
include_actions, + actions, NULL); + if (!res) { + res = n + 1; + } goto out; } } @@ -7557,15 +7563,18 @@ odp_key_fitness_to_string(enum odp_key_fitness fitness) /* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies * Netlink PID 'pid'. If 'userdata' is nonnull, adds a userdata attribute - * whose contents are the 'userdata_size' bytes at 'userdata' and returns the - * offset within 'odp_actions' of the start of the cookie. (If 'userdata' is - * null, then the return value is not meaningful.) */ -size_t + * whose contents are the 'userdata_size' bytes at 'userdata' and sets + * 'odp_actions_ofs' if nonnull with the offset within 'odp_actions' of the + * start of the cookie. (If 'userdata' is null, then the 'odp_actions_ofs' + * value is not meaningful.) + * + * Returns negative error code on failure. */ +int odp_put_userspace_action(uint32_t pid, const void *userdata, size_t userdata_size, odp_port_t tunnel_out_port, bool include_actions, - struct ofpbuf *odp_actions) + struct ofpbuf *odp_actions, size_t *odp_actions_ofs) { size_t userdata_ofs; size_t offset; @@ -7573,6 +7582,9 @@ odp_put_userspace_action(uint32_t pid, offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_USERSPACE); nl_msg_put_u32(odp_actions, OVS_USERSPACE_ATTR_PID, pid); if (userdata) { + if (nl_attr_oversized(userdata_size)) { + return -E2BIG; + } userdata_ofs = odp_actions->size + NLA_HDRLEN; /* The OVS kernel module before OVS 1.11 and the upstream Linux kernel @@ -7598,9 +7610,16 @@ odp_put_userspace_action(uint32_t pid, if (include_actions) { nl_msg_put_flag(odp_actions, OVS_USERSPACE_ATTR_ACTIONS); } + if (nl_attr_oversized(odp_actions->size - offset - NLA_HDRLEN)) { + return -E2BIG; + } nl_msg_end_nested(odp_actions, offset); - return userdata_ofs; + if (odp_actions_ofs) { + *odp_actions_ofs = userdata_ofs; + } + + return 0; } void diff --git a/lib/odp-util.h b/lib/odp-util.h index 623a66aa2..a1d0d0fba 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ 
-356,11 +356,12 @@ struct user_action_cookie { }; BUILD_ASSERT_DECL(sizeof(struct user_action_cookie) == 48); -size_t odp_put_userspace_action(uint32_t pid, - const void *userdata, size_t userdata_size, - odp_port_t tunnel_out_port, - bool include_actions, - struct ofpbuf *odp_actions); +int odp_put_userspace_action(uint32_t pid, + const void *userdata, size_t userdata_size, + odp_port_t tunnel_out_port, + bool include_actions, + struct ofpbuf *odp_actions, + size_t *odp_actions_ofs); void odp_put_tunnel_action(const struct flow_tnl *tunnel, struct ofpbuf *odp_actions, const char *tnl_type); diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c index d79f48aa7..5fae46adf 100644 --- a/ofproto/ofproto-dpif-upcall.c +++ b/ofproto/ofproto-dpif-upcall.c @@ -1084,7 +1084,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout, } odp_put_userspace_action(pid, &cookie, sizeof cookie, - ODPP_NONE, false, buf); + ODPP_NONE, false, buf, NULL); if (meter_id != UINT32_MAX) { nl_msg_end_nested(buf, ac_offset); diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 4ea776052..2715a142b 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -3223,12 +3223,11 @@ compose_sample_action(struct xlate_ctx *ctx, odp_port_t odp_port = ofp_port_to_odp_port( ctx->xbridge, ctx->xin->flow.in_port.ofp_port); uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port); - size_t cookie_offset = odp_put_userspace_action(pid, cookie, - sizeof *cookie, - tunnel_out_port, - include_actions, - ctx->odp_actions); - + size_t cookie_offset; + int res = odp_put_userspace_action(pid, cookie, sizeof *cookie, + tunnel_out_port, include_actions, + ctx->odp_actions, &cookie_offset); + ovs_assert(res == 0); if (is_sample) { nl_msg_end_nested(ctx->odp_actions, actions_offset); nl_msg_end_nested(ctx->odp_actions, sample_offset); @@ -4832,7 +4831,7 @@ put_controller_user_action(struct xlate_ctx *ctx, 
ctx->xin->flow.in_port.ofp_port); uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port); odp_put_userspace_action(pid, &cookie, sizeof cookie, ODPP_NONE, - false, ctx->odp_actions); + false, ctx->odp_actions, NULL); } static void diff --git a/tests/odp.at b/tests/odp.at index 1ebdf0515..b762ebb2b 100644 --- a/tests/odp.at +++ b/tests/odp.at @@ -398,6 +398,43 @@ odp_actions_from_string: error ]) AT_CLEANUP +AT_SETUP([OVS datapath actions parsing and formatting - userdata overflow]) +dnl Userdata should fit in a single netlink message, i.e. should be less than +dnl UINT16_MAX - NLA_HDRLEN = 65535 - 4 = 65531 bytes. OVS should not accept +dnl larger userdata. OTOH, userdata is part of a nested netlink message, that +dnl should not be oversized too. 'pid' takes NLA_HDRLEN + 4 = 8 bytes. +dnl Plus NLA_HDRLEN for the nested header. 'actions' flag takes NLA_HDRLEN = 4 +dnl and 'tunnel_out_port' takes NLA_HDRLEN + 4 = 8 bytes. +dnl So, for the variant with 'actions' maximum length of userdata should be: +dnl UINT16_MAX - NLA_HDRLEN - (NLA_HDRLEN + 4) - NLA_HDRLEN - NLA_HDRLEN +dnl total max nested header pid actions userdata +dnl Result: 65515 bytes for the actual userdata. +dnl For the case with 'tunnel_out_port': 65511 +dnl Size of userdata will be rounded up to be multiple of 4, so highest +dnl acceptable sizes are 65512 and 65508. + +dnl String with length 65512 * 2 = 131024 is valid, while 131026 is not. +data_valid=$( printf '%*s' 131024 | tr ' ' "a") +data_invalid=$(printf '%*s' 131026 | tr ' ' "a") + +echo "userspace(pid=1234567,userdata(${data_valid}),actions)" > actions.txt +echo "userspace(pid=1234567,userdata(${data_invalid}),actions)" >> actions.txt + +dnl String with length 65508 * 2 = 131016 is valid, while 131018 is not. 
+data_valid=$( printf '%*s' 131016 | tr ' ' "a") +data_invalid=$(printf '%*s' 131018 | tr ' ' "a") + +echo "userspace(pid=1234567,userdata(${data_valid}),tunnel_out_port=10)" >> actions.txt +echo "userspace(pid=1234567,userdata(${data_invalid}),tunnel_out_port=10)" >> actions.txt + +AT_CHECK_UNQUOTED([ovstest test-odp parse-actions < actions.txt], [0], [dnl +`cat actions.txt | head -1` +odp_actions_from_string: error +`cat actions.txt | head -3 | tail -1` +odp_actions_from_string: error +]) +AT_CLEANUP + AT_SETUP([OVS datapath keys parsing and formatting - 33 nested encap ]) AT_DATA([odp-in.txt], [dnl encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap())))))))))))))))))))))))))))))))) -- GitLab From ebe0e518b0489aafbe385ba90133f6bacba33353 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Thu, 17 Dec 2020 12:48:41 +0530 Subject: [PATCH 424/432] tunnel: Bareudp Tunnel Support. There are various L3 encapsulation standards using UDP being discussed to leverage the UDP based load balancing capability of different networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. The Bareudp tunnel provides a generic L3 encapsulation support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. An example to create bareudp device to tunnel MPLS traffic is given $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=0x8847 options:dst_port=6635 The bareudp device supports special handling for MPLS & IP as they can have multiple ethertypes. MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). 
The bareudp device to tunnel L3 traffic with multiple ethertypes (MPLS & IP) can be created by passing the L3 protocol name as string in the field payload_type. An example to create bareudp device to tunnel MPLS unicast & multicast traffic is given below.:: $ ovs-vsctl add-port br_mpls udp_port -- set interface udp_port \ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ options:payload_type=mpls options:dst_port=6635 Signed-off-by: Martin Varghese Acked-By: Greg Rose Tested-by: Greg Rose Acked-by: Eelco Chaudron Signed-off-by: Ilya Maximets --- Documentation/automake.mk | 1 + Documentation/faq/bareudp.rst | 82 ++++++++++++++++ Documentation/faq/index.rst | 1 + Documentation/faq/releases.rst | 1 + NEWS | 4 + .../linux/compat/include/linux/openvswitch.h | 9 ++ lib/dpif-netlink-rtnl.c | 50 ++++++++++ lib/dpif-netlink.c | 5 + lib/netdev-vport.c | 35 ++++++- lib/netdev.h | 1 + ofproto/ofproto-dpif-xlate.c | 1 + tests/system-layer3-tunnels.at | 96 +++++++++++++++++++ vswitchd/vswitch.xml | 42 ++++++-- 13 files changed, 320 insertions(+), 8 deletions(-) create mode 100644 Documentation/faq/bareudp.rst diff --git a/Documentation/automake.mk b/Documentation/automake.mk index f85c4320e..ea3475f35 100644 --- a/Documentation/automake.mk +++ b/Documentation/automake.mk @@ -88,6 +88,7 @@ DOC_SOURCE = \ Documentation/faq/terminology.rst \ Documentation/faq/vlan.rst \ Documentation/faq/vxlan.rst \ + Documentation/faq/bareudp.rst \ Documentation/internals/index.rst \ Documentation/internals/authors.rst \ Documentation/internals/bugs.rst \ diff --git a/Documentation/faq/bareudp.rst b/Documentation/faq/bareudp.rst new file mode 100644 index 000000000..026b73013 --- /dev/null +++ b/Documentation/faq/bareudp.rst @@ -0,0 +1,82 @@ +.. + Licensed under the Apache License, Version 2.0 (the "License"); you may + not use this file except in compliance with the License. 
You may obtain + a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + License for the specific language governing permissions and limitations + under the License. + + Convention for heading levels in Open vSwitch documentation: + + ======= Heading 0 (reserved for the title in a document) + ------- Heading 1 + ~~~~~~~ Heading 2 + +++++++ Heading 3 + ''''''' Heading 4 + + Avoid deeper levels because they do not render well. + +======= +Bareudp +======= + +Q: What is Bareudp? + + A: There are various L3 encapsulation standards using UDP being discussed + to leverage the UDP based load balancing capability of different + networks. MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among + them. + + The Bareudp tunnel provides generic L3 encapsulation support for + tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP + tunnel. + + An example to create bareudp device to tunnel MPLS unicast traffic is + given below.:: + + $ ovs-vsctl add-port br0 mpls_udp_port -- set interface mpls_udp_port \ + type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ + options:payload_type=0x8847 options:dst_port=6635 + + The option payload_type specifies the ethertype of the l3 protocol which + the bareudp device will be tunnelling. + + The bareudp device supports special handling for MPLS & IP as they can + have multiple ethertypes. + MPLS protocol can have ethertypes ETH_P_MPLS_UC (unicast) & + ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4) + & ETH_P_IPV6 (v6). + + The bareudp device to tunnel L3 traffic with multiple ethertypes + (MPLS & IP) can be created by passing the L3 protocol name as string in + the field payload_type.
+ + An example to create bareudp device to tunnel + MPLS unicast & multicast traffic is given below.:: + + $ ovs-vsctl add-port br0 mpls_udp_port -- set interface mpls_udp_port \ + type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ + options:payload_type=mpls options:dst_port=6635 + + The below example ovs rule shows how a bareudp tunnel port is used to + tunnel an MPLS packet inside a UDP tunnel.:: + + $ ovs-ofctl -O OpenFlow13 add-flow br0 "in_port=10,dl_type=0x0800,\ + actions=push_mpls:0x8847,set_field:3->mpls_label,\ + output:mpls_udp_port" + + This rule does MPLS encapsulation on IP packets and sends the l3 MPLS + packets on a bareudp tunnel port which has its payload_type configured + to 0x8847. + + An example to create bareudp device to tunnel + IPv4 & IPv6 traffic is given below.:: + + $ ovs-vsctl add-port br0 ip_udp_port -- set interface ip_udp_port \ + type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \ + options:payload_type=ip options:dst_port=6636 diff --git a/Documentation/faq/index.rst b/Documentation/faq/index.rst index 334b828b2..1dd29986a 100644 --- a/Documentation/faq/index.rst +++ b/Documentation/faq/index.rst @@ -30,6 +30,7 @@ Open vSwitch FAQ .. toctree:: :maxdepth: 2 + bareudp configuration contributing design diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst index a52df2205..4b9620015 100644 --- a/Documentation/faq/releases.rst +++ b/Documentation/faq/releases.rst @@ -138,6 +138,7 @@ Q: Are all features available with all datapaths? Tunnel - ERSPAN 4.18 2.10 2.10 NO Tunnel - ERSPAN-IPv6 4.18 2.10 2.10 NO Tunnel - GTP-U NO NO 2.14 NO + Tunnel - Bareudp 5.7 NO NO NO QoS - Policing YES 1.1 2.6 NO QoS - Shaping YES 1.1 NO NO sFlow YES 1.0 1.0 NO diff --git a/NEWS b/NEWS index 2ac7c00d1..d357da31d 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,10 @@ Post-v2.14.0 OpenFlow bundle actions. - Support for GitHub Actions based continuous integration builds has been added.
+ - Bareudp Tunnel + * Bareudp device support is present in linux kernel from version 5.7 + * Kernel bareudp device is not backported to ovs tree. + * Userspace datapath support is not added v2.14.0 - 17 Aug 2020 diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index 2d884312f..875de2025 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -246,6 +246,7 @@ enum ovs_vport_type { OVS_VPORT_TYPE_IP6ERSPAN = 108, /* ERSPAN tunnel. */ OVS_VPORT_TYPE_IP6GRE = 109, OVS_VPORT_TYPE_GTPU = 110, + OVS_VPORT_TYPE_BAREUDP = 111, /* Bareudp tunnel. */ __OVS_VPORT_TYPE_MAX }; @@ -308,6 +309,14 @@ enum { #define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1) +enum { + OVS_BAREUDP_EXT_UNSPEC, + OVS_BAREUDP_EXT_MULTIPROTO_MODE, + __OVS_BAREUDP_EXT_MAX, +}; + +#define OVS_BAREUDP_EXT_MAX (__OVS_BAREUDP_EXT_MAX - 1) + /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels. */ enum { diff --git a/lib/dpif-netlink-rtnl.c b/lib/dpif-netlink-rtnl.c index fd157ce2d..4fc42daed 100644 --- a/lib/dpif-netlink-rtnl.c +++ b/lib/dpif-netlink-rtnl.c @@ -58,6 +58,18 @@ VLOG_DEFINE_THIS_MODULE(dpif_netlink_rtnl); #define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10 #endif +#ifndef IFLA_BAREUDP_MAX +#define IFLA_BAREUDP_MAX 0 +#endif +#if IFLA_BAREUDP_MAX < 4 +#define IFLA_BAREUDP_PORT 1 +#define IFLA_BAREUDP_ETHERTYPE 2 +#define IFLA_BAREUDP_SRCPORT_MIN 3 +#define IFLA_BAREUDP_MULTIPROTO_MODE 4 +#endif + +#define BAREUDP_SRCPORT_MIN 49153 + static const struct nl_policy rtlink_policy[] = { [IFLA_LINKINFO] = { .type = NL_A_NESTED }, }; @@ -81,6 +93,10 @@ static const struct nl_policy geneve_policy[] = { [IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NL_A_U8 }, [IFLA_GENEVE_PORT] = { .type = NL_A_U16 }, }; +static const struct nl_policy bareudp_policy[] = { + [IFLA_BAREUDP_PORT] = { .type = NL_A_U16 }, + [IFLA_BAREUDP_ETHERTYPE] = { .type = NL_A_U16 }, +}; static const char * 
vport_type_to_kind(enum ovs_vport_type type, @@ -113,6 +129,8 @@ vport_type_to_kind(enum ovs_vport_type type, } case OVS_VPORT_TYPE_GTPU: return NULL; + case OVS_VPORT_TYPE_BAREUDP: + return "bareudp"; case OVS_VPORT_TYPE_NETDEV: case OVS_VPORT_TYPE_INTERNAL: case OVS_VPORT_TYPE_LISP: @@ -243,6 +261,24 @@ dpif_netlink_rtnl_geneve_verify(const struct netdev_tunnel_config *tnl_cfg, return err; } +static int +dpif_netlink_rtnl_bareudp_verify(const struct netdev_tunnel_config *tnl_cfg, + const char *kind, struct ofpbuf *reply) +{ + struct nlattr *bareudp[ARRAY_SIZE(bareudp_policy)]; + int err; + + err = rtnl_policy_parse(kind, reply, bareudp_policy, bareudp, + ARRAY_SIZE(bareudp_policy)); + if (!err) { + if ((tnl_cfg->dst_port != nl_attr_get_be16(bareudp[IFLA_BAREUDP_PORT])) + || (tnl_cfg->payload_ethertype + != nl_attr_get_be16(bareudp[IFLA_BAREUDP_ETHERTYPE]))) { + err = EINVAL; + } + } + return err; +} static int dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg, @@ -275,6 +311,9 @@ dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg, case OVS_VPORT_TYPE_GENEVE: err = dpif_netlink_rtnl_geneve_verify(tnl_cfg, kind, reply); break; + case OVS_VPORT_TYPE_BAREUDP: + err = dpif_netlink_rtnl_bareudp_verify(tnl_cfg, kind, reply); + break; case OVS_VPORT_TYPE_NETDEV: case OVS_VPORT_TYPE_INTERNAL: case OVS_VPORT_TYPE_LISP: @@ -357,6 +396,16 @@ dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg, nl_msg_put_u8(&request, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1); nl_msg_put_be16(&request, IFLA_GENEVE_PORT, tnl_cfg->dst_port); break; + case OVS_VPORT_TYPE_BAREUDP: + nl_msg_put_be16(&request, IFLA_BAREUDP_ETHERTYPE, + tnl_cfg->payload_ethertype); + nl_msg_put_u16(&request, IFLA_BAREUDP_SRCPORT_MIN, + BAREUDP_SRCPORT_MIN); + nl_msg_put_be16(&request, IFLA_BAREUDP_PORT, tnl_cfg->dst_port); + if (tnl_cfg->exts & (1 << OVS_BAREUDP_EXT_MULTIPROTO_MODE)) { + nl_msg_put_flag(&request, IFLA_BAREUDP_MULTIPROTO_MODE); + } + break; case 
OVS_VPORT_TYPE_NETDEV: case OVS_VPORT_TYPE_INTERNAL: case OVS_VPORT_TYPE_LISP: @@ -470,6 +519,7 @@ dpif_netlink_rtnl_port_destroy(const char *name, const char *type) case OVS_VPORT_TYPE_ERSPAN: case OVS_VPORT_TYPE_IP6ERSPAN: case OVS_VPORT_TYPE_IP6GRE: + case OVS_VPORT_TYPE_BAREUDP: return dpif_netlink_rtnl_destroy(name); case OVS_VPORT_TYPE_NETDEV: case OVS_VPORT_TYPE_INTERNAL: diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 2f881e4fa..ceb56c685 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -749,6 +749,9 @@ get_vport_type(const struct dpif_netlink_vport *vport) case OVS_VPORT_TYPE_GTPU: return "gtpu"; + case OVS_VPORT_TYPE_BAREUDP: + return "bareudp"; + case OVS_VPORT_TYPE_UNSPEC: case __OVS_VPORT_TYPE_MAX: break; @@ -784,6 +787,8 @@ netdev_to_ovs_vport_type(const char *type) return OVS_VPORT_TYPE_GRE; } else if (!strcmp(type, "gtpu")) { return OVS_VPORT_TYPE_GTPU; + } else if (!strcmp(type, "bareudp")) { + return OVS_VPORT_TYPE_BAREUDP; } else { return OVS_VPORT_TYPE_UNSPEC; } diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c index 0252b61de..15567e524 100644 --- a/lib/netdev-vport.c +++ b/lib/netdev-vport.c @@ -47,6 +47,7 @@ #include "unaligned.h" #include "unixctl.h" #include "openvswitch/vlog.h" +#include "openvswitch/ofp-parse.h" #ifdef __linux__ #include "netdev-linux.h" #endif @@ -112,7 +113,7 @@ netdev_vport_needs_dst_port(const struct netdev *dev) return (class->get_config == get_tunnel_config && (!strcmp("geneve", type) || !strcmp("vxlan", type) || !strcmp("lisp", type) || !strcmp("stt", type) || - !strcmp("gtpu", type))); + !strcmp("gtpu", type) || !strcmp("bareudp",type))); } const char * @@ -219,6 +220,8 @@ netdev_vport_construct(struct netdev *netdev_) dev->tnl_cfg.dst_port = port ? htons(port) : htons(STT_DST_PORT); } else if (!strcmp(type, "gtpu")) { dev->tnl_cfg.dst_port = port ? 
htons(port) : htons(GTPU_DST_PORT); + } else if (!strcmp(type, "bareudp")) { + dev->tnl_cfg.dst_port = htons(port); } dev->tnl_cfg.dont_fragment = true; @@ -438,6 +441,8 @@ tunnel_supported_layers(const char *type, return TNL_L2 | TNL_L3; } else if (!strcmp(type, "gtpu")) { return TNL_L3; + } else if (!strcmp(type, "bareudp")) { + return TNL_L3; } else { return TNL_L2; } @@ -745,6 +750,23 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp) goto out; } } + } else if (!strcmp(node->key, "payload_type")) { + if (!strcmp(node->value, "mpls")) { + tnl_cfg.payload_ethertype = htons(ETH_TYPE_MPLS); + tnl_cfg.exts |= (1 << OVS_BAREUDP_EXT_MULTIPROTO_MODE); + } else if (!strcmp(node->value, "ip")) { + tnl_cfg.payload_ethertype = htons(ETH_TYPE_IP); + tnl_cfg.exts |= (1 << OVS_BAREUDP_EXT_MULTIPROTO_MODE); + } else { + uint16_t payload_ethertype; + + if (str_to_u16(node->value, "payload_type", + &payload_ethertype)) { + err = EINVAL; + goto out; + } + tnl_cfg.payload_ethertype = htons(payload_ethertype); + } } else { ds_put_format(&errors, "%s: unknown %s argument '%s'\n", name, type, node->key); @@ -917,7 +939,8 @@ get_tunnel_config(const struct netdev *dev, struct smap *args) (!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) || (!strcmp("lisp", type) && dst_port != LISP_DST_PORT) || (!strcmp("stt", type) && dst_port != STT_DST_PORT) || - (!strcmp("gtpu", type) && dst_port != GTPU_DST_PORT)) { + (!strcmp("gtpu", type) && dst_port != GTPU_DST_PORT) || + !strcmp("bareudp", type)) { smap_add_format(args, "dst_port", "%d", dst_port); } } @@ -1243,6 +1266,14 @@ netdev_vport_tunnel_register(void) }, {{NULL, NULL, 0, 0}} }, + { "udp_sys", + { + TUNNEL_FUNCTIONS_COMMON, + .type = "bareudp", + .get_ifindex = NETDEV_VPORT_GET_IFINDEX, + }, + {{NULL, NULL, 0, 0}} + }, }; static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; diff --git a/lib/netdev.h b/lib/netdev.h index fb5073056..b705a9e56 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ 
-107,6 +107,7 @@ struct netdev_tunnel_config { bool out_key_flow; ovs_be64 out_key; + ovs_be16 payload_ethertype; ovs_be16 dst_port; bool ip_src_flow; diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 2715a142b..7108c8a30 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -3573,6 +3573,7 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac, case OVS_VPORT_TYPE_VXLAN: case OVS_VPORT_TYPE_GENEVE: case OVS_VPORT_TYPE_GTPU: + case OVS_VPORT_TYPE_BAREUDP: nw_proto = IPPROTO_UDP; break; case OVS_VPORT_TYPE_LISP: diff --git a/tests/system-layer3-tunnels.at b/tests/system-layer3-tunnels.at index 1232964bb..d21fd777d 100644 --- a/tests/system-layer3-tunnels.at +++ b/tests/system-layer3-tunnels.at @@ -152,3 +152,99 @@ AT_CHECK([tail -1 stdout], [0], OVS_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([layer3 - ping over MPLS Bareudp]) +OVS_CHECK_MIN_KERNEL(5, 7) +OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])]) +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "36:b1:ee:7c:01:01") +ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24", "36:b1:ee:7c:01:02") + +ADD_OVS_TUNNEL([bareudp], [br0], [at_bareudp0], [8.1.1.3], [8.1.1.2/24], + [ options:local_ip=8.1.1.2 options:packet_type="legacy_l3" options:payload_type=mpls options:dst_port=6635]) + +ADD_OVS_TUNNEL([bareudp], [br1], [at_bareudp1], [8.1.1.2], [8.1.1.3/24], + [options:local_ip=8.1.1.3 options:packet_type="legacy_l3" options:payload_type=mpls options:dst_port=6635]) + +AT_DATA([flows0.txt], [dnl +table=0,priority=100,dl_type=0x0800 actions=push_mpls:0x8847,set_mpls_label:3,output:at_bareudp0 +table=0,priority=100,dl_type=0x8847 in_port=at_bareudp0 actions=pop_mpls:0x0800,set_field:36:b1:ee:7c:01:01->dl_dst,set_field:36:b1:ee:7c:01:02->dl_src,output:ovs-p0 +table=0,priority=10 actions=normal +]) + +AT_DATA([flows1.txt], [dnl +table=0,priority=100,dl_type=0x0800 actions=push_mpls:0x8847,set_mpls_label:3,output:at_bareudp1 
+table=0,priority=100,dl_type=0x8847 in_port=at_bareudp1 actions=pop_mpls:0x0800,set_field:36:b1:ee:7c:01:02->dl_dst,set_field:36:b1:ee:7c:01:01->dl_src,output:ovs-p1 +table=0,priority=10 actions=normal +]) + +AT_CHECK([ip link add patch0 type veth peer name patch1]) +on_exit 'ip link del patch0' + +AT_CHECK([ip link set dev patch0 up]) +AT_CHECK([ip link set dev patch1 up]) +AT_CHECK([ovs-vsctl add-port br0 patch0]) +AT_CHECK([ovs-vsctl add-port br1 patch1]) + + +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br0 flows0.txt]) +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br1 flows1.txt]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP + +AT_SETUP([layer3 - ping over Bareudp]) +OVS_CHECK_MIN_KERNEL(5, 7) +OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])]) +ADD_NAMESPACES(at_ns0, at_ns1) + +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "36:b1:ee:7c:01:01") +ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24", "36:b1:ee:7c:01:02") + +ADD_OVS_TUNNEL([bareudp], [br0], [at_bareudp0], [8.1.1.3], [8.1.1.2/24], + [ options:local_ip=8.1.1.2 options:packet_type="legacy_l3" options:payload_type=ip options:dst_port=6636]) + +ADD_OVS_TUNNEL([bareudp], [br1], [at_bareudp1], [8.1.1.2], [8.1.1.3/24], + [options:local_ip=8.1.1.3 options:packet_type="legacy_l3" options:payload_type=ip options:dst_port=6636]) + +AT_DATA([flows0.txt], [dnl +table=0,priority=100,dl_type=0x0800 in_port=ovs-p0, actions=output:at_bareudp0 +table=0,priority=100,dl_type=0x0800 in_port=at_bareudp0 actions=set_field:36:b1:ee:7c:01:01->dl_dst,set_field:36:b1:ee:7c:01:02->dl_src,output:ovs-p0 +table=0,priority=10 actions=normal +]) + +AT_DATA([flows1.txt], [dnl +table=0,priority=100,dl_type=0x0800 in_port=ovs-p1 actions=output:at_bareudp1 
+table=0,priority=100,dl_type=0x0800 in_port=at_bareudp1 actions=set_field:36:b1:ee:7c:01:02->dl_dst,set_field:36:b1:ee:7c:01:01->dl_src,output:ovs-p1 +table=0,priority=10 actions=normal +]) + +AT_CHECK([ip link add patch0 type veth peer name patch1]) +on_exit 'ip link del patch0' + +AT_CHECK([ip link set dev patch0 up]) +AT_CHECK([ip link set dev patch1 up]) +AT_CHECK([ovs-vsctl add-port br0 patch0]) +AT_CHECK([ovs-vsctl add-port br1 patch1]) + + +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br0 flows0.txt]) +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br1 flows1.txt]) + +NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) +OVS_TRAFFIC_VSWITCHD_STOP +AT_CLEANUP diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml index 89a876796..fee54b0fa 100644 --- a/vswitchd/vswitch.xml +++ b/vswitchd/vswitch.xml @@ -2694,6 +2694,15 @@

    +
    Bareudp
    +
    +

    + The Bareudp tunnel provides generic L3 encapsulation support for + tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a + UDP tunnel. +

    +
    +
    @@ -2701,8 +2710,9 @@

    These options apply to interfaces with of - geneve, gre, ip6gre, - vxlan, lisp and stt. + geneve, bareudp, gre, + ip6gre, vxlan, lisp and + stt.

    @@ -2714,6 +2724,8 @@ one is matched first. is considered more specific than if a port defines one and another port defines the other. + is not applicable for bareudp + tunnels. Hence it is not considered while identifying a bareudp tunnel.

    @@ -2780,7 +2792,10 @@ -

    Optional. The key that received packets must contain, one of:

    +

    + Optional, not applicable for bareudp. The key that + received packets must contain, one of: +

    • @@ -2807,7 +2822,10 @@ -

      Optional. The key to be set on outgoing packets, one of:

      +

      + Optional, not applicable for bareudp. The key to be set + on outgoing packets, one of: +

      • @@ -2999,10 +3017,11 @@ - +

        gre, ip6gre, geneve, - and vxlan interfaces support these options. + bareudp and vxlan interfaces support these + options.

        @@ -3092,6 +3111,17 @@
        + + +

        + Specifies the ethertype of the L3 protocol the bareudp + device is tunnelling. For tunnels which support multiple + ethertypes of an L3 protocol (IP, MPLS) this field specifies the + protocol name as a string. +

        +
        +
        +

        These options apply only to patch ports, that is, interfaces -- GitLab From 22d0244a568b1a01b3aee4a80f942c62e12d8c5e Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 23 Dec 2020 00:34:18 +0100 Subject: [PATCH 425/432] AUTHORS: Add Renat Nurgaliyev. Signed-off-by: Ilya Maximets --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 7282ca607..4137a25f5 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -324,6 +324,7 @@ Ravi Kerur Ravi.Kerur@telekom.com Raymond Burkholder ray@oneunified.net Reid Price Remko Tronçon git@el-tramo.be +Renat Nurgaliyev impleman@gmail.com Rich Lane rlane@bigswitch.com Richard Oliver richard@richard-oliver.co.uk Rishi Bamba rishi.bamba@tcs.com -- GitLab From def6eb1ea269806b12399a350a5af16e25da23ff Mon Sep 17 00:00:00 2001 From: Justin Pettit Date: Thu, 24 Dec 2020 10:50:10 -0800 Subject: [PATCH 426/432] security.rst: Add more information about the Downstream mailing list. Signed-off-by: Justin Pettit Acked-by: Flavio Leitner --- Documentation/internals/security.rst | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/internals/security.rst b/Documentation/internals/security.rst index f6a31ad01..8b4e5c3f4 100644 --- a/Documentation/internals/security.rst +++ b/Documentation/internals/security.rst @@ -247,10 +247,13 @@ immediate (esp. if it's already publicly known) to a few weeks. As a basic default policy, we expect report date to disclosure date to be 10 to 15 business days. -Operating system vendors are obvious downstream stakeholders. It may not be -necessary to be too choosy about who to include: any major Open vSwitch user -who is interested and can be considered trustworthy enough could be included. -To become a downstream stakeholder, email the ovs-security mailing list. 
+Operating system vendors are obvious downstream stakeholders, however, +any major Open vSwitch user who is interested and can be considered +trustworthy enough could be included. To request being added to the +Downstream mailing list, email the ovs-security mailing list. Please +include a few sentences on how your organization uses Open vSwitch. If +possible, please provide a security-related email alias rather than a +direct end-user address. If the vulnerability is already public, skip this step. -- GitLab From c3a4c860f3274c89633d24166b71da8d873b1c89 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 9 Jan 2021 15:17:09 +0000 Subject: [PATCH 427/432] Bump dependency on libbpdk-dev >= 20.11 --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index ca258c60b..08aa82f89 100644 --- a/debian/control +++ b/debian/control @@ -14,7 +14,7 @@ Build-Depends: dh-python, graphviz, libcap-ng-dev, - libdpdk-dev (>= 19.11) [amd64 i386 ppc64el arm64], + libdpdk-dev (>= 20.11) [amd64 i386 ppc64el arm64], libnuma-dev [amd64 i386 ppc64el arm64], libpcap-dev [amd64 i386 ppc64el arm64], libssl-dev, -- GitLab From 420a9583a0d3a3f6ac0c4574ffcc6734cd6b0a84 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 9 Jan 2021 15:19:46 +0000 Subject: [PATCH 428/432] Refresh py3-compat.patch for v2.15 Update taken from 2.15.0~git20210104.def6eb1ea-0ubuntu3 --- debian/patches/py3-compat.patch | 138 +------------------------------- 1 file changed, 3 insertions(+), 135 deletions(-) diff --git a/debian/patches/py3-compat.patch b/debian/patches/py3-compat.patch index 3ce28880e..343f23bcb 100644 --- a/debian/patches/py3-compat.patch +++ b/debian/patches/py3-compat.patch @@ -126,93 +126,6 @@ Signed-off-by: James Page def ovs_vsctl_add_bridge(bridge): ---- a/utilities/bugtool/ovs-bugtool.in -+++ b/utilities/bugtool/ovs-bugtool.in -@@ -33,8 +33,6 @@ - # or func_output(). 
- # - --import StringIO --import commands - import fcntl - import getopt - import hashlib -@@ -48,10 +46,12 @@ import warnings - import zipfile - from select import select - from signal import SIGTERM --from subprocess import PIPE, Popen -+from subprocess import PIPE, Popen, check_output - - from xml.dom.minidom import getDOMImplementation, parse - -+from six.moves import StringIO -+ - warnings.filterwarnings(action="ignore", category=DeprecationWarning) - - OS_RELEASE = platform.release() -@@ -782,7 +782,7 @@ def dump_scsi_hosts(cap): - - - def module_info(cap): -- output = StringIO.StringIO() -+ output = StringIO() - modules = open(PROC_MODULES, 'r') - procs = [] - -@@ -806,7 +806,7 @@ def multipathd_topology(cap): - - - def dp_list(): -- output = StringIO.StringIO() -+ output = StringIO() - procs = [ProcOutput([OVS_DPCTL, 'dump-dps'], - caps[CAP_NETWORK_STATUS][MAX_TIME], output)] - -@@ -828,7 +828,7 @@ def collect_ovsdb(): - if os.path.isfile(OPENVSWITCH_COMPACT_DB): - os.unlink(OPENVSWITCH_COMPACT_DB) - -- output = StringIO.StringIO() -+ output = StringIO() - max_time = 5 - procs = [ProcOutput(['ovsdb-tool', 'compact', - OPENVSWITCH_CONF_DB, OPENVSWITCH_COMPACT_DB], -@@ -871,7 +871,7 @@ def fd_usage(cap): - - - def dump_rdac_groups(cap): -- output = StringIO.StringIO() -+ output = StringIO() - procs = [ProcOutput([MPPUTIL, '-a'], caps[cap][MAX_TIME], output)] - - run_procs([procs]) -@@ -1095,7 +1095,7 @@ def make_inventory(inventory, subdir): - s.setAttribute('date', time.strftime('%c')) - s.setAttribute('hostname', platform.node()) - s.setAttribute('uname', ' '.join(platform.uname())) -- s.setAttribute('uptime', commands.getoutput(UPTIME)) -+ s.setAttribute('uptime', check_output([UPTIME])) - document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(s) - - map(lambda k_v: inventory_entry(document, subdir, k_v[0], k_v[1]), -@@ -1391,13 +1391,13 @@ def get_free_disk_space(path): - return s.f_frsize * s.f_bfree - - --class 
StringIOmtime(StringIO.StringIO): -+class StringIOmtime(StringIO): - def __init__(self, buf=''): -- StringIO.StringIO.__init__(self, buf) -+ StringIO.__init__(self, buf) - self.mtime = time.time() - - def write(self, s): -- StringIO.StringIO.write(self, s) -+ StringIO.write(self, s) - self.mtime = time.time() - - --- a/utilities/ovs-check-dead-ifs.in +++ b/utilities/ovs-check-dead-ifs.in @@ -1,5 +1,7 @@ @@ -252,60 +165,15 @@ Signed-off-by: James Page (field, stats, action) = (results[0], results[1:-1], results[-1]) -@@ -592,7 +594,7 @@ def flows_read(ihdl, flow_db): - - try: - flow_db.flow_line_add(line) -- except ValueError, arg: -+ except ValueError as arg: - logging.error(arg) - - return flow_db -@@ -958,7 +960,7 @@ class FlowDB: - change order of fields of the same flow. - """ +@@ -963,7 +965,7 @@ class FlowDB: + if not isinstance(line, str): + line = str(line) - line = line.rstrip("\n") + line = line.rstrip(b"\n") (fields, stats, _) = flow_line_split(line) try: -@@ -988,7 +990,7 @@ class FlowDB: - - self.flow_event(fields_dict, stats_old_dict, stats_dict) - -- except ValueError, arg: -+ except ValueError as arg: - logging.error(arg) - self._error_count += 1 - raise -@@ -1192,7 +1194,7 @@ def flows_top(args): - flows_read(ihdl, flow_db) - finally: - ihdl.close() -- except OSError, arg: -+ except OSError as arg: - logging.critical(arg) - break - -@@ -1220,7 +1222,7 @@ def flows_top(args): - - # repeat output - for (count, line) in lines: -- print line -+ print(line) - - - def flows_script(args): -@@ -1249,7 +1251,7 @@ def flows_script(args): - render = Render(console_width, Render.FIELD_SELECT_SCRIPT) - - for line in render.format(flow_db): -- print line -+ print(line) - - - def main(): --- a/utilities/ovs-l3ping.in +++ b/utilities/ovs-l3ping.in @@ -18,8 +18,10 @@ opening holes in the firewall for the XM -- GitLab From f5925e4acc122ba61f3788c144ca7ed41cfc5d84 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 9 Jan 2021 15:39:24 +0000 Subject: [PATCH 
429/432] Bump libopenvswitch to 2.14 Gbp-Dch: ignore --- debian/openvswitch-common.postinst.in | 2 +- debian/openvswitch-switch-dpdk.postinst.in | 2 +- debian/rules | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/debian/openvswitch-common.postinst.in b/debian/openvswitch-common.postinst.in index 079fff915..226a2a52a 100644 --- a/debian/openvswitch-common.postinst.in +++ b/debian/openvswitch-common.postinst.in @@ -4,7 +4,7 @@ set -e if [ "${1}" = "configure" ] ; then update-alternatives --install /usr/sbin/ovs-vswitchd ovs-vswitchd /usr/lib/openvswitch-common/ovs-vswitchd 100 \ - --slave /usr/lib/%%MULTIARCH_TRIPLETT%%/libopenvswitch-2.13.so.0.0.0 libopenvswitch.so /usr/lib/openvswitch-common/libopenvswitch-2.13.so.0.0.0 + --slave /usr/lib/%%MULTIARCH_TRIPLETT%%/libopenvswitch-2.14.so.0.0.90 libopenvswitch.so /usr/lib/openvswitch-common/libopenvswitch-2.14.so.0.0.90 fi #DEBHELPER# diff --git a/debian/openvswitch-switch-dpdk.postinst.in b/debian/openvswitch-switch-dpdk.postinst.in index b5cd6d0dc..e119e4c97 100644 --- a/debian/openvswitch-switch-dpdk.postinst.in +++ b/debian/openvswitch-switch-dpdk.postinst.in @@ -4,7 +4,7 @@ set -e if [ "${1}" = "configure" ] ; then update-alternatives --install /usr/sbin/ovs-vswitchd ovs-vswitchd /usr/lib/openvswitch-switch-dpdk/ovs-vswitchd-dpdk 200 \ - --slave /usr/lib/%%MULTIARCH_TRIPLETT%%/libopenvswitch-2.13.so.0.0.0 libopenvswitch.so /usr/lib/openvswitch-switch-dpdk/libopenvswitch-2.13.so.0.0.0 + --slave /usr/lib/%%MULTIARCH_TRIPLETT%%/libopenvswitch-2.14.so.0.0.90 libopenvswitch.so /usr/lib/openvswitch-switch-dpdk/libopenvswitch-2.14.so.0.0.90 fi #DEBHELPER# diff --git a/debian/rules b/debian/rules index 5f6122b0c..a8deefc35 100755 --- a/debian/rules +++ b/debian/rules @@ -197,8 +197,8 @@ override_dh_auto_install-arch: mkdir -p $(CURDIR)/debian/openvswitch-common/usr/lib/openvswitch-common mv $(CURDIR)/debian/tmp/usr/sbin/ovs-vswitchd \ 
$(CURDIR)/debian/openvswitch-common/usr/lib/openvswitch-common/ovs-vswitchd - mv $(CURDIR)/debian/tmp/usr/lib/*/libopenvswitch-2.13.so.0.0.0 \ - $(CURDIR)/debian/openvswitch-common/usr/lib/openvswitch-common/libopenvswitch-2.13.so.0.0.0 + mv $(CURDIR)/debian/tmp/usr/lib/*/libopenvswitch-2.14.so.0.0.90 \ + $(CURDIR)/debian/openvswitch-common/usr/lib/openvswitch-common/libopenvswitch-2.14.so.0.0.90 override_dh_auto_install-indep: $(MAKE) -C _debian DESTDIR=$(CURDIR)/debian/tmp install @@ -210,7 +210,7 @@ override_dh_auto_install-indep: override_dh_install: install -D -m 0644 utilities/ovs-vsctl-bashcomp.bash $(CURDIR)/debian/openvswitch-switch/usr/share/bash-completion/completions/ovs-vsctl - dh_install --exclude=usr/sbin/ovs-vswitchd --exclude=usr/lib/`dpkg-architecture -qDEB_HOST_MULTIARCH`/libopenvswitch-2.13.so.0.0.0 + dh_install --exclude=usr/sbin/ovs-vswitchd --exclude=usr/lib/`dpkg-architecture -qDEB_HOST_MULTIARCH`/libopenvswitch-2.14.so.0.0.90 rm -f $(CURDIR)/debian/tmp/usr/lib/*/*.la dh_installman --language=C @@ -218,7 +218,7 @@ override_dh_install: # remove the files managed via update-alternatives rm -f $(CURDIR)/debian/tmp/usr/sbin/ovs-vswitchd - rm -f $(CURDIR)/debian/tmp/usr/lib/*/libopenvswitch-2.13.so.0.0.0 + rm -f $(CURDIR)/debian/tmp/usr/lib/*/libopenvswitch-2.14.so.0.0.90 dh_missing --fail-missing # openvswitch-switch @@ -228,8 +228,8 @@ override_dh_install: ifneq (,$(filter i386 amd64 ppc64el arm64, $(DEB_HOST_ARCH))) install -v -D _dpdk/vswitchd/.libs/ovs-vswitchd \ $(CURDIR)/debian/openvswitch-switch-dpdk/usr/lib/openvswitch-switch-dpdk/ovs-vswitchd-dpdk - install -v -D _dpdk/lib/.libs/libopenvswitch-2.13.so.0.0.0 \ - $(CURDIR)/debian/openvswitch-switch-dpdk/usr/lib/openvswitch-switch-dpdk/libopenvswitch-2.13.so.0.0.0 + install -v -D _dpdk/lib/.libs/libopenvswitch-2.14.so.0.0.90 \ + $(CURDIR)/debian/openvswitch-switch-dpdk/usr/lib/openvswitch-switch-dpdk/libopenvswitch-2.14.so.0.0.90 endif override_dh_installinit: -- GitLab From 
9835b2d8a658753d51d2d8a3bd9a84b3a40d9e02 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 10 Jan 2021 12:21:46 +0000 Subject: [PATCH 430/432] Use new --with-dpdk=shared configure flag value --- debian/rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/rules b/debian/rules index a8deefc35..077462863 100755 --- a/debian/rules +++ b/debian/rules @@ -32,7 +32,7 @@ ifneq (,$(filter i386 amd64 ppc64el arm64, $(DEB_HOST_ARCH))) test -e Makefile || \ ../configure --prefix=/usr --localstatedir=/var --enable-ssl --enable-shared \ --libdir=/usr/lib/$(DEB_HOST_MULTIARCH) \ - --with-dpdk --sysconfdir=/etc \ + --with-dpdk=shared --sysconfdir=/etc \ $(DATAPATH_CONFIGURE_OPTS)) endif -- GitLab From ccdf0afb3164f7390a2d6f84afe1c5f2dac0ce7a Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sun, 10 Jan 2021 12:22:23 +0000 Subject: [PATCH 431/432] Realign armhf test skip range due to upstream changes --- debian/rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/rules b/debian/rules index 077462863..38fabf5cf 100755 --- a/debian/rules +++ b/debian/rules @@ -53,7 +53,7 @@ TEST_LIST_DPDK = $(TEST_LIST) # 1021: ofproto-dpif - select group with weights FAILED (ofproto-dpif.at:535) # 1057: ofproto-dpif - controller action without megaflows FAILED (ofproto-dpif.at:1893) ifneq (,$(filter armhf, $(DEB_HOST_ARCH))) -TEST_LIST = 1-19 21-23 27-917 919-1020 1022-1056 1058- +TEST_LIST = 1-19 21-23 28-917 919-1020 1022-1056 1058- TEST_LIST_DPDK = $(TEST_LIST) endif # armhf -- GitLab From e4a60c22ea36e676651dc954c50d05bc475d3e50 Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Sat, 9 Jan 2021 15:21:30 +0000 Subject: [PATCH 432/432] Update changelog for 2.15.0~git20210104.def6eb1ea+dfsg1-1 release --- debian/changelog | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/debian/changelog b/debian/changelog index d2058b7ea..f0361edb8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,13 @@ +openvswitch 
(2.15.0~git20210104.def6eb1ea+dfsg1-1) UNRELEASED; urgency=medium + + * Merge branch 'master-dfsg' into 2.15 + * Bump dependency on libdpdk-dev >= 20.11 (Closes: #974588) + * Refresh py3-compat.patch for v2.15 + * Realign armhf test skip range due to upstream changes + * Use new --with-dpdk=shared configure flag value + + -- Luca Boccassi Sat, 09 Jan 2021 15:21:29 +0000 + openvswitch (2.13.0+dfsg1-16) unstable; urgency=medium * Generating postinst at build time to avoid using dpkg-architecture at -- GitLab