From b23b2a02ad58a98cec5a673a72be791a9cf17a04 Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Wed, 6 Jul 2022 10:00:35 +0200 Subject: [PATCH 1/2] d/p/0001-ovsdb-idl-Support-write-only-changed-IDL-monitor-mod.patch Build requirement for OVN 22.03.1. Signed-off-by: Frode Nordahl --- ...t-write-only-changed-IDL-monitor-mod.patch | 330 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 331 insertions(+) create mode 100644 debian/patches/0001-ovsdb-idl-Support-write-only-changed-IDL-monitor-mod.patch diff --git a/debian/patches/0001-ovsdb-idl-Support-write-only-changed-IDL-monitor-mod.patch b/debian/patches/0001-ovsdb-idl-Support-write-only-changed-IDL-monitor-mod.patch new file mode 100644 index 000000000..9d12ca760 --- /dev/null +++ b/debian/patches/0001-ovsdb-idl-Support-write-only-changed-IDL-monitor-mod.patch @@ -0,0 +1,330 @@ +From 5935f9965934277522905ee130255e4ee422a684 Mon Sep 17 00:00:00 2001 +From: Dumitru Ceara +Date: Tue, 26 Apr 2022 12:37:08 +0200 +Subject: [PATCH] ovsdb-idl: Support write-only-changed IDL monitor mode. + +At a first glance, change tracking should never be allowed for +write-only columns. However, some clients (e.g., ovn-northd) that are +mostly exclusive writers of a database, use change tracking to avoid +duplicating the IDL row records into a local cache when implementing +incremental processing. + +The default behavior of the IDL is to automatically turn a write-only +column into a read-write column whenever the client enables change +tracking for that column. + +For the afore mentioned clients, this becomes a performance issue. +Commit 1cc618c32524 ("ovsdb-idl: Fix atomicity of writes that don't +change a column's value.") explains why writes that don't change a +column's value cannot be optimized out early if the column is +read/write. + +Furthermore, if there is at least one record in any table that +changed during a transaction, then *all* records that have been +written are added to the transaction, even if their values didn't +change. If there are many such rows (e.g., like in ovn-northd's +case) this incurs a significant overhead because: +a. the client has to build this large transaction +b. the transaction has to be sent over the network +c. the server needs to parse this (mostly) no-op update + +We now introduce new IDL APIs allowing users to set a new monitoring +mode flag, OVSDB_IDL_WRITE_CHANGED_ONLY, to indicate to the IDL that the +atomicity constraints may be relaxed and written columns that don't +change value can be skipped from the current transaction. + +We benchmarked ovn-northd performance when using this new mode +against NB and SB databases taken from ovn-kubernetes scale tests. +We noticed that when a minor change is performed to the Northbound +database (e.g., NB_Global.nb_cfg is incremented) the time it takes to +build the Southbound transaction becomes negligible (vs ~1.5 seconds +before this change). + +End-to-end ovn-kubernetes scale tests on 120-node clusters also show +significant reduction of latency to bring up pods; both average and P99 +latency decreased by ~30%. + +Acked-by: Han Zhou +Signed-off-by: Dumitru Ceara +Signed-off-by: Ilya Maximets +(cherry picked from commit d94cd0d3eec33e4290d7ca81918f5ac61444886e) +--- + lib/ovsdb-idl.c | 60 +++++++++++++++++++++++++++++++++++++++++----- + lib/ovsdb-idl.h | 16 +++++++++++-- + tests/ovsdb-idl.at | 31 +++++++++++++++++++++++- + tests/test-ovsdb.c | 18 ++++++++++---- + 4 files changed, 111 insertions(+), 14 deletions(-) + +diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c +index 882ede755..6fbc109cd 100644 +--- a/lib/ovsdb-idl.c ++++ b/lib/ovsdb-idl.c +@@ -1396,6 +1396,45 @@ ovsdb_idl_track_clear(struct ovsdb_idl *idl) + { + ovsdb_idl_track_clear__(idl, false); + } ++ ++/* Sets or clears (depending on 'enable') OVSDB_IDL_WRITE_CHANGED_ONLY ++ * for 'column' in 'idl', ensuring that the column will be included in a ++ * transaction only if its value has actually changed locally. Normally ++ * read/write columns that are written to are always included in the ++ * transaction but, in specific cases, when the application doesn't ++ * require atomicity of writes across different columns, the ones that ++ * don't change value may be skipped. ++ * ++ * This function should be called between ovsdb_idl_create() and ++ * the first call to ovsdb_idl_run(). ++ */ ++void ++ovsdb_idl_set_write_changed_only(struct ovsdb_idl *idl, ++ const struct ovsdb_idl_column *column, ++ bool enable) ++{ ++ if (enable) { ++ *ovsdb_idl_get_mode(idl, column) |= OVSDB_IDL_WRITE_CHANGED_ONLY; ++ } else { ++ *ovsdb_idl_get_mode(idl, column) &= ~OVSDB_IDL_WRITE_CHANGED_ONLY; ++ } ++} ++ ++/* Helper function to wrap calling ovsdb_idl_set_write_changed_only() for ++ * all columns that are part of 'idl'. ++ */ ++void ++ovsdb_idl_set_write_changed_only_all(struct ovsdb_idl *idl, bool enable) ++{ ++ for (size_t i = 0; i < idl->class_->n_tables; i++) { ++ const struct ovsdb_idl_table_class *tc = &idl->class_->tables[i]; ++ ++ for (size_t j = 0; j < tc->n_columns; j++) { ++ const struct ovsdb_idl_column *column = &tc->columns[j]; ++ ovsdb_idl_set_write_changed_only(idl, column, enable); ++ } ++ } ++} + + static void + log_parse_update_error(struct ovsdb_error *error) +@@ -3491,6 +3530,8 @@ ovsdb_idl_txn_write__(const struct ovsdb_idl_row *row_, + { + struct ovsdb_idl_row *row = CONST_CAST(struct ovsdb_idl_row *, row_); + const struct ovsdb_idl_table_class *class; ++ unsigned char column_mode; ++ bool optimize_rewritten; + size_t column_idx; + bool write_only; + +@@ -3501,12 +3542,15 @@ ovsdb_idl_txn_write__(const struct ovsdb_idl_row *row_, + + class = row->table->class_; + column_idx = column - class->columns; +- write_only = row->table->modes[column_idx] == OVSDB_IDL_MONITOR; ++ column_mode = row->table->modes[column_idx]; ++ write_only = column_mode == OVSDB_IDL_MONITOR; ++ optimize_rewritten = ++ write_only || (column_mode & OVSDB_IDL_WRITE_CHANGED_ONLY); ++ + + ovs_assert(row->new_datum != NULL); + ovs_assert(column_idx < class->n_columns); +- ovs_assert(row->old_datum == NULL || +- row->table->modes[column_idx] & OVSDB_IDL_MONITOR); ++ ovs_assert(row->old_datum == NULL || column_mode & OVSDB_IDL_MONITOR); + + if (row->table->idl->verify_write_only && !write_only) { + VLOG_ERR("Bug: Attempt to write to a read/write column (%s:%s) when" +@@ -3524,9 +3568,13 @@ ovsdb_idl_txn_write__(const struct ovsdb_idl_row *row_, + * different value in that column since we read it. (But if a whole + * transaction only does writes of existing values, without making any real + * changes, we will drop the whole transaction later in +- * ovsdb_idl_txn_commit().) */ +- if (write_only && ovsdb_datum_equals(ovsdb_idl_read(row, column), +- datum, &column->type)) { ++ * ovsdb_idl_txn_commit().) ++ * ++ * The application may choose to bypass this restriction and always ++ * optimize by setting OVSDB_IDL_WRITE_CHANGED_ONLY. ++ */ ++ if (optimize_rewritten && ovsdb_datum_equals(ovsdb_idl_read(row, column), ++ datum, &column->type)) { + goto discard_datum; + } + +diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h +index d00599616..fbd9f671a 100644 +--- a/lib/ovsdb-idl.h ++++ b/lib/ovsdb-idl.h +@@ -158,7 +158,7 @@ bool ovsdb_idl_server_has_column(const struct ovsdb_idl *, + * IDL will change the value returned by ovsdb_idl_get_seqno() when the + * column's value changes in any row. + * +- * The possible mode combinations are: ++ * Typical mode combinations are: + * + * - 0, for a column that a client doesn't care about. This is the default + * for every column in every table, if the client passes false for +@@ -181,10 +181,17 @@ bool ovsdb_idl_server_has_column(const struct ovsdb_idl *, + * - (OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT | OVSDB_IDL_TRACK), for a column + * that a client wants to track using the change tracking + * ovsdb_idl_track_get_*() functions. ++ * ++ * - (OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT | OVSDB_IDL_WRITE_CHANGED_ONLY) ++ * is similar to (OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT) except that it ++ * only adds a written column to a transaction if the column's value ++ * has actually changed. + */ + #define OVSDB_IDL_MONITOR (1 << 0) /* Replicate this column? */ + #define OVSDB_IDL_ALERT (1 << 1) /* Alert client when column changes? */ +-#define OVSDB_IDL_TRACK (1 << 2) ++#define OVSDB_IDL_TRACK (1 << 2) /* Track column changes? */ ++#define OVSDB_IDL_WRITE_CHANGED_ONLY \ ++ (1 << 3) /* Write back only changed columns? */ + + void ovsdb_idl_add_column(struct ovsdb_idl *, const struct ovsdb_idl_column *); + void ovsdb_idl_add_table(struct ovsdb_idl *, +@@ -233,6 +240,11 @@ bool ovsdb_idl_track_is_updated(const struct ovsdb_idl_row *row, + const struct ovsdb_idl_column *column); + void ovsdb_idl_track_clear(struct ovsdb_idl *); + ++void ovsdb_idl_set_write_changed_only(struct ovsdb_idl *idl, ++ const struct ovsdb_idl_column *column, ++ bool enable); ++void ovsdb_idl_set_write_changed_only_all(struct ovsdb_idl *idl, bool enable); ++ + + /* Reading the database replica. */ + +diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at +index 62e2b6383..d1cfa59c5 100644 +--- a/tests/ovsdb-idl.at ++++ b/tests/ovsdb-idl.at +@@ -87,6 +87,20 @@ m4_define([OVSDB_CHECK_IDL_C], + OVSDB_SERVER_SHUTDOWN + AT_CLEANUP]) + ++# same as OVSDB_CHECK_IDL but uses OVSDB_IDL_WRITE_CHANGED_ONLY. ++m4_define([OVSDB_CHECK_IDL_WRITE_CHANGED_ONLY_C], ++ [AT_SETUP([$1 - write-changed-only - C]) ++ AT_KEYWORDS([ovsdb server idl positive $5]) ++ AT_CHECK([ovsdb_start_idltest]) ++ m4_if([$2], [], [], ++ [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore])]) ++ AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl unix:socket $3], ++ [0], [stdout], [ignore]) ++ AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]), ++ [0], [$4]) ++ OVSDB_SERVER_SHUTDOWN ++ AT_CLEANUP]) ++ + # same as OVSDB_CHECK_IDL but uses tcp. + m4_define([OVSDB_CHECK_IDL_TCP_C], + [AT_SETUP([$1 - C - tcp]) +@@ -257,6 +271,7 @@ m4_define([OVSDB_CHECK_IDL_SSL_PY], + + m4_define([OVSDB_CHECK_IDL], + [OVSDB_CHECK_IDL_C($@) ++ OVSDB_CHECK_IDL_WRITE_CHANGED_ONLY_C($@) + OVSDB_CHECK_IDL_TCP_C($@) + OVSDB_CHECK_IDL_TCP6_C($@) + OVSDB_CHECK_IDL_PY($@) +@@ -1166,8 +1181,22 @@ m4_define([OVSDB_CHECK_IDL_TRACK_C], + OVSDB_SERVER_SHUTDOWN + AT_CLEANUP]) + ++m4_define([OVSDB_CHECK_IDL_TRACK_WRITE_CHANGED_ONLY_C], ++ [AT_SETUP([$1 - write-changed-only - C]) ++ AT_KEYWORDS([ovsdb server idl tracking positive $5]) ++ AT_CHECK([ovsdb_start_idltest]) ++ m4_if([$2], [], [], ++ [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore])]) ++ AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 -c -w idl unix:socket $3], ++ [0], [stdout], [ignore]) ++ AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]), ++ [0], [$4]) ++ OVSDB_SERVER_SHUTDOWN ++ AT_CLEANUP]) ++ + m4_define([OVSDB_CHECK_IDL_TRACK], +- [OVSDB_CHECK_IDL_TRACK_C($@)]) ++ [OVSDB_CHECK_IDL_TRACK_C($@) ++ OVSDB_CHECK_IDL_TRACK_WRITE_CHANGED_ONLY_C($@)]) + + OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated], + [['["idltest", +diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c +index ca4e87b81..0fef1f978 100644 +--- a/tests/test-ovsdb.c ++++ b/tests/test-ovsdb.c +@@ -56,6 +56,7 @@ + VLOG_DEFINE_THIS_MODULE(test_ovsdb); + + struct test_ovsdb_pvt_context { ++ bool write_changed_only; + bool track; + }; + +@@ -91,6 +92,7 @@ parse_options(int argc, char *argv[], struct test_ovsdb_pvt_context *pvt) + {"timeout", required_argument, NULL, 't'}, + {"verbose", optional_argument, NULL, 'v'}, + {"change-track", optional_argument, NULL, 'c'}, ++ {"write-changed-only", optional_argument, NULL, 'w'}, + {"magic", required_argument, NULL, OPT_MAGIC}, + {"no-rename-open-files", no_argument, NULL, OPT_NO_RENAME_OPEN_FILES}, + {"help", no_argument, NULL, 'h'}, +@@ -125,6 +127,10 @@ parse_options(int argc, char *argv[], struct test_ovsdb_pvt_context *pvt) + pvt->track = true; + break; + ++ case 'w': ++ pvt->write_changed_only = true; ++ break; ++ + case OPT_MAGIC: + magic = optarg; + break; +@@ -2610,6 +2616,7 @@ update_conditions(struct ovsdb_idl *idl, char *commands) + static void + do_idl(struct ovs_cmdl_context *ctx) + { ++ struct test_ovsdb_pvt_context *pvt = ctx->pvt; + struct jsonrpc *rpc; + struct ovsdb_idl *idl; + unsigned int seqno = 0; +@@ -2618,9 +2625,6 @@ do_idl(struct ovs_cmdl_context *ctx) + int step = 0; + int error; + int i; +- bool track; +- +- track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track; + + idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true, true); + ovsdb_idl_set_leader_only(idl, false); +@@ -2637,10 +2641,14 @@ do_idl(struct ovs_cmdl_context *ctx) + rpc = NULL; + } + +- if (track) { ++ if (pvt->track) { + ovsdb_idl_track_add_all(idl); + } + ++ if (pvt->write_changed_only) { ++ ovsdb_idl_set_write_changed_only_all(idl, true); ++ } ++ + setvbuf(stdout, NULL, _IONBF, 0); + + symtab = ovsdb_symbol_table_create(); +@@ -2683,7 +2691,7 @@ do_idl(struct ovs_cmdl_context *ctx) + } + + /* Print update. */ +- if (track) { ++ if (pvt->track) { + print_idl_track(idl, step++, terse); + ovsdb_idl_track_clear(idl); + } else { +-- +2.36.1 + diff --git a/debian/patches/series b/debian/patches/series index 87a2a1d97..43c4c269c 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1 +1,2 @@ ovs-ctl-ipsec.patch +0001-ovsdb-idl-Support-write-only-changed-IDL-monitor-mod.patch -- GitLab From 1abaa10d471c77dcc40ee4458b5c410a9b93c768 Mon Sep 17 00:00:00 2001 From: Frode Nordahl Date: Mon, 1 Aug 2022 15:56:20 +0200 Subject: [PATCH 2/2] changelog: add note about patch for OVN build dep Signed-off-by: Frode Nordahl --- debian/changelog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/debian/changelog b/debian/changelog index 4e30df7ba..830942ef9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +openvswitch (2.17.2-3) experimental; urgency=medium + + * d/p/0001-ovsdb-idl-Support-write-only-changed-IDL-monitor-mod.patch: + Cherry pick feature to improve scalability and performance which is + required for OVN 22.03.1 (LP: #1980809). + + -- Frode Nordahl Mon, 01 Aug 2022 15:55:37 +0200 + openvswitch (2.17.2-2) experimental; urgency=medium * d/rules: exclude new tests that are failing on hppa -- GitLab