diff --git a/.travis/linux-build.sh b/.ci/linux-build.sh
similarity index 76%
rename from .travis/linux-build.sh
rename to .ci/linux-build.sh
index bb47b3ee190f5b095634257c470c0be4ee27edbf..3e5136fd4e118abda11a04db3d2e2f909fd135c9 100755
--- a/.travis/linux-build.sh
+++ b/.ci/linux-build.sh
@@ -6,7 +6,6 @@ set -x
CFLAGS_FOR_OVS="-g -O2"
SPARSE_FLAGS=""
EXTRA_OPTS="--enable-Werror"
-TARGET="x86_64-native-linuxapp-gcc"
function install_kernel()
{
@@ -35,7 +34,9 @@ function install_kernel()
url="${base_url}/linux-${version}.tar.xz"
# Download kernel sources. Try direct link on CDN failure.
- wget ${url} || wget ${url} || wget ${url/cdn/www}
+ wget ${url} ||
+ (rm -f linux-${version}.tar.xz && wget ${url}) ||
+ (rm -f linux-${version}.tar.xz && wget ${url/cdn/www})
tar xvf linux-${version}.tar.xz > /dev/null
pushd linux-${version}
@@ -86,6 +87,28 @@ function install_dpdk()
{
local DPDK_VER=$1
local VERSION_FILE="dpdk-dir/travis-dpdk-cache-version"
+ local DPDK_OPTS=""
+ local DPDK_LIB=""
+
+ if [ -z "$TRAVIS_ARCH" ] ||
+ [ "$TRAVIS_ARCH" == "amd64" ]; then
+ DPDK_LIB=$(pwd)/dpdk-dir/build/lib/x86_64-linux-gnu
+ elif [ "$TRAVIS_ARCH" == "aarch64" ]; then
+ DPDK_LIB=$(pwd)/dpdk-dir/build/lib/aarch64-linux-gnu
+ else
+ echo "Target is unknown"
+ exit 1
+ fi
+
+ if [ "$DPDK_SHARED" ]; then
+ EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=shared"
+ export LD_LIBRARY_PATH=$DPDK_LIB/:$LD_LIBRARY_PATH
+ else
+ EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=static"
+ fi
+
+ # Export the following path for pkg-config to find the .pc file.
+ export PKG_CONFIG_PATH=$DPDK_LIB/pkgconfig/:$PKG_CONFIG_PATH
if [ "${DPDK_VER##refs/*/}" != "${DPDK_VER}" ]; then
# Avoid using cache for git tree build.
@@ -99,7 +122,8 @@ function install_dpdk()
if [ -f "${VERSION_FILE}" ]; then
VER=$(cat ${VERSION_FILE})
if [ "${VER}" = "${DPDK_VER}" ]; then
- EXTRA_OPTS="${EXTRA_OPTS} --with-dpdk=$(pwd)/dpdk-dir/build"
+ # Update the library paths.
+ sudo ldconfig
echo "Found cached DPDK ${VER} build in $(pwd)/dpdk-dir"
return
fi
@@ -113,23 +137,24 @@ function install_dpdk()
pushd dpdk-dir
fi
- make config CC=gcc T=$TARGET
+ # Switching to 'default' machine to make dpdk-dir cache usable on
+ # different CPUs. We can't be sure that all CI machines are exactly same.
+ DPDK_OPTS="$DPDK_OPTS -Dmachine=default"
- if [ "$DPDK_SHARED" ]; then
- sed -i '/CONFIG_RTE_BUILD_SHARED_LIB=n/s/=n/=y/' build/.config
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/$TARGET/lib
- fi
+ # Disable building DPDK unit tests. Not needed for OVS build or tests.
+ DPDK_OPTS="$DPDK_OPTS -Dtests=false"
+
+ # Install DPDK using prefix.
+ DPDK_OPTS="$DPDK_OPTS --prefix=$(pwd)/build"
- # Disable building DPDK kernel modules. Not needed for OVS build or tests.
- sed -i '/CONFIG_RTE_EAL_IGB_UIO=y/s/=y/=n/' build/.config
- sed -i '/CONFIG_RTE_KNI_KMOD=y/s/=y/=n/' build/.config
+ CC=gcc meson $DPDK_OPTS build
+ ninja -C build
+ ninja -C build install
+
+ # Update the library paths.
+ sudo ldconfig
- # Enable pdump support in DPDK.
- sed -i '/CONFIG_RTE_LIBRTE_PMD_PCAP=n/s/=n/=y/' build/.config
- sed -i '/CONFIG_RTE_LIBRTE_PDUMP=n/s/=n/=y/' build/.config
- make -j4 CC=gcc EXTRA_CFLAGS='-fPIC'
- EXTRA_OPTS="$EXTRA_OPTS --with-dpdk=$(pwd)/build"
echo "Installed DPDK source in $(pwd)"
popd
echo "${DPDK_VER}" > ${VERSION_FILE}
@@ -159,17 +184,26 @@ function build_ovs()
fi
}
+if [ "$DEB_PACKAGE" ]; then
+ mk-build-deps --install --root-cmd sudo --remove debian/control
+ dpkg-checkbuilddeps
+ DEB_BUILD_OPTIONS='parallel=4 nocheck' fakeroot debian/rules binary
+ # Not trying to install ipsec package as there are issues with system-wide
+ # installed python3-openvswitch package and the pyenv used by Travis.
+ packages=$(ls $(pwd)/../*.deb | grep -v ipsec)
+ sudo apt install ${packages}
+ exit 0
+fi
+
if [ "$KERNEL" ]; then
install_kernel $KERNEL
fi
if [ "$DPDK" ] || [ "$DPDK_SHARED" ]; then
if [ -z "$DPDK_VER" ]; then
- DPDK_VER="19.11"
+ DPDK_VER="20.11"
fi
install_dpdk $DPDK_VER
- # Enable pdump support in OVS.
- EXTRA_OPTS="${EXTRA_OPTS} --enable-dpdk-pdump"
if [ "$CC" = "clang" ]; then
# Disregard cast alignment errors until DPDK is fixed
CFLAGS_FOR_OVS="${CFLAGS_FOR_OVS} -Wno-cast-align"
@@ -183,7 +217,7 @@ elif [ "$M32" ]; then
# Adding m32 flag directly to CC to avoid any posiible issues with API/ABI
# difference on 'configure' and 'make' stages.
export CC="$CC -m32"
-else
+elif [ "$TRAVIS_ARCH" != "aarch64" ]; then
OPTS="--enable-sparse"
if [ "$AFXDP" ]; then
# netdev-afxdp uses memset for 64M for umem initialization.
diff --git a/.ci/linux-prepare.sh b/.ci/linux-prepare.sh
new file mode 100755
index 0000000000000000000000000000000000000000..69a40011f4c535b65ff072cd294d91d003004095
--- /dev/null
+++ b/.ci/linux-prepare.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+set -ev
+
+if [ "$DEB_PACKAGE" ]; then
+ # We're not using sparse for debian packages, tests are skipped and
+ # all extra dependencies tracked by mk-build-deps.
+ exit 0
+fi
+
+# Build and install sparse.
+#
+# Explicitly disable sparse support for llvm because some travis
+# environments claim to have LLVM (llvm-config exists and works) but
+# linking against it fails.
+# Disabling sqlite support because sindex build fails and we don't
+# really need this utility being installed.
+git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git
+cd sparse
+make -j4 HAVE_LLVM= HAVE_SQLITE= install
+cd ..
+
+pip3 install --disable-pip-version-check --user flake8 hacking
+pip3 install --user --upgrade docutils
+pip3 install --user 'meson==0.47.1'
+
+if [ "$M32" ]; then
+ # Installing 32-bit libraries.
+ pkgs="gcc-multilib"
+ if [ -z "$GITHUB_WORKFLOW" ]; then
+ # 32-bit and 64-bit libunwind can not be installed at the same time.
+ # This will remove the 64-bit libunwind and install 32-bit version.
+ # GitHub Actions doesn't have 32-bit versions of these libs.
+ pkgs=$pkgs" libunwind-dev:i386 libunbound-dev:i386"
+ fi
+
+ sudo apt-get install -y $pkgs
+fi
+
+# IPv6 is supported by kernel but disabled in TravisCI images:
+# https://github.com/travis-ci/travis-ci/issues/8891
+# Enable it to avoid skipping of IPv6 related tests.
+sudo sysctl -w net.ipv6.conf.all.disable_ipv6=0
diff --git a/.travis/osx-build.sh b/.ci/osx-build.sh
similarity index 100%
rename from .travis/osx-build.sh
rename to .ci/osx-build.sh
diff --git a/.travis/osx-prepare.sh b/.ci/osx-prepare.sh
similarity index 100%
rename from .travis/osx-prepare.sh
rename to .ci/osx-prepare.sh
diff --git a/.cirrus.yml b/.cirrus.yml
index 1b32f55d65a84ff8e1c7bb85219bd7d0ec620bfe..263c2cd7ed54657aff6df42336629e4b0f7ae85c 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -3,7 +3,7 @@ freebsd_build_task:
freebsd_instance:
matrix:
image_family: freebsd-12-1-snap
- image_family: freebsd-11-3-snap
+ image_family: freebsd-11-4-snap
cpu: 4
memory: 8G
@@ -16,6 +16,7 @@ freebsd_build_task:
prepare_script:
- sysctl -w kern.coredump=0
+ - pkg update -f
- pkg install -y ${DEPENDENCIES}
configure_script:
diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml
new file mode 100644
index 0000000000000000000000000000000000000000..b29c300c5533fde97b482d884576d985e9de65f0
--- /dev/null
+++ b/.github/workflows/build-and-test.yml
@@ -0,0 +1,206 @@
+name: Build and Test
+
+on: [push, pull_request]
+
+jobs:
+ build-linux:
+ env:
+ dependencies: |
+ automake libtool gcc bc libjemalloc1 libjemalloc-dev \
+ libssl-dev llvm-dev libelf-dev libnuma-dev libpcap-dev \
+ ninja-build python3-openssl python3-pip \
+ python3-setuptools python3-sphinx python3-wheel \
+ selinux-policy-dev
+ deb_dependencies: |
+ linux-headers-$(uname -r) build-essential fakeroot devscripts equivs
+ AFXDP: ${{ matrix.afxdp }}
+ CC: ${{ matrix.compiler }}
+ DEB_PACKAGE: ${{ matrix.deb_package }}
+ DPDK: ${{ matrix.dpdk }}
+ DPDK_SHARED: ${{ matrix.dpdk_shared }}
+ KERNEL: ${{ matrix.kernel }}
+ KERNEL_LIST: ${{ matrix.kernel_list }}
+ LIBS: ${{ matrix.libs }}
+ M32: ${{ matrix.m32 }}
+ OPTS: ${{ matrix.opts }}
+ TESTSUITE: ${{ matrix.testsuite }}
+
+ name: linux ${{ join(matrix.*, ' ') }}
+ runs-on: ubuntu-18.04
+ timeout-minutes: 30
+
+ strategy:
+ fail-fast: false
+ matrix:
+ include:
+ - compiler: gcc
+ opts: --disable-ssl
+ - compiler: clang
+ opts: --disable-ssl
+
+ - compiler: gcc
+ testsuite: test
+ kernel: 3.16
+ - compiler: clang
+ testsuite: test
+ kernel: 3.16
+
+ - compiler: gcc
+ testsuite: test
+ opts: --enable-shared
+ - compiler: clang
+ testsuite: test
+ opts: --enable-shared
+
+ - compiler: gcc
+ testsuite: test
+ dpdk: dpdk
+ - compiler: clang
+ testsuite: test
+ dpdk: dpdk
+
+ - compiler: gcc
+ testsuite: test
+ libs: -ljemalloc
+ - compiler: clang
+ testsuite: test
+ libs: -ljemalloc
+
+ - compiler: gcc
+ kernel_list: 5.8 5.5 5.4 4.19
+ - compiler: clang
+ kernel_list: 5.8 5.5 5.4 4.19
+
+ - compiler: gcc
+ kernel_list: 4.14 4.9 4.4 3.16
+ - compiler: clang
+ kernel_list: 4.14 4.9 4.4 3.16
+
+ - compiler: gcc
+ afxdp: afxdp
+ kernel: 5.3
+ - compiler: clang
+ afxdp: afxdp
+ kernel: 5.3
+
+ - compiler: gcc
+ dpdk: dpdk
+ opts: --enable-shared
+ - compiler: clang
+ dpdk: dpdk
+ opts: --enable-shared
+
+ - compiler: gcc
+ dpdk_shared: dpdk-shared
+ - compiler: clang
+ dpdk_shared: dpdk-shared
+
+ - compiler: gcc
+ dpdk_shared: dpdk-shared
+ opts: --enable-shared
+ - compiler: clang
+ dpdk_shared: dpdk-shared
+ opts: --enable-shared
+
+ - compiler: gcc
+ m32: m32
+ opts: --disable-ssl
+
+ - compiler: gcc
+ deb_package: deb
+
+ steps:
+ - name: checkout
+ uses: actions/checkout@v2
+
+ - name: create ci signature file for the dpdk cache key
+ if: matrix.dpdk != '' || matrix.dpdk_shared != ''
+ # This will collect most of DPDK related lines, so hash will be different
+ # if something changed in a way we're building DPDK including DPDK_VER.
+ # This also allows us to use cache from any branch as long as version
+ # and a way we're building DPDK stays the same.
+ run: |
+ grep -irE 'RTE_|DPDK|meson|ninja' -r .ci/ > dpdk-ci-signature
+ cat dpdk-ci-signature
+
+ - name: cache
+ if: matrix.dpdk != '' || matrix.dpdk_shared != ''
+ uses: actions/cache@v2
+ env:
+ matrix_key: ${{ matrix.dpdk }}${{ matrix.dpdk_shared }}
+ ci_key: ${{ hashFiles('dpdk-ci-signature') }}
+ with:
+ path: dpdk-dir
+ key: ${{ env.matrix_key }}-${{ env.ci_key }}
+
+ - name: update APT cache
+ run: sudo apt update
+ - name: install common dependencies
+ if: matrix.deb_package == ''
+ run: sudo apt install -y ${{ env.dependencies }}
+ - name: install dependencies for debian packages
+ if: matrix.deb_package != ''
+ run: sudo apt install -y ${{ env.deb_dependencies }}
+ - name: install libunbound libunwind
+ if: matrix.m32 == ''
+ run: sudo apt install -y libunbound-dev libunwind-dev
+
+ - name: prepare
+ run: ./.ci/linux-prepare.sh
+
+ - name: build
+ run: PATH="$PATH:$HOME/bin:$HOME/.local/bin" ./.ci/linux-build.sh
+
+ - name: upload deb packages
+ if: matrix.deb_package != ''
+ uses: actions/upload-artifact@v2
+ with:
+ name: deb-packages
+ path: '/home/runner/work/ovs/*.deb'
+
+ - name: copy logs on failure
+ if: failure() || cancelled()
+ run: |
+ # upload-artifact@v2 throws exceptions if it tries to upload socket
+ # files and we could have some socket files in testsuite.dir.
+ # Also, upload-artifact@v2 doesn't work well enough with wildcards.
+ # So, we're just archiving everything here to avoid any issues.
+ mkdir logs
+ cp config.log ./logs/
+ cp -r ./*/_build/sub/tests/testsuite.* ./logs/ || true
+ tar -czvf logs.tgz logs/
+
+ - name: upload logs on failure
+ if: failure() || cancelled()
+ uses: actions/upload-artifact@v2
+ with:
+ name: logs-linux-${{ join(matrix.*, '-') }}
+ path: logs.tgz
+
+ build-osx:
+ env:
+ CC: clang
+ OPTS: --disable-ssl
+
+ name: osx clang --disable-ssl
+ runs-on: macos-latest
+ timeout-minutes: 30
+
+ strategy:
+ fail-fast: false
+
+ steps:
+ - name: checkout
+ uses: actions/checkout@v2
+ - name: install dependencies
+ run: brew install automake libtool
+ - name: prepare
+ run: ./.ci/osx-prepare.sh
+ - name: build
+ run: PATH="$PATH:$HOME/bin" ./.ci/osx-build.sh
+ - name: upload logs on failure
+ if: failure()
+ uses: actions/upload-artifact@v2
+ with:
+ name: logs-osx-clang---disable-ssl
+ path: config.log
diff --git a/.gitignore b/.gitignore
index 2ac9cdac75121e6720a5201db443bb48e0474de8..f1cdcf124f2bc8cd91ea80459f13ae0faee8d47a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,6 +55,7 @@
/docs-check
/install-sh
/libtool
+/manpages.mk
/manpage-check
/missing
/missing-distfiles
diff --git a/.mailmap b/.mailmap
index 894062d48293b8df3d2142ec776c7a7ffe19cb33..dc3b2094df92e06b7397d1e8fa9bafb3d78ca839 100644
--- a/.mailmap
+++ b/.mailmap
@@ -31,6 +31,7 @@ Chandra Sekhar Vejendla
Daniele Di Proietto
Daniele Di Proietto
Ed Maste
+Eli Britstein
Ethan J. Jackson
Fischetti, Antonio
Flavio Fernandes
@@ -54,6 +55,7 @@ Justin Pettit
Kmindg
Kyle Mestery
Lance Richardson
+Mark Gray
Mauricio Vasquez
Miguel Angel Ajo
Neil McKee
@@ -65,6 +67,7 @@ Ralf Spenneberg
Rami Rosen
Ramu Ramamurthy
Robert Åkerblom-Andersson
+Roi Dayan
Romain Lenglet
Romain Lenglet
Russell Bryant
diff --git a/.travis.yml b/.travis.yml
index abd2a9117a331a4fdf49028cadc2c327c5d55ed9..51d051108091bb1ae3d9af2c8c11853982b3224c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,7 +1,4 @@
language: c
-compiler:
- - gcc
- - clang
os:
- linux
@@ -27,32 +24,33 @@ addons:
- selinux-policy-dev
- libunbound-dev
- libunwind-dev
+ - python3-setuptools
+ - python3-wheel
+ - ninja-build
-before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
+before_install: ./.ci/${TRAVIS_OS_NAME}-prepare.sh
before_script: export PATH=$PATH:$HOME/bin
-env:
- - OPTS="--disable-ssl"
- - TESTSUITE=1 KERNEL=3.16
- - TESTSUITE=1 OPTS="--enable-shared"
- - TESTSUITE=1 DPDK=1
- - TESTSUITE=1 LIBS=-ljemalloc
- - KERNEL_LIST="5.0 4.20 4.19 4.18 4.17 4.16"
- - KERNEL_LIST="4.15 4.14 4.9 4.4 3.19 3.16"
- - AFXDP=1 KERNEL=5.3
- - M32=1 OPTS="--disable-ssl"
- - DPDK=1 OPTS="--enable-shared"
- - DPDK_SHARED=1
- - DPDK_SHARED=1 OPTS="--enable-shared"
-
matrix:
include:
- - os: osx
+ - arch: arm64
+ compiler: gcc
+ env: TESTSUITE=1 DPDK=1
+ - arch: arm64
+ compiler: gcc
+ env: KERNEL_LIST="5.5 4.19"
+ - arch: arm64
+ compiler: gcc
+ env: KERNEL_LIST="4.9 3.16"
+ - arch: arm64
+ compiler: gcc
+ env: DPDK_SHARED=1
+ - arch: arm64
compiler: clang
env: OPTS="--disable-ssl"
-script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS
+script: ./.ci/${TRAVIS_OS_NAME}-build.sh $OPTS
notifications:
email:
diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh
deleted file mode 100755
index fda13e7d21a87dcbba7b441385ead8c41906ead8..0000000000000000000000000000000000000000
--- a/.travis/linux-prepare.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-set -ev
-
-# Build and install sparse.
-#
-# Explicitly disable sparse support for llvm because some travis
-# environments claim to have LLVM (llvm-config exists and works) but
-# linking against it fails.
-git clone git://git.kernel.org/pub/scm/devel/sparse/sparse.git
-cd sparse
-make -j4 HAVE_LLVM= install
-cd ..
-
-pip3 install --disable-pip-version-check --user flake8 hacking
-pip3 install --user --upgrade docutils
-
-if [ "$M32" ]; then
- # Installing 32-bit libraries.
- # 32-bit and 64-bit libunwind can not be installed at the same time.
- # This will remove the 64-bit libunwind and install 32-bit version.
- sudo apt-get install -y \
- libunwind-dev:i386 libunbound-dev:i386 gcc-multilib
-fi
-
-# IPv6 is supported by kernel but disabled in TravisCI images:
-# https://github.com/travis-ci/travis-ci/issues/8891
-# Enable it to avoid skipping of IPv6 related tests.
-sudo sysctl -w net.ipv6.conf.all.disable_ipv6=0
diff --git a/AUTHORS.rst b/AUTHORS.rst
index fe3935fca24c78bd396ce8efc178c558fd306efd..4137a25f59e55a9579dc02733461069737a50e0d 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -33,6 +33,7 @@ Name Email
================================== ===============================================
Aaron Conole aconole@redhat.com
Aaron Rosen arosen@clemson.edu
+Adrian Moreno amorenoz@redhat.com
Alan Pevec alan.pevec@redhat.com
Alessandro Pilotti apilotti@cloudbasesolutions.com
Alexander Duyck alexander.h.duyck@redhat.com
@@ -60,6 +61,7 @@ Andy Zhou azhou@ovn.org
Ankur Sharma ankursharma@vmware.com
Anoob Soman anoob.soman@citrix.com
Ansis Atteka aatteka@vmware.com
+Anton Ivanov anton.ivanov@cambridgegreys.com
Antonio Fischetti antonio.fischetti@intel.com
Anupam Chanda
Ariel Tubaltsev atubaltsev@vmware.com
@@ -78,6 +80,7 @@ Bert Vermeulen bert@biot.com
Bhanuprakash Bodireddy bhanuprakash.bodireddy@intel.com
Billy O'Mahony billy.o.mahony@intel.com
Binbin Xu xu.binbin1@zte.com.cn
+Boleslaw Tokarski boleslaw.tokarski@jollamobile.com
Brian Haley haleyb.dev@gmail.com
Brian Kruger bkruger+ovsdev@gmail.com
Bruce Davie bdavie@vmware.com
@@ -127,7 +130,8 @@ Ed Maste emaste@freebsd.org
Ed Swierk eswierk@skyportsystems.com
Edouard Bourguignon madko@linuxed.net
Eelco Chaudron echaudro@redhat.com
-Eli Britstein elibr@mellanox.com
+Eiichi Tsukata eiichi.tsukata@nutanix.com
+Eli Britstein elibr@nvidia.com
Emma Finn emma.finn@intel.com
Eric Lapointe elapointe@corsa.com
Esteban Rodriguez Betancourt estebarb@hpe.com
@@ -141,6 +145,7 @@ Eric Sesterhenn eric.sesterhenn@lsexperts.de
Ethan J. Jackson ejj@eecs.berkeley.edu
Ethan Rahn erahn@arista.com
Eziz Durdyyev ezizdurdy@gmail.com
+Fabrizio D'Angelo fdangelo@redhat.com
Flavio Fernandes flavio@flaviof.com
Flavio Leitner fbl@redhat.com
Francesco Fusco ffusco@redhat.com
@@ -189,10 +194,12 @@ Jason Kölker jason@koelker.net
Jason Wessel jason.wessel@windriver.com
Jasper Capel jasper@capel.tv
Jean Tourrilhes jt@hpl.hp.com
+Jeff Squyres jsquyres@cisco.com
Jeremy Stribling
Jeroen van Bemmel jvb127@gmail.com
Jesse Gross jesse@kernel.org
Jian Li lijian@ooclab.com
+Jiang Lidong jianglidong3@jd.com
Jianbo Liu jianbol@mellanox.com
Jing Ai jinga@google.com
Jiri Benc jbenc@redhat.com
@@ -250,7 +257,7 @@ Madhu Challa challa@noironetworks.com
Manohar K C manukc@gmail.com
Marcin Mirecki mmirecki@redhat.com
Mario Cabrera mario.cabrera@hpe.com
-Mark D. Gray mark.d.gray@intel.com
+Mark D. Gray mark.d.gray@redhat.com
Mark Hamilton
Mark Kavanagh mark.b.kavanagh81@gmail.com
Mark Maglana mmaglana@gmail.com
@@ -259,6 +266,7 @@ Markos Chandras mchandras@suse.de
Martin Casado casado@cs.stanford.edu
Martin Fong mwfong@csl.sri.com
Martino Fornasa mf@fornasa.it
+Martin Varghese martin.varghese@nokia.com
Martin Xu martinxu9.ovs@gmail.com
Martin Zhang martinbj2008@gmail.com
Maryam Tahhan maryam.tahhan@intel.com
@@ -299,6 +307,7 @@ Paul Fazzone pfazzone@vmware.com
Paul Ingram
Paul-Emmanuel Raoul skyper@skyplabs.net
Pavithra Ramesh paramesh@vmware.com
+Peng He hepeng.0320@bytedance.com
Peter Downs padowns@gmail.com
Philippe Jung phil.jung@free.fr
Pim van den Berg pim@nethuis.nl
@@ -315,6 +324,7 @@ Ravi Kerur Ravi.Kerur@telekom.com
Raymond Burkholder ray@oneunified.net
Reid Price
Remko Tronçon git@el-tramo.be
+Renat Nurgaliyev impleman@gmail.com
Rich Lane rlane@bigswitch.com
Richard Oliver richard@richard-oliver.co.uk
Rishi Bamba rishi.bamba@tcs.com
@@ -323,7 +333,7 @@ Robert Åkerblom-Andersson Robert.nr1@gmail.com
Robert Wojciechowicz robertx.wojciechowicz@intel.com
Rob Hoes rob.hoes@citrix.com
Rohith Basavaraja rohith.basavaraja@gmail.com
-Roi Dayan roid@mellanox.com
+Roi Dayan roid@nvidia.com
Róbert Mulik robert.mulik@ericsson.com
Romain Lenglet romain.lenglet@berabera.info
Roni Bar Yanai roniba@mellanox.com
@@ -352,7 +362,9 @@ Shih-Hao Li shihli@vmware.com
Shu Shen shu.shen@radisys.com
Simon Horman horms@verge.net.au
Simon Horman simon.horman@netronome.com
+Sivaprasad Tummala sivaprasad.tummala@intel.com
Sorin Vinturis svinturis@cloudbasesolutions.com
+Sriharsha Basavapatna sriharsha.basavapatna@broadcom.com
Steffen Gebert steffen.gebert@informatik.uni-wuerzburg.de
Sten Spans sten@blinkenlights.nl
Stephane A. Sezer sas@cd80.net
@@ -382,6 +394,7 @@ Tuan Nguyen tuan.nguyen@veriksystems.com
Tyler Coumbes coumbes@gmail.com
Tony van der Peet tony.vanderpeet@alliedtelesis.co.nz
Tonghao Zhang xiangxia.m.yue@gmail.com
+Usman Ansari ua1422@gmail.com
Valient Gough vgough@pobox.com
Venkata Anil Kommaddi vkommadi@redhat.com
Vishal Deep Ajmera vishal.deep.ajmera@ericsson.com
@@ -400,6 +413,7 @@ xu rong xu.rong@zte.com.cn
YAMAMOTO Takashi yamamoto@midokura.com
Yanqin Wei Yanqin.Wei@arm.com
Yasuhito Takamiya yasuhito@gmail.com
+Yi Li yili@winhong.com
Yi Yang yangyi01@inspur.com
Yi-Hung Wei yihung.wei@gmail.com
Yifeng Sun pkusunyifeng@gmail.com
@@ -419,6 +433,7 @@ Zhenyu Gao sysugaozhenyu@gmail.com
ZhiPeng Lu luzhipeng@uniudc.com
Zhou Yangchao 1028519445@qq.com
aginwala amginwal@gmail.com
+lzhecheng lzhecheng@vmware.com
parameswaran krishnamurthy parkrish@gmail.com
solomon liwei.solomon@gmail.com
wenxu wenxu@ucloud.cn
@@ -496,6 +511,7 @@ Edwin Chiu echiu@vmware.com
Eivind Bulie Haanaes
Enas Ahmad enas.ahmad@kaust.edu.sa
Eric Lopez
+Frank Wang (王培辉) wangpeihui@inspur.com
Frido Roose fr.roose@gmail.com
Gaetano Catalli gaetano.catalli@gmail.com
Gavin Remaley gavin_remaley@selinc.com
@@ -558,6 +574,7 @@ Krishna Miriyala miriyalak@vmware.com
Krishna Mohan Elluru elluru.kri.mohan@hpe.com
László Sürü laszlo.suru@ericsson.com
Len Gao leng@vmware.com
+Linhaifeng haifeng.lin@huawei.com
Logan Rosen logatronico@gmail.com
Luca Falavigna dktrkranz@debian.org
Luiz Henrique Ozaki luiz.ozaki@gmail.com
@@ -655,6 +672,7 @@ Ying Chen yingchen@vmware.com
Yongqiang Liu liuyq7809@gmail.com
ZHANG Zhiming zhangzhiming@yunshan.net.cn
Zhangguanghui zhang.guanghui@h3c.com
+Zheng Jingzhou glovejmm@163.com
Ziyou Wang ziyouw@vmware.com
ankur dwivedi ankurengg2003@gmail.com
chen zhang 3zhangchen9211@gmail.com
diff --git a/Documentation/automake.mk b/Documentation/automake.mk
index 22976a3cd6cc510538c1c60e43e9a0b7a9a39091..ea3475f3567e88df00c53a72214d98d69d612d3a 100644
--- a/Documentation/automake.mk
+++ b/Documentation/automake.mk
@@ -36,11 +36,9 @@ DOC_SOURCE = \
Documentation/topics/dpdk/bridge.rst \
Documentation/topics/dpdk/jumbo-frames.rst \
Documentation/topics/dpdk/memory.rst \
- Documentation/topics/dpdk/pdump.rst \
Documentation/topics/dpdk/phy.rst \
Documentation/topics/dpdk/pmd.rst \
Documentation/topics/dpdk/qos.rst \
- Documentation/topics/dpdk/ring.rst \
Documentation/topics/dpdk/vdev.rst \
Documentation/topics/dpdk/vhost-user.rst \
Documentation/topics/fuzzing/index.rst \
@@ -90,6 +88,7 @@ DOC_SOURCE = \
Documentation/faq/terminology.rst \
Documentation/faq/vlan.rst \
Documentation/faq/vxlan.rst \
+ Documentation/faq/bareudp.rst \
Documentation/internals/index.rst \
Documentation/internals/authors.rst \
Documentation/internals/bugs.rst \
diff --git a/Documentation/faq/bareudp.rst b/Documentation/faq/bareudp.rst
new file mode 100644
index 0000000000000000000000000000000000000000..026b73013bb6f232d65bca08d9fedb91dee01209
--- /dev/null
+++ b/Documentation/faq/bareudp.rst
@@ -0,0 +1,82 @@
+..
+ Licensed under the Apache License, Version 2.0 (the "License"); you may
+ not use this file except in compliance with the License. You may obtain
+ a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ License for the specific language governing permissions and limitations
+ under the License.
+
+ Convention for heading levels in Open vSwitch documentation:
+
+ ======= Heading 0 (reserved for the title in a document)
+ ------- Heading 1
+ ~~~~~~~ Heading 2
+ +++++++ Heading 3
+ ''''''' Heading 4
+
+ Avoid deeper levels because they do not render well.
+
+=======
+Bareudp
+=======
+
+Q: What is Bareudp?
+
+ A: There are various L3 encapsulation standards using UDP being discussed
+ to leverage the UDP based load balancing capability of different
+ networks. MPLSoUDP (https://tools.ietf.org/html/rfc7510) is one among
+ them.
+
+ The Bareudp tunnel provides a generic L3 encapsulation support for
+ tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP
+ tunnel.
+
+ An example to create bareudp device to tunnel MPLS unicast traffic is
+ given below::
+
+ $ ovs-vsctl add-port br0 mpls_udp_port -- set interface mpls_udp_port \
+ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \
+ options:payload_type=0x8847 options:dst_port=6635
+
+ The option payload_type specifies the ethertype of the l3 protocol which
+ the bareudp device will be tunnelling.
+
+ The bareudp device supports special handling for MPLS & IP as they can
+ have multiple ethertypes.
+ MPLS protocol can have ethertypes ETH_P_MPLS_UC (unicast) &
+ ETH_P_MPLS_MC (multicast). IP protocol can have ethertypes ETH_P_IP (v4)
+ & ETH_P_IPV6 (v6).
+
+ The bareudp device to tunnel L3 traffic with multiple ethertypes
+ (MPLS & IP) can be created by passing the L3 protocol name as string in
+ the field payload_type.
+
+ An example to create bareudp device to tunnel
+ MPLS unicast & multicast traffic is given below::
+
+ $ ovs-vsctl add-port br0 mpls_udp_port -- set interface mpls_udp_port \
+ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \
+ options:payload_type=mpls options:dst_port=6635
+
+ The below example ovs rule shows how a bareudp tunnel port is used to
+ tunnel an MPLS packet inside a UDP tunnel::
+
+ $ ovs-ofctl -O OpenFlow13 add-flow br0 "in_port=10,dl_type=0x0800,\
+ actions=push_mpls:0x8847,set_field:3->mpls_label,\
+ output:mpls_udp_port"
+
+ This rule does MPLS encapsulation on IP packets and sends the l3 MPLS
+ packets on a bareudp tunnel port which has its payload_type configured
+ to 0x8847.
+
+ An example to create bareudp device to tunnel
+ IPv4 & IPv6 traffic is given below::
+
+ $ ovs-vsctl add-port br0 ip_udp_port -- set interface ip_udp_port \
+ type=bareudp options:remote_ip=2.1.1.3 options:local_ip=2.1.1.2 \
+ options:payload_type=ip options:dst_port=6636
diff --git a/Documentation/faq/configuration.rst b/Documentation/faq/configuration.rst
index ff3b71a5d4efe97f7ec3a3e760d49afbf6cc7f00..4a98740c5d4d551a2d19fc8f63aacb91849bcfd5 100644
--- a/Documentation/faq/configuration.rst
+++ b/Documentation/faq/configuration.rst
@@ -225,6 +225,19 @@ Q: Does Open vSwitch support IPv6 GRE?
options:remote_ip=fc00:100::1 \
options:packet_type=legacy_l2
+Q: Does Open vSwitch support GTP-U?
+
+ A: Yes. Starting with version 2.14, the Open vSwitch userspace
+ datapath supports GTP-U (GPRS Tunnelling Protocol User Plane
+ (GTPv1-U)). TEID is set by using tunnel key field.
+
+ ::
+
+ $ ovs-vsctl add-br br0
+ $ ovs-vsctl add-port br0 gtpu0 -- \
+ set int gtpu0 type=gtpu options:key=<teid> \
+ options:remote_ip=172.31.1.1
+
Q: How do I connect two bridges?
A: First, why do you want to do this? Two connected bridges are not much
diff --git a/Documentation/faq/index.rst b/Documentation/faq/index.rst
index 334b828b2229d3b438cb8bdb188d097606fe99e3..1dd29986a198d1bc0cd925d9234ad862e6449bed 100644
--- a/Documentation/faq/index.rst
+++ b/Documentation/faq/index.rst
@@ -30,6 +30,7 @@ Open vSwitch FAQ
.. toctree::
:maxdepth: 2
+ bareudp
configuration
contributing
design
diff --git a/Documentation/faq/openflow.rst b/Documentation/faq/openflow.rst
index 8c94891703a8daace950bc993e77e13594f319d9..0111de78a380e1d29fa8ab5b4ee96508959d6613 100644
--- a/Documentation/faq/openflow.rst
+++ b/Documentation/faq/openflow.rst
@@ -385,7 +385,7 @@ but OVS drops the packets instead.
$ ovs-ofctl add-flow br0 actions=load:0->NXM_OF_IN_PORT[],2,3,4,5,6
If the input port is important, then one may save and restore it on the
- stack:
+ stack::
$ ovs-ofctl add-flow br0 actions=push:NXM_OF_IN_PORT[],\
load:0->NXM_OF_IN_PORT[],\
diff --git a/Documentation/faq/qos.rst b/Documentation/faq/qos.rst
index 53ad8980967169250acf3e333a59ad5a9b857986..33c319166f370b6aa9d461d9c0da0e44b273aa06 100644
--- a/Documentation/faq/qos.rst
+++ b/Documentation/faq/qos.rst
@@ -102,7 +102,7 @@ Q: How do I configure ingress policing?
A: A policing policy can be configured on an interface to drop packets that
arrive at a higher rate than the configured value. For example, the
following commands will rate-limit traffic that vif1.0 may generate to
- 10Mbps:
+ 10Mbps::
$ ovs-vsctl set interface vif1.0 ingress_policing_rate=10000
$ ovs-vsctl set interface vif1.0 ingress_policing_burst=8000
diff --git a/Documentation/faq/releases.rst b/Documentation/faq/releases.rst
index 6702c58a2b6ff7927290669d3cd04a80c8d3727f..4b96200158a56832cc449bffef94cf5b9b90351b 100644
--- a/Documentation/faq/releases.rst
+++ b/Documentation/faq/releases.rst
@@ -32,7 +32,7 @@ Q: What does it mean for an Open vSwitch release to be LTS (long-term support)?
If a significant bug is identified in an LTS release, we will provide an
updated release that includes the fix. Releases that are not LTS may not
be fixed and may just be supplanted by the next major release. The current
- LTS release is 2.5.x.
+ LTS release is 2.13.x.
For more information on the Open vSwitch release process, refer to
:doc:`/internals/release-process`.
@@ -67,9 +67,12 @@ Q: What Linux kernel versions does each Open vSwitch release work with?
2.7.x 3.10 to 4.9
2.8.x 3.10 to 4.12
2.9.x 3.10 to 4.13
- 2.10.x 3.10 to 4.17
- 2.11.x 3.10 to 4.18
- 2.12.x 3.10 to 5.0
+ 2.10.x 3.16 to 4.17
+ 2.11.x 3.16 to 4.18
+ 2.12.x 3.16 to 5.0
+ 2.13.x 3.16 to 5.0
+ 2.14.x 3.16 to 5.5
+ 2.15.x 3.16 to 5.8
============ ==============
Open vSwitch userspace should also work with the Linux kernel module built
@@ -78,6 +81,10 @@ Q: What Linux kernel versions does each Open vSwitch release work with?
Open vSwitch userspace is not sensitive to the Linux kernel version. It
should build against almost any kernel, certainly against 2.6.32 and later.
+ Open vSwitch branches 2.10 through 2.14 will still compile against the
+ RHEL and CentOS 7 3.10 based kernels since they have diverged from the
+ Linux kernel.org 3.10 kernels.
+
Q: Are all features available with all datapaths?
A: Open vSwitch supports different datapaths on different platforms. Each
@@ -117,7 +124,7 @@ Q: Are all features available with all datapaths?
========================== ============== ============== ========= =======
Connection tracking 4.3 2.5 2.6 YES
Conntrack Fragment Reass. 4.3 2.6 2.12 YES
- Conntrack Timeout Policies 5.2 2.12 NO NO
+ Conntrack Timeout Policies 5.2 2.12 2.14 NO
Conntrack Zone Limit 4.18 2.10 2.13 YES
Conntrack NAT 4.6 2.6 2.8 YES
Tunnel - LISP NO 2.11 NO NO
@@ -130,6 +137,8 @@ Q: Are all features available with all datapaths?
Tunnel - Geneve-IPv6 4.4 2.6 2.6 NO
Tunnel - ERSPAN 4.18 2.10 2.10 NO
Tunnel - ERSPAN-IPv6 4.18 2.10 2.10 NO
+ Tunnel - GTP-U NO NO 2.14 NO
+ Tunnel - Bareudp 5.7 NO NO NO
QoS - Policing YES 1.1 2.6 NO
QoS - Shaping YES 1.1 NO NO
sFlow YES 1.0 1.0 NO
@@ -138,6 +147,7 @@ Q: Are all features available with all datapaths?
NIC Bonding YES 1.0 1.0 YES
Multiple VTEPs YES 1.10 1.10 YES
Meter action 4.15 2.10 2.7 NO
+ check_pkt_len action 5.2 2.12 2.12 NO
========================== ============== ============== ========= =======
Do note, however:
@@ -173,9 +183,9 @@ Q: What DPDK version does each Open vSwitch release work with?
A: The following table lists the DPDK version against which the given
versions of Open vSwitch will successfully build.
- ============ =======
+ ============ ========
Open vSwitch DPDK
- ============ =======
+ ============ ========
2.2.x 1.6
2.3.x 1.6
2.4.x 2.0
@@ -183,11 +193,13 @@ Q: What DPDK version does each Open vSwitch release work with?
2.6.x 16.07.2
2.7.x 16.11.9
2.8.x 17.05.2
- 2.9.x 17.11.4
- 2.10.x 17.11.4
- 2.11.x 18.11.5
- 2.12.x 18.11.5
- ============ =======
+ 2.9.x 17.11.10
+ 2.10.x 17.11.10
+ 2.11.x 18.11.9
+ 2.12.x 18.11.9
+ 2.13.x 19.11.2
+ 2.14.x 19.11.2
+ ============ ========
Q: Are all the DPDK releases that OVS versions work with maintained?
@@ -203,7 +215,7 @@ Q: Are all the DPDK releases that OVS versions work with maintained?
The latest information about DPDK stable and LTS releases can be found
at `DPDK stable`_.
-.. _DPDK stable: http://dpdk.org/doc/guides/contributing/stable.html
+.. _DPDK stable: http://doc.dpdk.org/guides-20.11/contributing/stable.html
Q: I get an error like this when I configure Open vSwitch:
diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index be950d7cedb31a32f08a91569cccd500966fcc95..f0d45e47b6c08d7de897a77b83533ab51db3b93c 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -385,7 +385,7 @@ The validated NICs are:
Supported protocols for hardware offload matches are:
- L2: Ethernet, VLAN
-- L3: IPv4
+- L3: IPv4, IPv6
- L4: TCP, UDP, SCTP, ICMP
Supported actions for hardware offload are:
@@ -395,6 +395,9 @@ Supported actions for hardware offload are:
- Modification of Ethernet (mod_dl_src/mod_dl_dst).
- Modification of IPv4 (mod_nw_src/mod_nw_dst/mod_nw_ttl).
- Modification of TCP/UDP (mod_tp_src/mod_tp_dst).
+- VLAN Push/Pop (push_vlan/pop_vlan).
+- Modification of IPv6 (set_field:<ADDR>->ipv6_src/ipv6_dst/mod_nw_ttl).
+- Clone/output (tnl_push and output) for encapsulating over a tunnel.
Further Reading
---------------
diff --git a/Documentation/howto/ipsec.rst b/Documentation/howto/ipsec.rst
index 17153ac2be680c3ec4e345d0c1cacaef93d57b39..cd93484204c83a3846632829accc6da3aab0602a 100644
--- a/Documentation/howto/ipsec.rst
+++ b/Documentation/howto/ipsec.rst
@@ -162,7 +162,7 @@ undesirable situation.
`ipsec_skb_mark`. By setting the ipsec_skb_mark as 0/1, OVS IPsec prevents
all unencrypted tunnel packets leaving the host since the default skb_mark
value for tunnel packets are 0. This affects all OVS tunnels including those
- without IPsec being set up. You can install OpenFlow rules to whitelist
+ without IPsec being set up. You can install OpenFlow rules to enable
those non-IPsec tunnels by setting the skb_mark of the tunnel traffic as
non-zero value.
diff --git a/Documentation/howto/selinux.rst b/Documentation/howto/selinux.rst
index 4809639bc5bc9126f83ada37efe7c8ca8e4fbade..f657d5e518b90b8e91ba479246ea1dd90fc22603 100644
--- a/Documentation/howto/selinux.rst
+++ b/Documentation/howto/selinux.rst
@@ -67,8 +67,8 @@ differently than SELinux.
SELinux and Open vSwitch are moving targets. What this means is that, if you
solely rely on your Linux distribution's SELinux policy, then this policy might
not have correctly anticipated that a newer Open vSwitch version needs extra
-white list rules. However, if you solely rely on SELinux policy that ships
-with Open vSwitch, then Open vSwitch developers might not have correctly
+rules to allow behavior. However, if you solely rely on SELinux policy that
+ships with Open vSwitch, then Open vSwitch developers might not have correctly
anticipated the feature set that your SELinux implementation supports.
Installation
@@ -117,7 +117,7 @@ see in Open vSwitch log files "Permission Denied" errors::
However, not all "Permission denied" errors are caused by SELinux. So, before
blaming too strict SELinux policy, make sure that indeed SELinux was the one
-that denied OVS access to certain resources, for example, run:
+that denied OVS access to certain resources, for example, run::
$ grep "openvswitch_t" /var/log/audit/audit.log | tail
type=AVC msg=audit(1453235431.640:114671): avc: denied { getopt } for pid=4583 comm="ovs-vswitchd" scontext=system_u:system_r:openvswitch_t:s0 tcontext=system_u:system_r:openvswitch_t:s0 tclass=netlink_generic_socket permissive=0
@@ -136,8 +136,8 @@ Then verify that this module was indeed loaded::
openvswitch 1.1.1
If you still see Permission denied errors, then take a look into
-``selinux/openvswitch.te.in`` file in the OVS source tree and try to add white
-list rules. This is really simple, just run SELinux audit2allow tool::
+``selinux/openvswitch.te.in`` file in the OVS source tree and try to add allow
+rules. This is really simple, just run SELinux audit2allow tool::
$ grep "openvswitch_t" /var/log/audit/audit.log | audit2allow -M ovslocal
diff --git a/Documentation/howto/tunneling.rst b/Documentation/howto/tunneling.rst
index 2645b9043e24d88a69e6c675a32f0e1e574e35f7..2cbca977ba19005fd4655b8c47b4726bdac31b5d 100644
--- a/Documentation/howto/tunneling.rst
+++ b/Documentation/howto/tunneling.rst
@@ -130,7 +130,7 @@ Create a mirrored configuration on `host2` using the same basic steps:
$ ovs-vsctl add-port br0 tap1
#. Create the GRE tunnel on `host2`, this time using the IP address for
- ``eth0`` on `host1` when specifying the ``remote_ip`` option:
+ ``eth0`` on `host1` when specifying the ``remote_ip`` option::
$ ovs-vsctl add-port br0 gre0 \
-- set interface gre0 type=gre options:remote_ip=
diff --git a/Documentation/internals/contributing/backporting-patches.rst b/Documentation/internals/contributing/backporting-patches.rst
index e8f4f271c64fef874d26d9d615f925266c819e59..162e9d209ca14a9c8dfdcdb889b419d16828243b 100644
--- a/Documentation/internals/contributing/backporting-patches.rst
+++ b/Documentation/internals/contributing/backporting-patches.rst
@@ -69,7 +69,8 @@ targeted to the `master` branch, using the ``Fixes`` tag described in
:doc:`submitting-patches`. The maintainer first applies the patch to `master`,
then backports the patch to each older affected tree, as far back as it goes or
at least to all currently supported branches. This is usually each branch back
-to the most recent LTS release branch.
+to the oldest maintained LTS release branch or the last 4 release branches if
+the oldest LTS is newer.
If the fix only affects a particular branch and not `master`, contributors
should submit the change with the target branch listed in the subject line of
diff --git a/Documentation/internals/contributing/submitting-patches.rst b/Documentation/internals/contributing/submitting-patches.rst
index 5a314cc60aa7ab4aacf6cf0f838ff7932c827226..4a6780371d4156c77c326f137d38792ffce138ca 100644
--- a/Documentation/internals/contributing/submitting-patches.rst
+++ b/Documentation/internals/contributing/submitting-patches.rst
@@ -68,11 +68,10 @@ Testing is also important:
feature. A bug fix patch should preferably add a test that would
fail if the bug recurs.
-If you are using GitHub, then you may utilize the travis-ci.org CI build system
-by linking your GitHub repository to it. This will run some of the above tests
-automatically when you push changes to your repository. See the "Continuous
-Integration with Travis-CI" in :doc:`/topics/testing` for details on how to set
-it up.
+If you are using GitHub, then you may utilize the travis-ci.org and the GitHub
+Actions CI build systems. They will run some of the above tests automatically
+when you push changes to your repository. See the "Continuous Integration with
+Travis-CI" in :doc:`/topics/testing` for details on how to set it up.
Email Subject
-------------
diff --git a/Documentation/internals/release-process.rst b/Documentation/internals/release-process.rst
index 89c11772489d538991e519f4dd9a680e278eb7ae..fb39ccb5dd1b32f786c89940e21bfb42ea9cd766 100644
--- a/Documentation/internals/release-process.rst
+++ b/Documentation/internals/release-process.rst
@@ -75,16 +75,48 @@ Scheduling`_ for the timing of each stage:
and so on. The process is the same for these additional release as for a .0
release.
-At most two release branches are formally maintained at any given time: the
-latest release and the latest release designed as LTS. An LTS release is one
-that the OVS project has designated as being maintained for a longer period of
-time. Currently, an LTS release is maintained until the next LTS is chosen.
-There is not currently a strict guideline on how often a new LTS release is
-chosen, but so far it has been about every 2 years. That could change based on
-the current state of OVS development. For example, we do not want to designate
-a new release as LTS that includes disruptive internal changes, as that may
-make it harder to support for a longer period of time. Discussion about
-choosing the next LTS release occurs on the OVS development mailing list.
+At most three release branches are formally maintained at any given time: the
+latest release, the latest release designated as LTS, and a previous LTS
+release during the transition period. An LTS release is one that the OVS
+project has designated as being maintained for a longer period of time.
+Currently, an LTS release is maintained until the next major release after the
+new LTS is chosen. This one release time frame is a transition period which is
+intended for users to upgrade from old LTS to new one.
+
+A new LTS release is chosen every 2 years. The process is that the current
+latest stable release becomes an LTS release at the same time the next major
+release is out. That could change based on the current state of OVS
+development. For example, we do not want to designate a new release as LTS
+that includes disruptive internal changes, as that may make it harder to
+support for a longer period of time. Discussion about skipping designation of
+the next LTS release occurs on the OVS development mailing list.
+
+LTS designation schedule example (depends on current state of development):
+
++---------+--------------+--------------------------------------------------+
+| Version | Release Date | Actions |
++---------+--------------+--------------------------------------------------+
+| 2.14 | Aug 2020 | 2.14 - new latest stable, 2.13 stable ⟶ new LTS |
++---------+--------------+--------------------------------------------------+
+| 2.15 | Feb 2021 | 2.15 - new latest stable, 2.5 LTS ⟶ EOL |
++---------+--------------+--------------------------------------------------+
+| 2.16 | Aug 2021 | 2.16 - new latest stable |
++---------+--------------+--------------------------------------------------+
+| 2.17 | Feb 2022 | 2.17 - new latest stable |
++---------+--------------+--------------------------------------------------+
+| 2.18 | Aug 2022 | 2.18 - new latest stable, 2.17 stable ⟶ new LTS |
++---------+--------------+--------------------------------------------------+
+| 2.19 | Feb 2023 | 2.19 - new latest stable, 2.13 LTS ⟶ EOL |
++---------+--------------+--------------------------------------------------+
+
+While branches other than LTS and the latest release are not formally
+maintained, the OVS project usually provides stable releases for these branches
+for at least 2 years, i.e. stable releases are provided for the last 4
+release branches. However, these branches may not include all the fixes that
+LTS has in case backporting is not straightforward and developers are not
+willing to spend their time on that (this mostly affects branches that are
+older than the LTS, because backporting to LTS implies backporting to all
+intermediate branches).
Release Numbering
-----------------
@@ -122,6 +154,63 @@ approximate:
| T + 5.5 | Aug 15, Feb 15 | Release version x.y.0 |
+---------------+----------------+--------------------------------------+
+How to Branch
+-------------
+
+To branch "master" for the eventual release of OVS version x.y.0,
+prepare two patches against master:
+
+1. "Prepare for x.y.0." following the model of commit 836d1973c56e
+ ("Prepare for 2.11.0.").
+
+2. "Prepare for post-x.y.0 (x.y.90)." following the model of commit
+ fe2870c574db ("Prepare for post-2.11.0 (2.11.90).")
+
+Post both patches to ovs-dev. Get them reviewed in the usual way.
+
+Apply both patches to master, and create branch-x.y by pushing only
+the first patch. The following command illustrates how to do both of
+these at once assuming the local repository HEAD points to the
+"Prepare for post-x.y.0" commit::
+
+ git push origin HEAD:master HEAD^:refs/heads/branch-x.y
+
+Branching should be announced on ovs-dev.
+
+How to Release
+--------------
+
+Follow these steps to release version x.y.z of OVS from branch-x.y.
+
+1. Prepare two patches against branch-x.y:
+
+ a. "Set release date for x.y.z". For z = 0, follow the model of
+ commit d11f4cbbfe05 ("Set release date for 2.12.0."); for z > 0,
+ follow the model of commit 53d5c18118b0 ("Set release date for
+ 2.11.3.").
+
+ b. "Prepare for x.y.(z+1)." following the model of commit
+ db02dd23e48a ("Prepare for 2.11.1.").
+
+2. Post the patches to ovs-dev. Get them reviewed in the usual way.
+
+3. Apply the patches to branch-x.y.
+
+4. If z = 0, apply the first patch (only) to master.
+
+5. Sign a tag vx.y.z "Open vSwitch version x.y.z" and push it to the
+ repo.
+
+6. Update http://www.openvswitch.org/download/. See commit
+ 31eaa72cafac ("Add 2.12.0 and older release announcements.") in the
+ website repo (https://github.com/openvswitch/openvswitch.github.io)
+ for an example.
+
+7. Consider updating the Wikipedia page for Open vSwitch at
+ https://en.wikipedia.org/wiki/Open_vSwitch
+
+8. Tweet.
+
Contact
-------
diff --git a/Documentation/internals/security.rst b/Documentation/internals/security.rst
index f6a31ad011164ccdcd41539d34446a75f0762bfe..8b4e5c3f4d5d7b11847342224f9c5ddb8d4bfc55 100644
--- a/Documentation/internals/security.rst
+++ b/Documentation/internals/security.rst
@@ -247,10 +247,13 @@ immediate (esp. if it's already publicly known) to a few weeks. As a basic
default policy, we expect report date to disclosure date to be 10 to 15
business days.
-Operating system vendors are obvious downstream stakeholders. It may not be
-necessary to be too choosy about who to include: any major Open vSwitch user
-who is interested and can be considered trustworthy enough could be included.
-To become a downstream stakeholder, email the ovs-security mailing list.
+Operating system vendors are obvious downstream stakeholders; however,
+any major Open vSwitch user who is interested and can be considered
+trustworthy enough could be included. To request being added to the
+Downstream mailing list, email the ovs-security mailing list. Please
+include a few sentences on how your organization uses Open vSwitch. If
+possible, please provide a security-related email alias rather than a
+direct end-user address.
If the vulnerability is already public, skip this step.
diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst
index c4685fa7ebacaa8ea4179226036b19f475613d41..aad0aeebea6c8b221f38e897cef6575c2997697c 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -108,6 +108,14 @@ vSwitch with AF_XDP will require the following:
* CONFIG_XDP_SOCKETS_DIAG=y (Debugging)
+- If you're building your own kernel, be sure that you're installing kernel
+ headers too. For example, with the following command::
+
+ make headers_install INSTALL_HDR_PATH=/usr
+
+- If you're using a kernel from the distribution, be sure that the
+ corresponding kernel headers package is installed.
+
- Once your AF_XDP-enabled kernel is ready, if possible, run
**./xdpsock -r -N -z -i ** under linux/samples/bpf.
This is an OVS independent benchmark tools for AF_XDP.
@@ -138,11 +146,20 @@ Make sure the libbpf.so is installed correctly::
ldconfig
ldconfig -p | grep libbpf
+.. note::
+ Check /etc/ld.so.conf if libbpf is installed but can not be found by
+ ldconfig.
+
Third, ensure the standard OVS requirements are installed and
bootstrap/configure the package::
./boot.sh && ./configure --enable-afxdp
+.. note::
+ If you encounter "WARNING: bpf/libbpf.h: present but cannot be compiled",
+ check the Linux headers are in line with libbpf. For example, in Ubuntu,
+ check the installed linux-headers* and linux-libc-dev* dpkg.
+
Finally, build and install OVS::
make && make install
@@ -379,7 +396,7 @@ PVP using vhostuser device
--------------------------
First, build OVS with DPDK and AFXDP::
- ./configure --enable-afxdp --with-dpdk=
+ ./configure --enable-afxdp --with-dpdk=shared|static
make -j4 && make install
Create a vhost-user port from OVS::
diff --git a/Documentation/intro/install/distributions.rst b/Documentation/intro/install/distributions.rst
index 54362c0a495e07703b2dd7188fb1272a3c3ad7a0..b68a764d10a26d6dbd36d93ff2afd41cec284b94 100644
--- a/Documentation/intro/install/distributions.rst
+++ b/Documentation/intro/install/distributions.rst
@@ -44,10 +44,13 @@ that includes the core userspace components of the switch.
2. For kernel datapath, ``openvswitch-datapath-dkms`` can be installed to
automatically build and install Open vSwitch kernel module for your running
-kernel.
+kernel. This package is only available when the .deb packages are built from
+the Open vSwitch repository; it is not downstream in Debian or Ubuntu releases.
3. For fast userspace switching, Open vSwitch with DPDK support is
-bundled in the package ``openvswitch-switch-dpdk``.
+bundled in the package ``openvswitch-switch-dpdk``. This package is only
+available in the Ubuntu distribution; it is not upstream in the Open vSwitch
+repository or downstream in Debian.
Fedora
------
diff --git a/Documentation/intro/install/dpdk.rst b/Documentation/intro/install/dpdk.rst
index dbf88ec43f00f21aaade9272d14977b0900790f2..a595417cecdb96ea0bd44459391b6bc4e1522c45 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -42,7 +42,7 @@ Build requirements
In addition to the requirements described in :doc:`general`, building Open
vSwitch with DPDK will require the following:
-- DPDK 19.11
+- DPDK 20.11
- A `DPDK supported NIC`_
@@ -59,8 +59,10 @@ vSwitch with DPDK will require the following:
Detailed system requirements can be found at `DPDK requirements`_.
-.. _DPDK supported NIC: http://dpdk.org/doc/nics
-.. _DPDK requirements: http://dpdk.org/doc/guides/linux_gsg/sys_reqs.html
+.. _DPDK supported NIC: https://doc.dpdk.org/guides-20.11/nics/index.html
+.. _DPDK requirements: https://doc.dpdk.org/guides-20.11/linux_gsg/sys_reqs.html
+
+.. _dpdk-install:
Installing
----------
@@ -71,38 +73,44 @@ Install DPDK
#. Download the `DPDK sources`_, extract the file and set ``DPDK_DIR``::
$ cd /usr/src/
- $ wget https://fast.dpdk.org/rel/dpdk-19.11.tar.xz
- $ tar xf dpdk-19.11.tar.xz
- $ export DPDK_DIR=/usr/src/dpdk-19.11
+ $ wget https://fast.dpdk.org/rel/dpdk-20.11.tar.xz
+ $ tar xf dpdk-20.11.tar.xz
+ $ export DPDK_DIR=/usr/src/dpdk-20.11
$ cd $DPDK_DIR
-#. (Optional) Configure DPDK as a shared library
+#. Configure and install DPDK using Meson
- DPDK can be built as either a static library or a shared library. By
- default, it is configured for the former. If you wish to use the latter, set
- ``CONFIG_RTE_BUILD_SHARED_LIB=y`` in ``$DPDK_DIR/config/common_base``.
+ Build and install the DPDK library::
- .. note::
+ $ export DPDK_BUILD=$DPDK_DIR/build
+ $ meson build
+ $ ninja -C build
+ $ sudo ninja -C build install
+ $ sudo ldconfig
- Minor performance loss is expected when using OVS with a shared DPDK
- library compared to a static DPDK library.
+ Detailed information can be found at `DPDK documentation`_.
-#. Configure and install DPDK
+#. (Optional) Configure and export the DPDK shared library location
- Build and install the DPDK library::
+ Since DPDK is built both as static and shared library by default, no extra
+ configuration is required for the build.
- $ export DPDK_TARGET=x86_64-native-linuxapp-gcc
- $ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET
- $ make install T=$DPDK_TARGET DESTDIR=install
+ Exporting the path to library is not necessary if the DPDK libraries are
+ system installed. For libraries installed using a prefix, export the path
+ to this library and also update the $PKG_CONFIG_PATH for use
+ before building OVS::
-#. (Optional) Export the DPDK shared library location
+ $ export LD_LIBRARY_PATH=/path/to/installed/DPDK/libraries
+ $ export PKG_CONFIG_PATH=/path/to/installed/".pc" file/for/DPDK
- If DPDK was built as a shared library, export the path to this library for
- use when building OVS::
+ .. note::
- $ export LD_LIBRARY_PATH=$DPDK_DIR/x86_64-native-linuxapp-gcc/lib
+ Minor performance loss is expected when using OVS with a shared DPDK
+ library compared to a static DPDK library.
.. _DPDK sources: http://dpdk.org/rel
+.. _DPDK documentation:
+ https://doc.dpdk.org/guides-20.11/linux_gsg/build_dpdk.html
Install OVS
~~~~~~~~~~~
@@ -121,21 +129,31 @@ has to be configured to build against the DPDK library (``--with-dpdk``).
#. Bootstrap, if required, as described in :ref:`general-bootstrapping`
-#. Configure the package using the ``--with-dpdk`` flag::
+#. Configure the package using the ``--with-dpdk`` flag:
- $ ./configure --with-dpdk=$DPDK_BUILD
+ If OVS must consume DPDK static libraries
+ (also equivalent to ``--with-dpdk=yes`` )::
- where ``DPDK_BUILD`` is the path to the built DPDK library. This can be
- skipped if DPDK library is installed in its default location.
+ $ ./configure --with-dpdk=static
- If no path is provided to ``--with-dpdk``, but a pkg-config configuration
- for libdpdk is available the include paths will be generated via an
- equivalent ``pkg-config --cflags libdpdk``.
+ If OVS must consume DPDK shared libraries::
+
+ $ ./configure --with-dpdk=shared
.. note::
While ``--with-dpdk`` is required, you can pass any other configuration
option described in :ref:`general-configuring`.
+ It is strongly recommended to build OVS with at least ``-msse4.2`` and
+ ``-mpopcnt`` optimization flags. If these flags are not enabled, the AVX512
+ optimized DPCLS implementation is not available in the resulting binary.
+ For technical details see the subtable registration code in the
+ ``lib/dpif-netdev-lookup.c`` file.
+
+ An example that enables the AVX512 optimizations is::
+
+ $ ./configure --with-dpdk=static CFLAGS="-Ofast -msse4.2 -mpopcnt"
+
#. Build and install OVS, as described in :ref:`general-building`
Additional information can be found in :doc:`general`.
@@ -147,6 +165,26 @@ Additional information can be found in :doc:`general`.
__ https://github.com/openvswitch/ovs/blob/master/rhel/README.RHEL.rst
+
+Possible issues when enabling AVX512
+++++++++++++++++++++++++++++++++++++
+
+The enabling of ISA optimized builds requires build-system support.
+Certain versions of the assembler provided by binutils are known to have
+AVX512 assembling issues. The binutils versions affected are 2.30 and 2.31.
+As many distros backport fixes to previous versions of a package, checking
+the version output of ``as -v`` can err on the side of disabling AVX512. To
+remedy this, the OVS build system uses a build-time check to see if ``as``
+will correctly assemble the AVX512 code. The output of a good version when
+running the ``./configure`` step of the build process is as follows::
+
+ $ checking binutils avx512 assembler checks passing... yes
+
+If a bug is detected in the binutils assembler, it would indicate ``no``.
+Build an updated binutils, or request a backport of this binutils
+fix commit ``2069ccaf8dc28ea699bd901fdd35d90613e4402a`` to fix the issue.
+
+
Setup
-----
@@ -673,7 +711,7 @@ Limitations
release notes`_.
.. _DPDK release notes:
- https://doc.dpdk.org/guides-19.11/rel_notes/release_19_11.html
+ https://doc.dpdk.org/guides-20.11/rel_notes/release_20_11.html
- Upper bound MTU: DPDK device drivers differ in how the L2 frame for a
given MTU value is calculated e.g. i40e driver includes 2 x vlan headers in
@@ -687,6 +725,15 @@ Limitations
around is temporary and is expected to be removed once a method is provided
by DPDK to query the upper bound MTU value for a given device.
+- Flow Control: When using i40e devices (Intel(R) 700 Series) it is recommended
+ to set Link State Change detection to interrupt mode. Otherwise it has been
+ observed that using the default polling mode, flow control changes may not be
+ applied, and flow control states will not be reflected correctly.
+ The issue is under investigation; this is a temporary workaround.
+
+ For information about setting Link State Change detection, refer to
+ :ref:`lsc-detection`.
+
Reporting Bugs
--------------
diff --git a/Documentation/intro/install/fedora.rst b/Documentation/intro/install/fedora.rst
index 6fe1fb5b24684a027907132d969fd6d93411fcc7..4a2f3507cbbbe2f70573abde4fcee43eb6642d92 100644
--- a/Documentation/intro/install/fedora.rst
+++ b/Documentation/intro/install/fedora.rst
@@ -69,6 +69,11 @@ repositories to help yum-builddep, e.g.::
$ subscription-manager repos --enable=rhel-7-server-extras-rpms
$ subscription-manager repos --enable=rhel-7-server-optional-rpms
+or for RHEL 8::
+
+ $ subscription-manager repos \
+ --enable=codeready-builder-for-rhel-8-x86_64-rpms
+
DNF::
$ dnf builddep /tmp/ovs.spec
diff --git a/Documentation/intro/install/general.rst b/Documentation/intro/install/general.rst
index 09f2c13f165d860c45ba62ab7db2bae5fb47a052..c4300cd53e0de21d20be71a79e4b0db1793e818d 100644
--- a/Documentation/intro/install/general.rst
+++ b/Documentation/intro/install/general.rst
@@ -97,7 +97,9 @@ need the following software:
specifying OpenFlow and OVSDB remotes. If unbound library is already
installed, then Open vSwitch will automatically build with support for it.
The environment variable OVS_RESOLV_CONF can be used to specify DNS server
- configuration file (the default file on Linux is /etc/resolv.conf).
+ configuration file (the default file on Linux is /etc/resolv.conf), and
+ environment variable OVS_UNBOUND_CONF can be used to specify the
+ configuration file for unbound.
On Linux, you may choose to compile the kernel module that comes with the Open
vSwitch distribution or to use the kernel module built into the Linux kernel
diff --git a/Documentation/intro/install/rhel.rst b/Documentation/intro/install/rhel.rst
index 31f0eec3a4bd1b741714b7018b9a11468314c682..b21b274b716afb06a63fb7f81c653b7cc0ab4bcf 100644
--- a/Documentation/intro/install/rhel.rst
+++ b/Documentation/intro/install/rhel.rst
@@ -201,7 +201,7 @@ On RHEL 6, to build the Open vSwitch kernel module run::
$ rpmbuild -bb rhel/kmod-openvswitch-rhel6.spec
-You might have to specify a kernel version and/or variants, e.g.:
+You might have to specify a kernel version and/or variants, e.g.::
$ rpmbuild -bb \
-D "kversion 2.6.32-131.6.1.el6.x86_64" \
diff --git a/Documentation/intro/install/windows.rst b/Documentation/intro/install/windows.rst
index 394572f001e9544eda560d628395830c56be378a..79d4c62612bcdaf10030b873ce6b6bcccd6501c3 100644
--- a/Documentation/intro/install/windows.rst
+++ b/Documentation/intro/install/windows.rst
@@ -71,7 +71,13 @@ The following explains the steps in some detail.
You will need at least Visual Studio 2013 (update 4) to compile userspace
binaries. In addition to that, if you want to compile the kernel module you
- will also need to install Windows Driver Kit (WDK) 8.1 Update.
+ will also need to install Windows Driver Kit (WDK) 8.1 Update or later.
+ To generate the Windows installer you need
+ `WiX Toolset <https://wixtoolset.org/>`__ and also be able to build the
+ kernel module.
+
+ We recommend using the latest Visual Studio version together with the latest
+ WDK installed.
It is important to get the Visual Studio related environment variables and to
have the $PATH inside the bash to point to the proper compiler and linker.
@@ -92,13 +98,13 @@ The following explains the steps in some detail.
Visual studio's linker is used. You should also see a 'which sort' report
``/bin/sort.exe``.
-- pthreads-win32
+- PThreads4W
- For pthread support, install the library, dll and includes of pthreads-win32
+ For pthread support, install the library, dll and includes of PThreads4W
project from `sourceware
- `__ to a
- directory (e.g.: ``C:/pthread``). You should add the pthread-win32's dll path
- (e.g.: ``C:\pthread\dll\x86``) to the Windows' PATH environment variable.
+ `__ to a directory
+ (e.g.: ``C:/pthread``). You should add the PThreads4W's dll path
+ (e.g.: ``C:\pthread\bin``) to the Windows' PATH environment variable.
- OpenSSL
@@ -319,6 +325,22 @@ An alternative way to do the same is to run the following command:
seconds has been observed for the change to be reflected in the UI. This is
not a bug in Open vSwitch.
+Generate the Windows installer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To generate the Windows installer run the following command from the top
+source directory:
+
+::
+
+ $ make windows_installer
+
+.. note::
+
+ This will generate the Windows installer in the following location (relative
+ to the top source directory):
+ windows/ovs-windows-installer/bin/Release/OpenvSwitch.msi
+
Starting
--------
@@ -785,10 +807,10 @@ Windows CI Service
------------------
`AppVeyor `__ provides a free Windows autobuild service for
-open source projects. Open vSwitch has integration with AppVeyor for continuous
-build. A developer can build test his changes for Windows by logging into
-appveyor.com using a github account, creating a new project by linking it to
-his development repository in github and triggering a new build.
+open source projects. Open vSwitch has integration with AppVeyor for
+continuous build. A developer can build and test changes for Windows by
+logging into appveyor.com using a GitHub account, creating a new project
+linked to their development repository on GitHub, and triggering a new build.
TODO
----
@@ -797,5 +819,4 @@ TODO
* Investigate and add the feature to provide QoS.
-* Sign the driver & create an MSI for installing the different Open vSwitch
- components on Windows.
+* Sign the driver.
diff --git a/Documentation/ref/ovsdb-server.7.rst b/Documentation/ref/ovsdb-server.7.rst
index 967761bdfb8451ca672adcd7c1dd47b9d7f0990a..04414350a7755d58907a0fa81373b09728cb0157 100644
--- a/Documentation/ref/ovsdb-server.7.rst
+++ b/Documentation/ref/ovsdb-server.7.rst
@@ -316,9 +316,9 @@ monitor request, will contain any matched rows by old
condition and not matched by the new condition.
Changes according to the new conditions are automatically sent to the client
-using the ``update2`` monitor notification. An update, if any, as a result of
-a condition change, will be sent to the client before the reply to the
-``monitor_cond_change`` request.
+using the ``update2`` or ``update3`` monitor notification depending on the
+monitor method. An update, if any, as a result of a condition change, will
+be sent to the client before the reply to the ``monitor_cond_change`` request.
4.1.14 Update2 notification
---------------------------
diff --git a/Documentation/ref/ovsdb.7.rst b/Documentation/ref/ovsdb.7.rst
index b1f3f5d494c321c46a58b19bfca034aabdc1f06b..da4dbedd22c15d13b1b26910914c4f714234de51 100644
--- a/Documentation/ref/ovsdb.7.rst
+++ b/Documentation/ref/ovsdb.7.rst
@@ -422,7 +422,8 @@ punix:
named .
On Windows, listens on a local named pipe, creating a named pipe
- to mimic the behavior of a Unix domain socket.
+ to mimic the behavior of a Unix domain socket. The ACLs of the named
+ pipe include LocalSystem, Administrators, and Creator Owner.
All IP-based connection methods accept IPv4 and IPv6 addresses. To specify an
IPv6 address, wrap it in square brackets, e.g. ``ssl:[::1]:6640``. Passive
diff --git a/Documentation/topics/bonding.rst b/Documentation/topics/bonding.rst
index ac39fd18bfacc08973d776d1d0ca8192e93a05fa..01bd5dfc2c591a46552d537bdb7bb960f8466d70 100644
--- a/Documentation/topics/bonding.rst
+++ b/Documentation/topics/bonding.rst
@@ -25,22 +25,22 @@
Bonding
=======
-Bonding allows two or more interfaces (the "slaves") to share network traffic.
+Bonding allows two or more interfaces, its "members", to share network traffic.
From a high-level point of view, bonded interfaces act like a single port, but
they have the bandwidth of multiple network devices, e.g. two 1 GB physical
interfaces act like a single 2 GB interface. Bonds also increase robustness:
-the bonded port does not go down as long as at least one of its slaves is up.
+the bonded port does not go down as long as at least one of its members is up.
-In vswitchd, a bond always has at least two slaves (and may have more). If a
-configuration error, etc. would cause a bond to have only one slave, the port
+In vswitchd, a bond always has at least two members (and may have more). If a
+configuration error, etc. would cause a bond to have only one member, the port
becomes an ordinary port, not a bonded port, and none of the special features
of bonded ports described in this section apply.
There are many forms of bonding of which ovs-vswitchd implements only a few.
The most complex bond ovs-vswitchd implements is called "source load balancing"
-or SLB bonding. SLB bonding divides traffic among the slaves based on the
-Ethernet source address. This is useful only if the traffic over the bond has
-multiple Ethernet source addresses, for example if network traffic from
+or SLB bonding. SLB bonding divides traffic among the members based on
+the Ethernet source address. This is useful only if the traffic over the bond
+has multiple Ethernet source addresses, for example if network traffic from
multiple VMs are multiplexed over the bond.
.. note::
@@ -50,89 +50,90 @@ multiple VMs are multiplexed over the bond.
specified.
-Enabling and Disabling Slaves
------------------------------
+Enabling and Disabling Members
+------------------------------
-When a bond is created, a slave is initially enabled or disabled based on
-whether carrier is detected on the NIC (see ``iface_create()``). After that, a
-slave is disabled if its carrier goes down for a period of time longer than the
-downdelay, and it is enabled if carrier comes up for longer than the updelay
-(see ``bond_link_status_update()``). There is one exception where the updelay
-is skipped: if no slaves at all are currently enabled, then the first slave on
-which carrier comes up is enabled immediately.
+When a bond is created, a member is initially enabled or disabled based
+on whether carrier is detected on the NIC (see ``iface_create()``). After
+that, a member is disabled if its carrier goes down for a period of time
+longer than the downdelay, and it is enabled if carrier comes up for longer
+than the updelay (see ``bond_link_status_update()``). There is one exception
+where the updelay is skipped: if no members at all are currently
+enabled, then the first member on which carrier comes up is enabled
+immediately.
The updelay should be set to a time longer than the STP forwarding delay of the
physical switch to which the bond port is connected (if STP is enabled on that
-switch). Otherwise, the slave will be enabled, and load may be shifted to it,
-before the physical switch starts forwarding packets on that port, which can
-cause some data to be "blackholed" for a time. The exception for a single
-enabled slave does not cause any problem in this regard because when no slaves
-are enabled all output packets are blackholed anyway.
-
-When a slave becomes disabled, the vswitch immediately chooses a new output
-port for traffic that was destined for that slave (see
-``bond_enable_slave()``). It also sends a "gratuitous learning packet",
-specifically a RARP, on the bond port (on the newly chosen slave) for each MAC
-address that the vswitch has learned on a port other than the bond (see
-``bundle_send_learning_packets()``), to teach the physical switch that the new
-slave should be used in place of the one that is now disabled. (This behavior
-probably makes sense only for a vswitch that has only one port (the bond)
-connected to a physical switch; vswitchd should probably provide a way to
-disable or configure it in other scenarios.)
+switch). Otherwise, the member will be enabled, and load may be shifted
+to it, before the physical switch starts forwarding packets on that port, which
+can cause some data to be dropped for a time. The exception for a single
+enabled member does not cause any problem in this regard because when no
+members are enabled all output packets are dropped anyway.
+
+When a member becomes disabled, the vswitch immediately chooses a new
+output port for traffic that was destined for that member (see
+``bond_enable_member()``). It also sends a "gratuitous learning packet",
+specifically a RARP, on the bond port (on the newly chosen member) for
+each MAC address that the vswitch has learned on a port other than the bond
+(see ``bundle_send_learning_packets()``), to teach the physical switch that the
+new member should be used in place of the one that is now disabled.
+(This behavior probably makes sense only for a vswitch that has only one port
+(the bond) connected to a physical switch; vswitchd should probably provide a
+way to disable or configure it in other scenarios.)
Bond Packet Input
-----------------
-Bonding accepts unicast packets on any bond slave. This can occasionally cause
-packet duplication for the first few packets sent to a given MAC, if the
+Bonding accepts unicast packets on any member. This can occasionally
+cause packet duplication for the first few packets sent to a given MAC, if the
physical switch attached to the bond is flooding packets to that MAC because it
-has not yet learned the correct slave for that MAC.
+has not yet learned the correct member for that MAC.
-Bonding only accepts multicast (and broadcast) packets on a single bond slave
-(the "active slave") at any given time. Multicast packets received on other
-slaves are dropped. Otherwise, every multicast packet would be duplicated,
-once for every bond slave, because the physical switch attached to the bond
-will flood those packets.
+Bonding only accepts multicast (and broadcast) packets on a single bond
+member (the "active member") at any given time. Multicast
+packets received on other members are dropped. Otherwise, every
+multicast packet would be duplicated, once for every bond member,
+because the physical switch attached to the bond will flood those packets.
Bonding also drops received packets when the vswitch has learned that the
packet's MAC is on a port other than the bond port itself. This is because it
is likely that the vswitch itself sent the packet out the bond port on a
-different slave and is now receiving the packet back. This occurs when the
-packet is multicast or the physical switch has not yet learned the MAC and is
-flooding it. However, the vswitch makes an exception to this rule for
+different member and is now receiving the packet back. This occurs when
+the packet is multicast or the physical switch has not yet learned the MAC and
+is flooding it. However, the vswitch makes an exception to this rule for
broadcast ARP replies, which indicate that the MAC has moved to another switch,
probably due to VM migration. (ARP replies are normally unicast, so this
exception does not match normal ARP replies. It will match the learning
packets sent on bond fail-over.)
-The active slave is simply the first slave to be enabled after the bond is
-created (see ``bond_choose_active_slave()``). If the active slave is disabled,
-then a new active slave is chosen among the slaves that remain active.
-Currently due to the way that configuration works, this tends to be the
-remaining slave whose interface name is first alphabetically, but this is by no
-means guaranteed.
+The active member is simply the first member to be enabled after
+the bond is created (see ``bond_choose_active_member()``). If the active
+member is disabled, then a new active member is chosen among the
+members that remain active. Currently due to the way that configuration
+works, this tends to be the remaining member whose interface name is
+first alphabetically, but this is by no means guaranteed.
Bond Packet Output
------------------
-When a packet is sent out a bond port, the bond slave actually used is selected
-based on the packet's source MAC and VLAN tag (see ``bond_choose_output_slave()``).
-In particular, the source MAC and VLAN tag are hashed into one of 256 values,
-and that value is looked up in a hash table (the "bond hash") kept in the
-``bond_hash`` member of struct port. The hash table entry identifies a bond
-slave. If no bond slave has yet been chosen for that hash table entry,
-vswitchd chooses one arbitrarily.
-
-Every 10 seconds, vswitchd rebalances the bond slaves (see
-``bond_rebalance()``). To rebalance, vswitchd examines the statistics for
-the number of bytes transmitted by each slave over approximately the past
+When a packet is sent out a bond port, the bond member actually used is
+selected based on the packet's source MAC and VLAN tag (see
+``bond_choose_output_member()``). In particular, the source MAC and VLAN tag
+are hashed into one of 256 values, and that value is looked up in a hash table
+(the "bond hash") kept in the ``bond_hash`` member of struct port. The hash
+table entry identifies a bond member. If no bond member has yet been chosen
+for that hash table entry, vswitchd chooses one arbitrarily.
+
+Every 10 seconds, vswitchd rebalances the bond members (see
+``bond_rebalance()``). To rebalance, vswitchd examines the statistics for the
+number of bytes transmitted by each member over approximately the past
minute, with data sent more recently weighted more heavily than data sent less
-recently. It considers each of the slaves in order from most-loaded to
-least-loaded. If highly loaded slave H is significantly more heavily loaded
-than the least-loaded slave L, and slave H carries at least two hashes, then
-vswitchd shifts one of H's hashes to L. However, vswitchd will only shift a
-hash from H to L if it will decrease the ratio of the load between H and L by
-at least 0.1.
+recently. It considers each of the members in order from most-loaded to
+least-loaded. If highly loaded member H is significantly more heavily
+loaded than the least-loaded member L, and member H carries at
+least two hashes, then vswitchd shifts one of H's hashes to L. However,
+vswitchd will only shift a hash from H to L if it will decrease the ratio of
+the load between H and L by at least 0.1.
Currently, "significantly more loaded" means that H must carry at least 1 Mbps
more traffic, and that traffic must be at least 3% greater than L's.
@@ -166,11 +167,11 @@ behavior on Open vSwitch.
Active Backup Bonding
~~~~~~~~~~~~~~~~~~~~~
-Active Backup bonds send all traffic out one "active" slave until that slave
-becomes unavailable. Since they are significantly less complicated than SLB
-bonds, they are preferred when LACP is not an option. Additionally, they are
-the only bond mode which supports attaching each slave to a different upstream
-switch.
+Active Backup bonds send all traffic out one "active" member until that
+member becomes unavailable. Since they are significantly less
+complicated than SLB bonds, they are preferred when LACP is not an option.
+Additionally, they are the only bond mode which supports attaching each
+member to a different upstream switch.
SLB Bonding
~~~~~~~~~~~
@@ -195,15 +196,15 @@ SLB bonding has the following complications:
This would cause packet duplication if not handled specially.
Open vSwitch avoids packet duplication by accepting multicast and broadcast
- packets on only the active slave, and dropping multicast and broadcast
- packets on all other slaves.
+ packets on only the active member, and dropping multicast and
+ broadcast packets on all other members.
2. When Open vSwitch forwards a multicast or broadcast packet to a link in the
- SLB bond other than the active slave, the remote switch will forward it to
- all of the other links in the SLB bond, including the active slave. Without
- special handling, this would mean that Open vSwitch would forward a second
- copy of the packet to each switch port (other than the bond), including the
- port that originated the packet.
+ SLB bond other than the active member, the remote switch will forward
+ it to all of the other links in the SLB bond, including the active
+ member. Without special handling, this would mean that Open vSwitch
+ would forward a second copy of the packet to each switch port (other than
+ the bond), including the port that originated the packet.
Open vSwitch deals with this case by dropping packets received on any SLB
bonded link that have a source MAC+VLAN that has been learned on any other
@@ -226,11 +227,11 @@ SLB bonding has the following complications:
4. Suppose that a MAC+VLAN moves from an SLB bond to another port (e.g. when a
VM is migrated from a different hypervisor to this one), that the MAC+VLAN
emits a gratuitous ARP, and that Open vSwitch forwards that gratuitous ARP
- to a link in the SLB bond other than the active slave. The remote switch
- will forward the gratuitous ARP to all of the other links in the SLB bond,
- including the active slave. Without additional special handling, this would
- mean that Open vSwitch would learn that the MAC+VLAN was located on the SLB
- bond, as a consequence of rule #3.
+ to a link in the SLB bond other than the active member. The remote
+ switch will forward the gratuitous ARP to all of the other links in the SLB
+ bond, including the active member. Without additional special
+ handling, this would mean that Open vSwitch would learn that the MAC+VLAN
+ was located on the SLB bond, as a consequence of rule #3.
Open vSwitch avoids this problem by "locking" the MAC learning table entry
for a MAC+VLAN from which a gratuitous ARP was received from a non-SLB bond
diff --git a/Documentation/topics/datapath.rst b/Documentation/topics/datapath.rst
index 8585c79eb936099e6936c916c38bc3e9fe8363de..e6dcfbc199e23fb4a2819fe0bd9f01621bbcba42 100644
--- a/Documentation/topics/datapath.rst
+++ b/Documentation/topics/datapath.rst
@@ -261,5 +261,5 @@ Implement the headers and codes for compatibility with older kernel in
function should be prefixed with ``rpl_``. Otherwise, the function should be
prefixed with ``ovs_``. For special case when it is not possible to follow
this rule (e.g., the ``pskb_expand_head()`` function), the function name must
-be added to ``linux/compat/build-aux/export-check-whitelist``, otherwise, the
+be added to ``linux/compat/build-aux/export-check-allowlist``, otherwise, the
compilation check ``check-export-symbol`` will fail.
diff --git a/Documentation/topics/design.rst b/Documentation/topics/design.rst
index 22e966687bd9dd4bc929762460857afb1a0be4e8..656d60673e9857077dd4148cc3053c6db0486afd 100644
--- a/Documentation/topics/design.rst
+++ b/Documentation/topics/design.rst
@@ -70,79 +70,79 @@ that the message is suppressed.
.. table:: ``OFPT_PACKET_IN`` / ``NXT_PACKET_IN``
- =========================================== ======= =====
- master/
- message and reason code other slave
- =========================================== ======= =====
- ``OFPR_NO_MATCH`` yes ---
- ``OFPR_ACTION`` yes ---
- ``OFPR_INVALID_TTL`` --- ---
- ``OFPR_ACTION_SET`` (OF1.4+) yes ---
- ``OFPR_GROUP`` (OF1.4+) yes ---
- ``OFPR_PACKET_OUT`` (OF1.4+) yes ---
- =========================================== ======= =====
+ =========================================== ======== =========
+ primary/
+ message and reason code other secondary
+ =========================================== ======== =========
+ ``OFPR_NO_MATCH`` yes ---
+ ``OFPR_ACTION`` yes ---
+ ``OFPR_INVALID_TTL`` --- ---
+ ``OFPR_ACTION_SET`` (OF1.4+) yes ---
+ ``OFPR_GROUP`` (OF1.4+) yes ---
+ ``OFPR_PACKET_OUT`` (OF1.4+) yes ---
+ =========================================== ======== =========
.. table:: ``OFPT_FLOW_REMOVED`` / ``NXT_FLOW_REMOVED``
- =========================================== ======= =====
- master/
- message and reason code other slave
- =========================================== ======= =====
- ``OFPRR_IDLE_TIMEOUT`` yes ---
- ``OFPRR_HARD_TIMEOUT`` yes ---
- ``OFPRR_DELETE`` yes ---
- ``OFPRR_GROUP_DELETE`` (OF1.3+) yes ---
- ``OFPRR_METER_DELETE`` (OF1.4+) yes ---
- ``OFPRR_EVICTION`` (OF1.4+) yes ---
- =========================================== ======= =====
+ =========================================== ======== =========
+ primary/
+ message and reason code other secondary
+ =========================================== ======== =========
+ ``OFPRR_IDLE_TIMEOUT`` yes ---
+ ``OFPRR_HARD_TIMEOUT`` yes ---
+ ``OFPRR_DELETE`` yes ---
+ ``OFPRR_GROUP_DELETE`` (OF1.3+) yes ---
+ ``OFPRR_METER_DELETE`` (OF1.4+) yes ---
+ ``OFPRR_EVICTION`` (OF1.4+) yes ---
+ =========================================== ======== =========
.. table:: ``OFPT_PORT_STATUS``
- =========================================== ======= =====
- master/
- message and reason code other slave
- =========================================== ======= =====
- ``OFPPR_ADD`` yes yes
- ``OFPPR_DELETE`` yes yes
- ``OFPPR_MODIFY`` yes yes
- =========================================== ======= =====
-
+ =========================================== ======== =========
+ primary/
+ message and reason code other secondary
+ =========================================== ======== =========
+ ``OFPPR_ADD`` yes yes
+ ``OFPPR_DELETE`` yes yes
+ ``OFPPR_MODIFY`` yes yes
+ =========================================== ======== =========
+
.. table:: ``OFPT_ROLE_REQUEST`` / ``OFPT_ROLE_REPLY`` (OF1.4+)
-
- =========================================== ======= =====
- master/
- message and reason code other slave
- =========================================== ======= =====
- ``OFPCRR_MASTER_REQUEST`` --- ---
- ``OFPCRR_CONFIG`` --- ---
- ``OFPCRR_EXPERIMENTER`` --- ---
- =========================================== ======= =====
+
+ =========================================== ======== =========
+ primary/
+ message and reason code other secondary
+ =========================================== ======== =========
+ ``OFPCRR_PRIMARY_REQUEST`` --- ---
+ ``OFPCRR_CONFIG`` --- ---
+ ``OFPCRR_EXPERIMENTER`` --- ---
+ =========================================== ======== =========
.. table:: ``OFPT_TABLE_STATUS`` (OF1.4+)
- =========================================== ======= =====
- master/
- message and reason code other slave
- =========================================== ======= =====
- ``OFPTR_VACANCY_DOWN`` --- ---
- ``OFPTR_VACANCY_UP`` --- ---
- =========================================== ======= =====
+ =========================================== ======== =========
+ primary/
+ message and reason code other secondary
+ =========================================== ======== =========
+ ``OFPTR_VACANCY_DOWN`` --- ---
+ ``OFPTR_VACANCY_UP`` --- ---
+ =========================================== ======== =========
.. table:: ``OFPT_REQUESTFORWARD`` (OF1.4+)
- =========================================== ======= =====
- master/
- message and reason code other slave
- =========================================== ======= =====
- ``OFPRFR_GROUP_MOD`` --- ---
- ``OFPRFR_METER_MOD`` --- ---
- =========================================== ======= =====
+ =========================================== ======== =========
+ primary/
+ message and reason code other secondary
+ =========================================== ======== =========
+ ``OFPRFR_GROUP_MOD`` --- ---
+ ``OFPRFR_METER_MOD`` --- ---
+ =========================================== ======== =========
The ``NXT_SET_ASYNC_CONFIG`` message directly sets all of the values in this
table for the current connection. The ``OFPC_INVALID_TTL_TO_CONTROLLER`` bit
in the ``OFPT_SET_CONFIG`` message controls the setting for
-``OFPR_INVALID_TTL`` for the "master" role.
+``OFPR_INVALID_TTL`` for the "primary" role.
``OFPAT_ENQUEUE``
-----------------
diff --git a/Documentation/topics/dpdk/bridge.rst b/Documentation/topics/dpdk/bridge.rst
index f0ef42ecc32ca71ea2b44c402873b28b2e528e88..526d5c9590ecfc62640cdfa353ebf1b8fa025707 100644
--- a/Documentation/topics/dpdk/bridge.rst
+++ b/Documentation/topics/dpdk/bridge.rst
@@ -137,3 +137,80 @@ currently turned off by default.
To turn on SMC::
$ ovs-vsctl --no-wait set Open_vSwitch . other_config:smc-enable=true
+
+Datapath Classifier Performance
+-------------------------------
+
+The datapath classifier (dpcls) performs wildcard rule matching, a compute
+intensive process of matching a packet ``miniflow`` to a rule ``miniflow``. The
+code that does this compute work impacts datapath performance, and optimizing
+it can provide higher switching performance.
+
+Modern CPUs provide extensive SIMD instructions which can be used to get higher
+performance. The CPU OVS is being deployed on must be capable of running these
+SIMD instructions in order to take advantage of the performance benefits.
+In OVS v2.14 runtime CPU detection was introduced to enable identifying if
+these CPU ISA additions are available, and to allow the user to enable them.
+
+OVS provides multiple implementations of dpcls. The following command enables
+the user to check what implementations are available in a running instance ::
+
+ $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
+ Available lookup functions (priority : name)
+ 0 : autovalidator
+ 1 : generic
+ 0 : avx512_gather
+
+To set the priority of a lookup function, run the ``prio-set`` command ::
+
+ $ ovs-appctl dpif-netdev/subtable-lookup-prio-set avx512_gather 5
+ Lookup priority change affected 1 dpcls ports and 1 subtables.
+
+The highest priority lookup function is used for classification, and the output
+above indicates that one subtable of one DPCLS port has changed its lookup
+function due to the command being run. To verify the prioritization, re-run the
+get command, note the updated priority of the ``avx512_gather`` function ::
+
+ $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
+ Available lookup functions (priority : name)
+ 0 : autovalidator
+ 1 : generic
+ 5 : avx512_gather
+
+If two lookup functions have the same priority, the first one in the list is
+chosen, and the 2nd occurrence of that priority is not used. Put in logical
+terms, a subtable is chosen if its priority is greater than the previous
+best candidate.
+
+CPU ISA Testing and Validation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As multiple versions of DPCLS can co-exist, each with different CPU ISA
+optimizations, it is important to validate that they all give the exact same
+results. To easily test all DPCLS implementations, an ``autovalidator``
+implementation of the DPCLS exists. This implementation runs all other
+available DPCLS implementations, and verifies that the results are identical.
+
+Running the OVS unit tests with the autovalidator enabled ensures all
+implementations provide the same results. Note that the performance of the
+autovalidator is lower than all other implementations, as it tests the scalar
+implementation against itself, and against all other enabled DPCLS
+implementations.
+
+To adjust the DPCLS autovalidator priority, use this command ::
+
+ $ ovs-appctl dpif-netdev/subtable-lookup-prio-set autovalidator 7
+
+Running Unit Tests with Autovalidator
++++++++++++++++++++++++++++++++++++++
+
+To run the OVS unit test suite with the DPCLS autovalidator as the default
+implementation, it is required to recompile OVS. During the recompilation,
+the default priority of the ``autovalidator`` implementation is set to the
+maximum priority, ensuring every test will be run with every lookup
+implementation ::
+
+ $ ./configure --enable-autovalidator
+
+Compile OVS in debug mode to have ``ovs_assert`` statements error out if
+there is a mismatch in the DPCLS lookup implementation.
diff --git a/Documentation/topics/dpdk/index.rst b/Documentation/topics/dpdk/index.rst
index f2862ea7019a06e334a6ad98030534e892739870..a5be5e3440e276fee7f472133c769227099da86c 100644
--- a/Documentation/topics/dpdk/index.rst
+++ b/Documentation/topics/dpdk/index.rst
@@ -34,10 +34,8 @@ DPDK Support
/topics/dpdk/bridge
/topics/dpdk/phy
/topics/dpdk/vhost-user
- /topics/dpdk/ring
/topics/dpdk/vdev
/topics/dpdk/pmd
/topics/dpdk/qos
- /topics/dpdk/pdump
/topics/dpdk/jumbo-frames
/topics/dpdk/memory
diff --git a/Documentation/topics/dpdk/pdump.rst b/Documentation/topics/dpdk/pdump.rst
deleted file mode 100644
index ce03b327af9a4b3119bff17f9395c20475783ab4..0000000000000000000000000000000000000000
--- a/Documentation/topics/dpdk/pdump.rst
+++ /dev/null
@@ -1,74 +0,0 @@
-..
- Licensed under the Apache License, Version 2.0 (the "License"); you may
- not use this file except in compliance with the License. You may obtain
- a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- License for the specific language governing permissions and limitations
- under the License.
-
- Convention for heading levels in Open vSwitch documentation:
-
- ======= Heading 0 (reserved for the title in a document)
- ------- Heading 1
- ~~~~~~~ Heading 2
- +++++++ Heading 3
- ''''''' Heading 4
-
- Avoid deeper levels because they do not render well.
-
-=====
-pdump
-=====
-
-.. versionadded:: 2.6.0
-
-.. warning::
-
- DPDK pdump support is deprecated in OVS and will be removed in next
- releases.
-
-pdump allows you to listen on DPDK ports and view the traffic that is passing
-on them. To use this utility, one must have libpcap installed on the system.
-Furthermore, DPDK must be built with ``CONFIG_RTE_LIBRTE_PDUMP=y`` and
-``CONFIG_RTE_LIBRTE_PMD_PCAP=y``. OVS should be built with
-``--enable-dpdk-pdump`` configuration option.
-
-.. warning::
-
- A performance decrease is expected when using a monitoring application like
- the DPDK pdump app.
-
-To use pdump, simply launch OVS as usual, then navigate to the ``app/pdump``
-directory in DPDK, ``make`` the application and run like so::
-
- $ sudo ./build/app/dpdk-pdump -- \
- --pdump port=0,queue=0,rx-dev=/tmp/pkts.pcap
-
-The above command captures traffic received on queue 0 of port 0 and stores it
-in ``/tmp/pkts.pcap``. Other combinations of port numbers, queues numbers and
-pcap locations are of course also available to use. For example, to capture all
-packets that traverse port 0 in a single pcap file::
-
- $ sudo ./build/app/dpdk-pdump -- \
- --pdump 'port=0,queue=*,rx-dev=/tmp/pkts.pcap,tx-dev=/tmp/pkts.pcap'
-
-.. note::
-
- ``XDG_RUNTIME_DIR`` environment variable might need to be adjusted to
- OVS runtime directory (``/var/run/openvswitch`` in most cases) for
- ``dpdk-pdump`` utility if OVS started by non-root user.
-
-Many tools are available to view the contents of the pcap file. Once example is
-tcpdump. Issue the following command to view the contents of ``pkts.pcap``::
-
- $ tcpdump -r pkts.pcap
-
-More information on the pdump app and its usage can be found in the `DPDK
-documentation`__.
-
-__ http://dpdk.org/doc/guides/tools/pdump.html
diff --git a/Documentation/topics/dpdk/phy.rst b/Documentation/topics/dpdk/phy.rst
index 38e52c8deb5410a70b4ad6780ab6ed42d80957fc..986dbd38ebd312954361588f53267d51789a7ff0 100644
--- a/Documentation/topics/dpdk/phy.rst
+++ b/Documentation/topics/dpdk/phy.rst
@@ -117,7 +117,7 @@ tool::
For more information, refer to the `DPDK documentation <dpdk-drivers_>`__.
-.. _dpdk-drivers: http://dpdk.org/doc/guides/linux_gsg/linux_drivers.html
+.. _dpdk-drivers: https://doc.dpdk.org/guides-20.11/linux_gsg/linux_drivers.html
.. _dpdk-phy-multiqueue:
@@ -218,18 +218,24 @@ If the log is not seen then the port can be detached like so::
Hotplugging with IGB_UIO
~~~~~~~~~~~~~~~~~~~~~~~~
-As of DPDK 19.11, default igb_uio hotplugging behavior changes from
+.. important::
+
+ As of DPDK v20.11 IGB_UIO has been deprecated and is no longer built as
+ part of the default DPDK library. Below is intended for those who wish
+ to use IGB_UIO outside of the standard DPDK build from v20.11 onwards.
+
+As of DPDK v19.11, default igb_uio hotplugging behavior changed from
previous DPDK versions.
-With DPDK 19.11, if no device is bound to igb_uio when OVS is launched then
-the IOVA mode may be set to virtual addressing for DPDK. This is incompatible
-for hotplugging with igb_uio.
+From DPDK v19.11 onwards, if no device is bound to igb_uio when OVS is
+launched then the IOVA mode may be set to virtual addressing for DPDK.
+This is incompatible with igb_uio hotplugging.
To hotplug a port with igb_uio in this case, DPDK must be configured to use
physical addressing for IOVA mode. For more information regarding IOVA modes
in DPDK please refer to the `DPDK IOVA Mode Detection`__.
-__ https://doc.dpdk.org/guides-19.11/prog_guide/env_abstraction_layer.html#iova-mode-detection
+__ https://doc.dpdk.org/guides-20.11/prog_guide/env_abstraction_layer.html#iova-mode-detection
To configure OVS DPDK to use physical addressing for IOVA::
@@ -261,7 +267,7 @@ Representors are multi devices created on top of one PF.
For more information, refer to the `DPDK documentation`__.
-__ https://doc.dpdk.org/guides-19.11/prog_guide/switch_representation.html
+__ https://doc.dpdk.org/guides-20.11/prog_guide/switch_representation.html
Prior to port representors there was a one-to-one relationship between the PF
and the eth device. With port representors the relationship becomes one PF to
@@ -379,12 +385,65 @@ an eth device whose mac address is ``00:11:22:33:44:55``::
$ ovs-vsctl add-port br0 dpdk-mac -- set Interface dpdk-mac type=dpdk \
options:dpdk-devargs="class=eth,mac=00:11:22:33:44:55"
+Representor specific configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In some topologies, a VF must be configured before being assigned to a
+guest (VM) machine. This configuration is done through VF-specific fields
+in the ``options`` column of the ``Interface`` table.
+
+.. important::
+
+ Some DPDK ports use `bifurcated drivers <bifurcated-drivers_>`__,
+ which means that a kernel netdevice remains when Open vSwitch is stopped.
+
+ In such a case, any configuration applied to a VF would remain set on the
+ kernel netdevice, and be inherited from it when Open vSwitch is restarted,
+ even if the options described in this section are unset from Open vSwitch.
+
+.. _bifurcated-drivers: https://doc.dpdk.org/guides-20.11/linux_gsg/linux_drivers.html#bifurcated-driver
+
+- Configure the VF MAC address::
+
+ $ ovs-vsctl set Interface dpdk-rep0 options:dpdk-vf-mac=00:11:22:33:44:55
+
+The requested MAC address is assigned to the port and is listed as part of
+its options::
+
+ $ ovs-appctl dpctl/show
+ [...]
+ port 3: dpdk-rep0 (dpdk: configured_rx_queues=1, ..., dpdk-vf-mac=00:11:22:33:44:55, ...)
+
+ $ ovs-vsctl show
+ [...]
+ Port dpdk-rep0
+ Interface dpdk-rep0
+ type: dpdk
+ options: {dpdk-devargs="", dpdk-vf-mac="00:11:22:33:44:55"}
+
+ $ ovs-vsctl get Interface dpdk-rep0 status
+ {dpdk-vf-mac="00:11:22:33:44:55", ...}
+
+ $ ovs-vsctl list Interface dpdk-rep0 | grep 'mac_in_use\|options'
+ mac_in_use : "00:11:22:33:44:55"
+ options : {dpdk-devargs="", dpdk-vf-mac="00:11:22:33:44:55"}
+
+The value listed as ``dpdk-vf-mac`` is only a request from the user and is
+possibly not yet applied.
+
+When the requested configuration is successfully applied to the port,
+this MAC address is then also shown in the column ``mac_in_use`` of
+the ``Interface`` table. On failure however, ``mac_in_use`` will keep its
+previous value, which will thus differ from ``dpdk-vf-mac``.
+
Jumbo Frames
------------
DPDK physical ports can be configured to use Jumbo Frames. For more
information, refer to :doc:`jumbo-frames`.
+.. _lsc-detection:
+
Link State Change (LSC) detection configuration
-----------------------------------------------
diff --git a/Documentation/topics/dpdk/ring.rst b/Documentation/topics/dpdk/ring.rst
deleted file mode 100644
index 9d91498c71bdc863a804fccaed74d184f417aa02..0000000000000000000000000000000000000000
--- a/Documentation/topics/dpdk/ring.rst
+++ /dev/null
@@ -1,92 +0,0 @@
-..
- Licensed under the Apache License, Version 2.0 (the "License"); you may
- not use this file except in compliance with the License. You may obtain
- a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- License for the specific language governing permissions and limitations
- under the License.
-
- Convention for heading levels in Open vSwitch documentation:
-
- ======= Heading 0 (reserved for the title in a document)
- ------- Heading 1
- ~~~~~~~ Heading 2
- +++++++ Heading 3
- ''''''' Heading 4
-
- Avoid deeper levels because they do not render well.
-
-===============
-DPDK Ring Ports
-===============
-
-.. warning::
-
- DPDK ring ports are considered *deprecated*. Please migrate to
- virtio-based interfaces, e.g. :doc:`vhost-user ` ports,
- ``net_virtio_user`` :doc:`DPDK vdev `.
-
-.. warning::
-
- DPDK ring interfaces cannot be used for guest communication and are retained
- mainly for backwards compatibility purposes. In nearly all cases,
- :doc:`vhost-user ports ` are a better choice and should be used
- instead.
-
-OVS userspace switching supports ring ports implemented using DPDK's
-``librte_ring`` library. For more information on this library, refer
-to the `DPDK documentation`_.
-
-.. important::
-
- To use any DPDK-backed interface, you must ensure your bridge is configured
- correctly. For more information, refer to :doc:`bridge`.
-
-Quick Example
--------------
-
-This example demonstrates how to add a ``dpdkr`` port to an existing bridge
-called ``br0``::
-
- $ ovs-vsctl add-port br0 dpdkr0 -- set Interface dpdkr0 type=dpdkr
-
-dpdkr
------
-
-To use ring ports, you must first add said ports to the switch. Unlike
-:doc:`vhost-user ports `, ring port names must take a specific
-format, ``dpdkrNN``, where ``NN`` is the port ID. For example::
-
- $ ovs-vsctl add-port br0 dpdkr0 -- set Interface dpdkr0 type=dpdkr
-
-Once the port has been added to the switch, they can be used by host processes.
-A sample loopback application - ``test-dpdkr`` - is included with Open vSwitch.
-To use this, run the following::
-
- $ ./tests/test-dpdkr -c 1 -n 4 --proc-type=secondary -- -n 0
-
-Further functionality would require developing your own application. Refer to
-the `DPDK documentation`_ for more information on how to do this.
-
-Adding dpdkr ports to the guest
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-It is **not** recommended to use ring ports from guests. Historically, this was
-possible using a patched version of QEMU and the IVSHMEM feature provided with
-DPDK. However, this functionality was removed because:
-
-- The IVSHMEM library was removed from DPDK in DPDK 16.11
-
-- Support for IVSHMEM was never upstreamed to QEMU and has been publicly
- rejected by the QEMU community
-
-- :doc:`vhost-user interfaces ` are the de facto DPDK-based path to
- guests
-
-.. _DPDK documentation:
- https://doc.dpdk.org/guides-19.11/prog_guide/ring_lib.html
diff --git a/Documentation/topics/dpdk/vdev.rst b/Documentation/topics/dpdk/vdev.rst
index 1c0df7f4b106d1b300aeea3daca1ce64c2520ccd..7bd48165e8dbeb5ea46091f45fc37445cd87969a 100644
--- a/Documentation/topics/dpdk/vdev.rst
+++ b/Documentation/topics/dpdk/vdev.rst
@@ -63,4 +63,4 @@ run::
More information on the different types of virtual DPDK PMDs can be found in
the `DPDK documentation`__.
-__ http://dpdk.org/doc/guides/nics/overview.html
+__ https://doc.dpdk.org/guides-20.11/nics/overview.html
diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst
index c6c6fd8bdeff94b9d520c0a1294cd4cb6779ab62..bcd51e65c0e062cfe7fe1d42d580b6ddc820d348 100644
--- a/Documentation/topics/dpdk/vhost-user.rst
+++ b/Documentation/topics/dpdk/vhost-user.rst
@@ -25,9 +25,9 @@
DPDK vHost User Ports
=====================
-OVS userspace switching supports supports vHost user ports as a
-primary way to interact with guests. For more information on vHost
-User, refer to the `QEMU documentation`_ on same.
+OVS userspace switching supports vHost user ports as a primary way to
+interact with guests. For more information on vHost User, refer to
+the `QEMU documentation`_ on same.
.. important::
@@ -340,8 +340,8 @@ The default value is ``false``.
fixes (like userfaultfd leak) was released in 3.0.1.
DPDK Post-copy feature requires avoiding to populate the guest memory
- (application must not call mlock* syscall). So enabling mlockall and
- dequeue zero-copy features is mis-compatible with post-copy feature.
+ (application must not call mlock* syscall). So enabling mlockall is
+ incompatible with the post-copy feature.
Note that during migration of vhost-user device, PMD threads hang for the
time of faulted pages download from source host. Transferring 1GB hugepage
@@ -389,23 +389,7 @@ application in the VM.
To begin, instantiate a guest as described in :ref:`dpdk-vhost-user` or
:ref:`dpdk-vhost-user-client`. Once started, connect to the VM, download the
-DPDK sources to VM and build DPDK::
-
- $ cd /root/dpdk/
- $ wget https://fast.dpdk.org/rel/dpdk-19.11.tar.xz
- $ tar xf dpdk-19.11.tar.xz
- $ export DPDK_DIR=/root/dpdk/dpdk-19.11
- $ export DPDK_TARGET=x86_64-native-linuxapp-gcc
- $ export DPDK_BUILD=$DPDK_DIR/$DPDK_TARGET
- $ cd $DPDK_DIR
- $ make install T=$DPDK_TARGET DESTDIR=install
-
-Build the test-pmd application::
-
- $ cd app/test-pmd
- $ export RTE_SDK=$DPDK_DIR
- $ export RTE_TARGET=$DPDK_TARGET
- $ make
+DPDK sources to VM and build DPDK as described in :ref:`dpdk-install`.
Setup huge pages and DPDK devices using UIO::
@@ -553,73 +537,6 @@ shown with::
$ ovs-vsctl get Interface dpdkvhostclient0 statistics:ovs_tx_retries
-vhost-user Dequeue Zero Copy (experimental)
--------------------------------------------
-
-Normally when dequeuing a packet from a vHost User device, a memcpy operation
-must be used to copy that packet from guest address space to host address
-space. This memcpy can be removed by enabling dequeue zero-copy like so::
-
- $ ovs-vsctl add-port br0 dpdkvhostuserclient0 -- set Interface \
- dpdkvhostuserclient0 type=dpdkvhostuserclient \
- options:vhost-server-path=/tmp/dpdkvhostclient0 \
- options:dq-zero-copy=true
-
-With this feature enabled, a reference (pointer) to the packet is passed to
-the host, instead of a copy of the packet. Removing this memcpy can give a
-performance improvement for some use cases, for example switching large packets
-between different VMs. However additional packet loss may be observed.
-
-Note that the feature is disabled by default and must be explicitly enabled
-by setting the ``dq-zero-copy`` option to ``true`` while specifying the
-``vhost-server-path`` option as above. If you wish to split out the command
-into multiple commands as below, ensure ``dq-zero-copy`` is set before
-``vhost-server-path``::
-
- $ ovs-vsctl set Interface dpdkvhostuserclient0 options:dq-zero-copy=true
- $ ovs-vsctl set Interface dpdkvhostuserclient0 \
- options:vhost-server-path=/tmp/dpdkvhostclient0
-
-The feature is only available to ``dpdkvhostuserclient`` port types.
-
-A limitation exists whereby if packets from a vHost port with
-``dq-zero-copy=true`` are destined for a ``dpdk`` type port, the number of tx
-descriptors (``n_txq_desc``) for that port must be reduced to a smaller number,
-128 being the recommended value. This can be achieved by issuing the following
-command::
-
- $ ovs-vsctl set Interface dpdkport options:n_txq_desc=128
-
-Note: The sum of the tx descriptors of all ``dpdk`` ports the VM will send to
-should not exceed 128. For example, in case of a bond over two physical ports
-in balance-tcp mode, one must divide 128 by the number of links in the bond.
-
-Refer to :ref:`dpdk-queues-sizes` for more information.
-
-The reason for this limitation is due to how the zero copy functionality is
-implemented. The vHost device's 'tx used vring', a virtio structure used for
-tracking used ie. sent descriptors, will only be updated when the NIC frees
-the corresponding mbuf. If we don't free the mbufs frequently enough, that
-vring will be starved and packets will no longer be processed. One way to
-ensure we don't encounter this scenario, is to configure ``n_txq_desc`` to a
-small enough number such that the 'mbuf free threshold' for the NIC will be hit
-more often and thus free mbufs more frequently. The value of 128 is suggested,
-but values of 64 and 256 have been tested and verified to work too, with
-differing performance characteristics. A value of 512 can be used too, if the
-virtio queue size in the guest is increased to 1024 (available to configure in
-QEMU versions v2.10 and greater). This value can be set like so::
-
- $ qemu-system-x86_64 ... -chardev socket,id=char1,path=,server
- -netdev type=vhost-user,id=mynet1,chardev=char1,vhostforce
- -device virtio-net-pci,mac=00:00:00:00:00:01,netdev=mynet1,
- tx_queue_size=1024
-
-Because of this limitation, this feature is considered 'experimental'.
-
-.. note::
-
- Post-copy Live Migration is not compatible with dequeue zero copy.
-
Further information can be found in the
`DPDK documentation
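-<https://doc.dpdk.org/guides-19.11/prog_guide/vhost_lib.html>`__
+<https://doc.dpdk.org/guides-20.11/prog_guide/vhost_lib.html>`__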
diff --git a/Documentation/topics/ovsdb-replication.rst b/Documentation/topics/ovsdb-replication.rst
index 950dfc9b7cde51ea3af8b6a7b9f9486d291a6680..e762f07302fa42705507be1174c6e1fdac0cde03 100644
--- a/Documentation/topics/ovsdb-replication.rst
+++ b/Documentation/topics/ovsdb-replication.rst
@@ -91,7 +91,7 @@ When sending a monitor request the standby server is doing the following:
4. For each database with the same schema in both the active and standby
servers: construct and send a monitor request message specifying the tables
that will be monitored (i.e. all the tables on the database except the ones
- blacklisted [*]).
+ explicitly excluded [*]).
5. Set the standby database to the current state of the active database.
@@ -100,10 +100,9 @@ receive notifications of changes occurring to the tables specified in the
request. The process of handling these notifications is detailed in the next
section.
-[*] A set of tables that will be excluded from replication can be configure as
-a blacklist of tables via the command line option
-``--sync-exclude-tables=db:table[,db:table]...``, where db corresponds to the
-database where the table resides.
+[*] A set of tables that will be excluded from replication can be configured
+via the command line option ``--sync-exclude-tables=db:table[,db:table]...``,
+where db corresponds to the database where the table resides.
Replication Process
-------------------
diff --git a/Documentation/topics/porting.rst b/Documentation/topics/porting.rst
index b327b2b0d2ee20f56ed28c9da3b6a41bd6bf38a6..839b04d52eeacf8ec6efd5bb8f286eea93d2a403 100644
--- a/Documentation/topics/porting.rst
+++ b/Documentation/topics/porting.rst
@@ -42,8 +42,8 @@ concordance, indexed by the area of the source tree:
datapath/ vport ---
vswitchd/ iface port
ofproto/ port bundle
- ofproto/bond.c slave bond
- lib/lacp.c slave lacp
+ ofproto/bond.c member bond
+ lib/lacp.c member lacp
lib/netdev.c netdev ---
database Interface Port
diff --git a/Documentation/topics/testing.rst b/Documentation/topics/testing.rst
index 161e9d442ee67b88482cab8fdfcaaf13372d12bd..951fe9e8517e58d45509f263230f6c87f4c5f6fe 100644
--- a/Documentation/topics/testing.rst
+++ b/Documentation/topics/testing.rst
@@ -353,7 +353,7 @@ All tests are skipped if no hugepages are configured. User must look into the DP
manual to figure out how to `Configure hugepages`_.
The phy test will skip if no compatible physical device is available.
-.. _Configure hugepages: https://doc.dpdk.org/guides-19.11/linux_gsg/sys_reqs.html
+.. _Configure hugepages: https://doc.dpdk.org/guides-20.11/linux_gsg/sys_reqs.html
All the features documented under `Unit Tests`_ are available for the DPDK
datapath testsuite.
@@ -381,6 +381,17 @@ The results of the testsuite are in ``tests/system-kmod-testsuite.dir``.
All the features documented under `Unit Tests`_ are available for the kernel
datapath testsuite.
+.. note::
+ Many of the kernel tests are dependent on the utilities present in the
+ iproute2 package, especially the 'ip' command. If there are many
+ otherwise unexplained errors it may be necessary to update the iproute2
+ package utilities on the system. It is beyond the scope of this
+ documentation to explain all that is necessary to build and install
+ an updated iproute2 utilities package. The package is available from
+ the Linux kernel organization open source git repositories.
+
+ https://git.kernel.org/pub/scm/linux/kernel/git/shemminger/iproute2.git
+
.. _testing-static-analysis:
Static Code Analysis
diff --git a/Documentation/topics/userspace-tso.rst b/Documentation/topics/userspace-tso.rst
index 94eddc0b2fd0ce951af8ae8ba5c309f8e45d4c34..bd64e7ed318b2c4aa4f6da0f310518a7e51eaf48 100644
--- a/Documentation/topics/userspace-tso.rst
+++ b/Documentation/topics/userspace-tso.rst
@@ -46,14 +46,14 @@ datasheet for compatibility. Secondly, the NIC must have an associated DPDK
Poll Mode Driver (PMD) which supports `TSO`. For a list of features per PMD,
refer to the `DPDK documentation`__.
-__ https://doc.dpdk.org/guides-19.11/nics/overview.html
+__ https://doc.dpdk.org/guides-20.11/nics/overview.html
Enabling TSO
~~~~~~~~~~~~
The TSO support may be enabled via a global config value
``userspace-tso-enable``. Setting this to ``true`` enables TSO support for
-all ports.
+all ports::
$ ovs-vsctl set Open_vSwitch . other_config:userspace-tso-enable=true
@@ -91,20 +91,24 @@ The current OvS userspace `TSO` implementation supports flat and VLAN networks
only (i.e. no support for `TSO` over tunneled connection [VxLAN, GRE, IPinIP,
etc.]).
+The NIC driver must support and advertise checksum offload for TCP and UDP.
+However, SCTP checksum offload is not mandatory because very few drivers
+advertised support for it and SCTP was not a widely used protocol when this
+feature was introduced in Open vSwitch. Currently, if the NIC supports SCTP
+checksum offload, it is enabled; otherwise TSO can still be enabled, but SCTP
+packets sent to the NIC will be dropped.
+
There is no software implementation of TSO, so all ports attached to the
datapath must support TSO or packets using that feature will be dropped
on ports without TSO support. That also means guests using vhost-user
in client mode will receive TSO packets regardless of TSO being enabled
or disabled within the guest.
-When the NIC performing the segmentation is using the i40e DPDK PMD, a fix
-must be included in the DPDK build, otherwise TSO will not work. The fix can
-be found on `DPDK patchwork`__.
-
-__ https://patches.dpdk.org/patch/64136/
-
-This fix is expected to be included in the 19.11.1 release. When OVS migrates
-to this DPDK release, this limitation can be removed.
+All kernel devices that use the raw socket interface (veth, for example)
+require the kernel commit 9d2f67e43b73 ("net/packet: fix packet drop as of
+virtio gso") in order to work properly. This commit was merged in upstream
+kernel 4.19-rc7, so make sure your kernel is either newer or contains the
+backport.
~~~~~~~~~~~~~~~~~~
Performance Tuning
diff --git a/Documentation/topics/windows.rst b/Documentation/topics/windows.rst
index 3a103b4e8983b9e4480c5a560c4d9b161c9b4ce6..be6e2861e038c55ef4600b2f415df0abce9a2cc0 100644
--- a/Documentation/topics/windows.rst
+++ b/Documentation/topics/windows.rst
@@ -253,9 +253,9 @@ Netlink Message Parser
~~~~~~~~~~~~~~~~~~~~~~
The communication between OVS userspace and OVS kernel datapath is in the form
-of Netlink messages [1]_, [8]_. More details about this are provided below. In the
-kernel, a full fledged netlink message parser has been implemented along the
-lines of the netlink message parser in OVS userspace. In fact, a lot of the
+of Netlink messages [1]_, [8]_. More details about this are provided below. In
+the kernel, a full-fledged netlink message parser has been implemented along
+the lines of the netlink message parser in OVS userspace. In fact, a lot of the
code is ported code.
On the lines of ``struct ofpbuf`` in OVS userspace, a managed buffer has been
diff --git a/Documentation/tutorials/faucet.rst b/Documentation/tutorials/faucet.rst
index b7bfb575bc45c61d0fe4b95dc2682704a025e44a..6aa4d39aa8ab8f0cd990d177ac5854e6c8c71c2b 100644
--- a/Documentation/tutorials/faucet.rst
+++ b/Documentation/tutorials/faucet.rst
@@ -68,9 +68,9 @@ approaches:
$ git clone https://github.com/openvswitch/ovs.git
$ cd ovs
- The default checkout is the master branch. You can check out a tag
- (such as v2.8.0) or a branch (such as origin/branch-2.8), if you
- prefer.
+ The default checkout is the master branch. Stay on the master branch for
+ this tutorial, because it includes functionality that the tutorial
+ requires.
2. If you do not already have an installed copy of Open vSwitch on your system,
or if you do not want to use it for the sandbox (the sandbox will not
@@ -80,6 +80,13 @@ approaches:
$ tutorial/ovs-sandbox
+ .. note::
+
+ The default behaviour for some of the commands used in this tutorial
+ changed in Open vSwitch versions 2.9.x and 2.10.x which breaks the
+ tutorial. We recommend following step 3 and building master from
+ source or using a system Open vSwitch that is version 2.8.x or older.
+
If it is successful, you will find yourself in a subshell environment, which
is the sandbox (you can exit with ``exit`` or Control+D). If so, you're
finished and do not need to complete the rest of the steps. If it fails,
@@ -90,6 +97,12 @@ approaches:
tutorial, there is no need to compile the Linux kernel module, or to use any
of the optional libraries such as OpenSSL, DPDK, or libcap-ng.
+ If you are using a Linux system that uses apt and have some ``deb-src``
+ repos listed in ``/etc/apt/sources.list``, often an easy way to install
+ the build dependencies for a package is to use ``build-dep``::
+
+ $ sudo apt-get build-dep openvswitch
+
4. Configure and build Open vSwitch::
$ ./boot.sh
@@ -130,7 +143,7 @@ between one and the other.
2. Build a docker container image::
- $ docker build -t faucet/faucet .
+ $ sudo docker build -t faucet/faucet -f Dockerfile.faucet .
This will take a few minutes.
@@ -147,7 +160,7 @@ between one and the other.
4. Create a container and start Faucet::
- $ docker run -d --name faucet --restart=always -v $(pwd)/inst/:/etc/faucet/ -v $(pwd)/inst/:/var/log/faucet/ -p 6653:6653 -p 9302:9302 faucet/faucet
+ $ sudo docker run -d --name faucet --restart=always -v $(pwd)/inst/:/etc/faucet/ -v $(pwd)/inst/:/var/log/faucet/ -p 6653:6653 -p 9302:9302 faucet/faucet
5. Look in ``inst/faucet.log`` to verify that Faucet started. It will
probably start with an exception and traceback because we have not
@@ -156,17 +169,17 @@ between one and the other.
6. Later on, to make a new or updated Faucet configuration take
effect quickly, you can run::
- $ docker exec faucet pkill -HUP -f faucet.faucet
+ $ sudo docker exec faucet pkill -HUP -f faucet.faucet
Another way is to stop and start the Faucet container::
- $ docker restart faucet
+ $ sudo docker restart faucet
You can also stop and delete the container; after this, to start it
again, you need to rerun the ``docker run`` command::
- $ docker stop faucet
- $ docker rm faucet
+ $ sudo docker stop faucet
+ $ sudo docker rm faucet
Overview
--------
@@ -260,17 +273,16 @@ to be 0x1.
This also sets high MAC learning and ARP timeouts. The defaults are
5 minutes and about 8 minutes, which are fine in production but
- sometimes too fast for manual experimentation. (Don't use a timeout
- bigger than about 65000 seconds because it will crash Faucet.)
+ sometimes too fast for manual experimentation.
Now restart Faucet so that the configuration takes effect, e.g.::
- $ docker restart faucet
+ $ sudo docker restart faucet
Assuming that the configuration update is successful, you should now
see a new line at the end of ``inst/faucet.log``::
- Jan 06 15:14:35 faucet INFO Add new datapath DPID 1 (0x1)
+ Sep 10 06:44:10 faucet INFO Add new datapath DPID 1 (0x1)
Faucet is now waiting for a switch with datapath ID 0x1 to connect to
it over OpenFlow, so our next step is to create a switch with OVS and
@@ -319,18 +331,24 @@ information, run ``man ovs-vswitchd.conf.db`` and search for
means that, for example, there is never a time when the controller
is set but it has not yet been configured as out-of-band.
+Faucet requires ports to be in the up state before it will configure them. In
+Open vSwitch versions earlier than 2.11.0 dummy ports started in the down state.
+You will need to force them to come up with the following ``ovs-appctl`` command
+(please skip this step if using a newer version of Open vSwitch)::
+
+ $ ovs-appctl netdev-dummy/set-admin-state up
+
Now, if you look at ``inst/faucet.log`` again, you should see that
Faucet recognized and configured the new switch and its ports::
- Jan 06 15:17:10 faucet INFO DPID 1 (0x1) connected
- Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Cold start configuring DP
- Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Configuring VLAN 100 vid:100 ports:Port 1,Port 2,Port 3
- Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Configuring VLAN 200 vid:200 ports:Port 4,Port 5
- Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 1 up, configuring
- Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 2 up, configuring
- Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 3 up, configuring
- Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 4 up, configuring
- Jan 06 15:17:10 faucet.valve INFO DPID 1 (0x1) Port 5 up, configuring
+ Sep 10 06:45:03 faucet.valve INFO DPID 1 (0x1) switch-1 Cold start configuring DP
+ Sep 10 06:45:03 faucet.valve INFO DPID 1 (0x1) switch-1 Configuring VLAN 100 vid:100 ports:Port 1,Port 2,Port 3
+ Sep 10 06:45:03 faucet.valve INFO DPID 1 (0x1) switch-1 Configuring VLAN 200 vid:200 ports:Port 4,Port 5
+ Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 1 (1) up
+ Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 2 (2) up
+ Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 3 (3) up
+ Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 4 (4) up
+ Sep 10 06:45:24 faucet.valve INFO DPID 1 (0x1) switch-1 Port 5 (5) up
Over on the Open vSwitch side, you can see a lot of related activity
if you take a look in ``sandbox/ovs-vswitchd.log``. For example, here
@@ -340,51 +358,48 @@ ports and capabilities::
rconn|INFO|br0<->tcp:127.0.0.1:6653: connecting...
vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_HELLO (OF1.4) (xid=0x1):
version bitmap: 0x01, 0x02, 0x03, 0x04, 0x05
- vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_HELLO (OF1.3) (xid=0x2f24810a):
+ vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_HELLO (OF1.3) (xid=0xdb9dab08):
version bitmap: 0x01, 0x02, 0x03, 0x04
vconn|DBG|tcp:127.0.0.1:6653: negotiated OpenFlow version 0x04 (we support version 0x05 and earlier, peer supports version 0x04 and earlier)
rconn|INFO|br0<->tcp:127.0.0.1:6653: connected
- vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_ECHO_REQUEST (OF1.3) (xid=0x2f24810b): 0 bytes of payload
- vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_ECHO_REPLY (OF1.3) (xid=0x2f24810b): 0 bytes of payload
- vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FEATURES_REQUEST (OF1.3) (xid=0x2f24810c):
- vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_FEATURES_REPLY (OF1.3) (xid=0x2f24810c): dpid:0000000000000001
- n_tables:254, n_buffers:0
- capabilities: FLOW_STATS TABLE_STATS PORT_STATS GROUP_STATS QUEUE_STATS
- vconn|DBG|tcp:127.0.0.1:6653: received: OFPST_PORT_DESC request (OF1.3) (xid=0x2f24810d): port=ANY
- vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPST_PORT_DESC reply (OF1.3) (xid=0x2f24810d):
+ vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FEATURES_REQUEST (OF1.3) (xid=0xdb9dab09):
+ vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_FEATURES_REPLY (OF1.3) (xid=0xdb9dab09): dpid:0000000000000001
+ n_tables:254, n_buffers:0
+ capabilities: FLOW_STATS TABLE_STATS PORT_STATS GROUP_STATS QUEUE_STATS
+ vconn|DBG|tcp:127.0.0.1:6653: received: OFPST_PORT_DESC request (OF1.3) (xid=0xdb9dab0a): port=ANY
+ vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPST_PORT_DESC reply (OF1.3) (xid=0xdb9dab0a):
1(p1): addr:aa:55:aa:55:00:14
- config: PORT_DOWN
- state: LINK_DOWN
+ config: 0
+ state: LIVE
speed: 0 Mbps now, 0 Mbps max
2(p2): addr:aa:55:aa:55:00:15
- config: PORT_DOWN
- state: LINK_DOWN
+ config: 0
+ state: LIVE
speed: 0 Mbps now, 0 Mbps max
3(p3): addr:aa:55:aa:55:00:16
- config: PORT_DOWN
- state: LINK_DOWN
+ config: 0
+ state: LIVE
speed: 0 Mbps now, 0 Mbps max
4(p4): addr:aa:55:aa:55:00:17
- config: PORT_DOWN
- state: LINK_DOWN
+ config: 0
+ state: LIVE
speed: 0 Mbps now, 0 Mbps max
5(p5): addr:aa:55:aa:55:00:18
- config: PORT_DOWN
- state: LINK_DOWN
+ config: 0
+ state: LIVE
speed: 0 Mbps now, 0 Mbps max
- LOCAL(br0): addr:c6:64:ff:59:48:41
- config: PORT_DOWN
- state: LINK_DOWN
+ LOCAL(br0): addr:42:51:a1:c4:97:45
+ config: 0
+ state: LIVE
speed: 0 Mbps now, 0 Mbps max
After that, you can see Faucet delete all existing flows and then
start adding new ones::
- vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0x2f24810e): DEL table:255 priority=0 actions=drop
- vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_BARRIER_REQUEST (OF1.3) (xid=0x2f24810f):
- vconn|DBG|tcp:127.0.0.1:6653: sent (Success): OFPT_BARRIER_REPLY (OF1.3) (xid=0x2f24810f):
- vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0x2f248110): ADD priority=0 cookie:0x5adc15c0 out_port:0 actions=drop
- vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0x2f248111): ADD table:1 priority=0 cookie:0x5adc15c0 out_port:0 actions=drop
+ vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0xdb9dab0f): DEL table:255 priority=0 actions=drop
+ vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0xdb9dab10): ADD priority=0 cookie:0x5adc15c0 out_port:0 actions=drop
+ vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0xdb9dab11): ADD table:1 priority=0 cookie:0x5adc15c0 out_port:0 actions=goto_table:2
+ vconn|DBG|tcp:127.0.0.1:6653: received: OFPT_FLOW_MOD (OF1.3) (xid=0xdb9dab12): ADD table:2 priority=0 cookie:0x5adc15c0 out_port:0 actions=goto_table:3
...
OpenFlow Layer
@@ -393,7 +408,8 @@ OpenFlow Layer
Let's take a look at the OpenFlow tables that Faucet set up. Before
we do that, it's helpful to take a look at ``docs/architecture.rst``
in the Faucet documentation to learn how Faucet structures its flow
-tables. In summary, this document says:
+tables. In summary, this document says that when all features are enabled
+our table layout will be:
Table 0
Port-based ACLs
@@ -456,38 +472,43 @@ this::
$ dump-flows br0
-First, table 0 has a flow that just jumps to table 1 for each
-configured port, and drops other unrecognized packets. Presumably it
-will do more if we configured port-based ACLs::
+To reduce resource utilisation on hardware switches, Faucet will try to install
+the minimal set of OpenFlow tables to match the features enabled in
+``faucet.yaml``. Since we have only enabled switching we will end up
+with 4 tables. If we inspect the contents of ``inst/faucet.log`` Faucet will
+tell us what each table does::
- priority=9099,in_port=p1 actions=goto_table:1
- priority=9099,in_port=p2 actions=goto_table:1
- priority=9099,in_port=p3 actions=goto_table:1
- priority=9099,in_port=p4 actions=goto_table:1
- priority=9099,in_port=p5 actions=goto_table:1
- priority=0 actions=drop
+ Sep 10 06:44:10 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 0 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('eth_type', False), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: None name: vlan next_tables: ['eth_src'] output: True set_fields: ('vlan_vid',) size: 32 table_id: 0 vlan_port_scale: 1.5
+ Sep 10 06:44:10 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 1 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('eth_src', False), ('eth_type', False), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: eth_dst name: eth_src next_tables: ['eth_dst', 'flood'] output: True set_fields: ('vlan_vid', 'eth_dst') size: 32 table_id: 1 vlan_port_scale: 4.1
+ Sep 10 06:44:10 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 2 table config dec_ttl: None exact_match: True match_types: (('eth_dst', False), ('vlan_vid', False)) meter: None miss_goto: flood name: eth_dst next_tables: [] output: True set_fields: None size: 41 table_id: 2 vlan_port_scale: 4.1
+ Sep 10 06:44:10 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 3 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: None name: flood next_tables: [] output: True set_fields: None size: 32 table_id: 3 vlan_port_scale: 2.1
-Table 1, for ingress VLAN processing, has a bunch of flows that drop
-inappropriate packets, such as LLDP and STP::
+Currently, we have:
- table=1, priority=9099,dl_dst=01:80:c2:00:00:00 actions=drop
- table=1, priority=9099,dl_dst=01:00:0c:cc:cc:cd actions=drop
- table=1, priority=9099,dl_type=0x88cc actions=drop
+Table 0 (vlan)
+ Ingress VLAN processing
+
+Table 1 (eth_src)
+ Ingress L2 processing, MAC learning
+
+Table 2 (eth_dst)
+ Egress L2 processing
-Table 1 also has some more interesting flows that recognize packets
-without a VLAN header on each of our ports
-(``vlan_tci=0x0000/0x1fff``), push on the VLAN configured for the
-port, and proceed to table 3. Presumably these skip table 2 because
-we did not configure any VLAN-based ACLs. There is also a fallback
-flow to drop other packets, which in practice means that if any
-received packet already has a VLAN header then it will be dropped::
+Table 3 (flood)
+ Flooding
- table=1, priority=9000,in_port=p1,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:3
- table=1, priority=9000,in_port=p2,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:3
- table=1, priority=9000,in_port=p3,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:3
- table=1, priority=9000,in_port=p4,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4296->vlan_vid,goto_table:3
- table=1, priority=9000,in_port=p5,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4296->vlan_vid,goto_table:3
- table=1, priority=0 actions=drop
+In Table 0 we see flows that recognize packets without a VLAN header on each of
+our ports (``vlan_tci=0x0000/0x1fff``), push on the VLAN configured for the
+port, and proceed to table 1. There is also a fallback flow to drop other
+packets, which in practice means that if any received packet already has a
+VLAN header then it will be dropped::
+
+ priority=9000,in_port=p1,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:1
+ priority=9000,in_port=p2,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:1
+ priority=9000,in_port=p3,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4196->vlan_vid,goto_table:1
+ priority=9000,in_port=p4,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4296->vlan_vid,goto_table:1
+ priority=9000,in_port=p5,vlan_tci=0x0000/0x1fff actions=push_vlan:0x8100,set_field:4296->vlan_vid,goto_table:1
+ priority=0 actions=drop
.. note::
@@ -497,82 +518,54 @@ received packet already has a VLAN header then it will be dropped::
since 4196 is 0x1064, this action sets VLAN value 0x64, which in
decimal is 100.
-Table 2 isn't used because there are no VLAN-based ACLs. It just has
-a drop flow::
+Table 1 starts off with a flow that drops some inappropriate packets,
+in this case EtherType 0x9000 (Ethernet Configuration Testing Protocol),
+which should not be forwarded by a switch::
- table=2, priority=0 actions=drop
+ table=1, priority=9099,dl_type=0x9000 actions=drop
-Table 3 is used for MAC learning but the controller hasn't learned any
-MAC yet. It also drops some inappropriate packets such as those that claim
-to be from a broadcast source address (why not from all multicast source
-addresses, though?). We'll come back here later::
+Table 1 is primarily used for MAC learning but the controller hasn't learned
+any MAC addresses yet. It also drops some more inappropriate packets such as
+those that claim to be from a broadcast source address (why not from all
+multicast source addresses, though?). We'll come back here later::
- table=3, priority=9099,dl_src=ff:ff:ff:ff:ff:ff actions=drop
- table=3, priority=9001,dl_src=0e:00:00:00:00:01 actions=drop
- table=3, priority=0 actions=drop
- table=3, priority=9000 actions=CONTROLLER:96,goto_table:7
+ table=1, priority=9099,dl_src=ff:ff:ff:ff:ff:ff actions=drop
+ table=1, priority=9001,dl_src=0e:00:00:00:00:01 actions=drop
+ table=1, priority=9000,dl_vlan=100 actions=CONTROLLER:96,goto_table:2
+ table=1, priority=9000,dl_vlan=200 actions=CONTROLLER:96,goto_table:2
+ table=1, priority=0 actions=goto_table:2
-Tables 4, 5, and 6 aren't used because we haven't configured any
-routing::
+Table 2 is used to direct packets to learned MACs but Faucet hasn't
+learned any MACs yet, so it just sends all the packets along to table 3::
- table=4, priority=0 actions=drop
- table=5, priority=0 actions=drop
- table=6, priority=0 actions=drop
+ table=2, priority=0 actions=goto_table:3
-Table 7 is used to direct packets to learned MACs but Faucet hasn't
-learned any MACs yet, so it just sends all the packets along to table
-8::
+Table 3 does some more dropping of packets we don't want to forward,
+in this case STP::
- table=7, priority=0 actions=drop
- table=7, priority=9000 actions=goto_table:8
+ table=3, priority=9099,dl_dst=01:00:0c:cc:cc:cd actions=drop
+ table=3, priority=9099,dl_dst=01:80:c2:00:00:00/ff:ff:ff:ff:ff:f0 actions=drop
-Table 8 implements flooding, broadcast, and multicast. The flows for
+Table 3 implements flooding, broadcast, and multicast. The flows for
broadcast and flood are easy to understand: if the packet came in on a
given port and needs to be flooded or broadcast, output it to all the
other ports in the same VLAN::
- table=8, priority=9008,in_port=p1,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p2,output:p3
- table=8, priority=9008,in_port=p2,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p1,output:p3
- table=8, priority=9008,in_port=p3,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p1,output:p2
- table=8, priority=9008,in_port=p4,dl_vlan=200,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p5
- table=8, priority=9008,in_port=p5,dl_vlan=200,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p4
- table=8, priority=9000,in_port=p1,dl_vlan=100 actions=pop_vlan,output:p2,output:p3
- table=8, priority=9000,in_port=p2,dl_vlan=100 actions=pop_vlan,output:p1,output:p3
- table=8, priority=9000,in_port=p3,dl_vlan=100 actions=pop_vlan,output:p1,output:p2
- table=8, priority=9000,in_port=p4,dl_vlan=200 actions=pop_vlan,output:p5
- table=8, priority=9000,in_port=p5,dl_vlan=200 actions=pop_vlan,output:p4
-
-.. note::
-
- These flows could apparently be simpler because OpenFlow says that
- ``output:`` is ignored if ```` is the input port. That
- means that the first three flows above could apparently be collapsed
- into just::
-
- table=8, priority=9008,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p1,output:p2,output:p3
-
- There might be some reason why this won't work or isn't practical,
- but that isn't obvious from looking at the flow table.
+ table=3, priority=9004,dl_vlan=100,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p1,output:p2,output:p3
+ table=3, priority=9004,dl_vlan=200,dl_dst=ff:ff:ff:ff:ff:ff actions=pop_vlan,output:p4,output:p5
+ table=3, priority=9000,dl_vlan=100 actions=pop_vlan,output:p1,output:p2,output:p3
+ table=3, priority=9000,dl_vlan=200 actions=pop_vlan,output:p4,output:p5
There are also some flows for handling some standard forms of
multicast, and a fallback drop flow::
- table=8, priority=9006,in_port=p1,dl_vlan=100,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p2,output:p3
- table=8, priority=9006,in_port=p2,dl_vlan=100,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p1,output:p3
- table=8, priority=9006,in_port=p3,dl_vlan=100,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p1,output:p2
- table=8, priority=9006,in_port=p4,dl_vlan=200,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p5
- table=8, priority=9006,in_port=p5,dl_vlan=200,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p4
- table=8, priority=9002,in_port=p1,dl_vlan=100,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p2,output:p3
- table=8, priority=9002,in_port=p2,dl_vlan=100,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p3
- table=8, priority=9002,in_port=p3,dl_vlan=100,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p2
- table=8, priority=9004,in_port=p1,dl_vlan=100,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p2,output:p3
- table=8, priority=9004,in_port=p2,dl_vlan=100,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p3
- table=8, priority=9004,in_port=p3,dl_vlan=100,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p2
- table=8, priority=9002,in_port=p4,dl_vlan=200,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p5
- table=8, priority=9002,in_port=p5,dl_vlan=200,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p4
- table=8, priority=9004,in_port=p4,dl_vlan=200,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p5
- table=8, priority=9004,in_port=p5,dl_vlan=200,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p4
- table=8, priority=0 actions=drop
+ table=3, priority=9003,dl_vlan=100,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p1,output:p2,output:p3
+ table=3, priority=9003,dl_vlan=200,dl_dst=33:33:00:00:00:00/ff:ff:00:00:00:00 actions=pop_vlan,output:p4,output:p5
+ table=3, priority=9001,dl_vlan=100,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p2,output:p3
+ table=3, priority=9002,dl_vlan=100,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p1,output:p2,output:p3
+ table=3, priority=9001,dl_vlan=200,dl_dst=01:80:c2:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p4,output:p5
+ table=3, priority=9002,dl_vlan=200,dl_dst=01:00:5e:00:00:00/ff:ff:ff:00:00:00 actions=pop_vlan,output:p4,output:p5
+ table=3, priority=0 actions=drop
Tracing
~~~~~~~
@@ -602,25 +595,25 @@ trivial example::
bridge("br0")
-------------
- 0. in_port=1, priority 9099, cookie 0x5adc15c0
- goto_table:1
- 1. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
+ 0. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
push_vlan:0x8100
set_field:4196->vlan_vid
- goto_table:3
- 3. priority 9000, cookie 0x5adc15c0
+ goto_table:1
+ 1. dl_vlan=100, priority 9000, cookie 0x5adc15c0
CONTROLLER:96
- goto_table:7
- 7. priority 9000, cookie 0x5adc15c0
- goto_table:8
- 8. in_port=1,dl_vlan=100, priority 9000, cookie 0x5adc15c0
+ goto_table:2
+ 2. priority 0, cookie 0x5adc15c0
+ goto_table:3
+ 3. dl_vlan=100, priority 9000, cookie 0x5adc15c0
pop_vlan
+ output:1
+ >> skipping output to input port
output:2
output:3
Final flow: unchanged
Megaflow: recirc_id=0,eth,in_port=1,vlan_tci=0x0000,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x0000
- Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,flags=1,recirc_id=1,rule_cookie=0x5adc15c0,controller_id=0,max_len=96)),pop_vlan,2,3
+ Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,dont_send=1,continuation=0,recirc_id=1,rule_cookie=0x5adc15c0,controller_id=0,max_len=96)),pop_vlan,2,3
The first line of output, beginning with ``Flow:``, just repeats our
request in a more verbose form, including the L2 fields that were
@@ -628,10 +621,10 @@ zeroed.
Each of the numbered items under ``bridge("br0")`` shows what would
happen to our hypothetical packet in the table with the given number.
-For example, we see in table 1 that the packet matches a flow that
+For example, we see in table 0 that the packet matches a flow that
pushes on a VLAN header, sets the VLAN ID to 100, and goes on to further
-processing in table 3. In table 3, the packet gets sent to the
-controller to allow MAC learning to take place, and then table 8
+processing in table 1. In table 1, the packet gets sent to the
+controller to allow MAC learning to take place, and then table 3
floods the packet to the other ports in the same VLAN.
Summary information follows the numbered tables. The packet hasn't
@@ -662,7 +655,7 @@ here. But, take a look at ``inst/faucet.log`` now. It should now
include a line at the end that says that it learned about our MAC
00:11:11:00:00:00, like this::
- Jan 06 15:56:02 faucet.valve INFO DPID 1 (0x1) L2 learned 00:11:11:00:00:00 (L2 type 0x0000, L3 src None) on Port 1 on VLAN 100 (1 hosts total
+ Sep 10 08:16:28 faucet.valve INFO DPID 1 (0x1) switch-1 L2 learned 00:11:11:00:00:00 (L2 type 0x0000, L3 src None, L3 dst None) Port 1 VLAN 100 (1 hosts total)
Now compare the flow tables that we saved to the current ones::
@@ -671,8 +664,8 @@ Now compare the flow tables that we saved to the current ones::
The result should look like this, showing new flows for the learned
MACs::
- +table=3 priority=9098,in_port=1,dl_vlan=100,dl_src=00:11:11:00:00:00 hard_timeout=3601 actions=goto_table:7
- +table=7 priority=9099,dl_vlan=100,dl_dst=00:11:11:00:00:00 idle_timeout=3601 actions=pop_vlan,output:1
+ +table=1 priority=9098,in_port=1,dl_vlan=100,dl_src=00:11:11:00:00:00 hard_timeout=3605 actions=goto_table:2
+ +table=2 priority=9099,dl_vlan=100,dl_dst=00:11:11:00:00:00 idle_timeout=3605 actions=pop_vlan,output:1
To demonstrate the usefulness of the learned MAC, try tracing (with
side effects) a packet arriving on ``p2`` (or ``p3``) and destined to
@@ -686,31 +679,29 @@ address::
bridge("br0")
-------------
- 0. in_port=2, priority 9099, cookie 0x5adc15c0
- goto_table:1
- 1. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
+ 0. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
push_vlan:0x8100
set_field:4196->vlan_vid
- goto_table:3
- 3. priority 9000, cookie 0x5adc15c0
+ goto_table:1
+ 1. dl_vlan=100, priority 9000, cookie 0x5adc15c0
CONTROLLER:96
- goto_table:7
- 7. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0
+ goto_table:2
+ 2. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0
pop_vlan
output:1
If you check ``inst/faucet.log``, you can see that ``p2``'s MAC has
been learned too::
- Jan 06 15:58:09 faucet.valve INFO DPID 1 (0x1) L2 learned 00:22:22:00:00:00 (L2 type 0x0000, L3 src None) on Port 2 on VLAN 100 (2 hosts total)
+ Sep 10 08:17:45 faucet.valve INFO DPID 1 (0x1) switch-1 L2 learned 00:22:22:00:00:00 (L2 type 0x0000, L3 src None, L3 dst None) Port 2 VLAN 100 (2 hosts total)
Similarly for ``diff-flows``::
$ diff-flows flows1 br0
- +table=3 priority=9098,in_port=1,dl_vlan=100,dl_src=00:11:11:00:00:00 hard_timeout=3601 actions=goto_table:7
- +table=3 priority=9098,in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00 hard_timeout=3604 actions=goto_table:7
- +table=7 priority=9099,dl_vlan=100,dl_dst=00:11:11:00:00:00 idle_timeout=3601 actions=pop_vlan,output:1
- +table=7 priority=9099,dl_vlan=100,dl_dst=00:22:22:00:00:00 idle_timeout=3604 actions=pop_vlan,output:2
+ +table=1 priority=9098,in_port=1,dl_vlan=100,dl_src=00:11:11:00:00:00 hard_timeout=3605 actions=goto_table:2
+ +table=1 priority=9098,in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00 hard_timeout=3599 actions=goto_table:2
+ +table=2 priority=9099,dl_vlan=100,dl_dst=00:11:11:00:00:00 idle_timeout=3605 actions=pop_vlan,output:1
+ +table=2 priority=9099,dl_vlan=100,dl_dst=00:22:22:00:00:00 idle_timeout=3599 actions=pop_vlan,output:2
Then, if you re-run either of the ``ofproto/trace`` commands (with or
without ``-generate``), you can see that the packets go back and forth
@@ -721,15 +712,13 @@ without any further MAC learning, e.g.::
bridge("br0")
-------------
- 0. in_port=2, priority 9099, cookie 0x5adc15c0
- goto_table:1
- 1. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
+ 0. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
push_vlan:0x8100
set_field:4196->vlan_vid
- goto_table:3
- 3. in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00, priority 9098, cookie 0x5adc15c0
- goto_table:7
- 7. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0
+ goto_table:1
+ 1. in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00, priority 9098, cookie 0x5adc15c0
+ goto_table:2
+ 2. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0
pop_vlan
output:1
@@ -812,15 +801,13 @@ at the most recent ``ofproto/trace`` output::
bridge("br0")
-------------
- 0. in_port=2, priority 9099, cookie 0x5adc15c0
- goto_table:1
- 1. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
+ 0. in_port=2,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
push_vlan:0x8100
set_field:4196->vlan_vid
- goto_table:3
- 3. in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00, priority 9098, cookie 0x5adc15c0
- goto_table:7
- 7. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0
+ goto_table:1
+ 1. in_port=2,dl_vlan=100,dl_src=00:22:22:00:00:00, priority 9098, cookie 0x5adc15c0
+ goto_table:2
+ 2. dl_vlan=100,dl_dst=00:11:11:00:00:00, priority 9099, cookie 0x5adc15c0
pop_vlan
output:1
@@ -844,17 +831,17 @@ megaflow entry includes:
visited:
``in_port``
- In tables 0, 1, and 3.
+ In tables 0 and 1.
``vlan_tci``
- In tables 1, 3, and 7 (``vlan_tci`` includes the VLAN ID and PCP
+ In tables 0, 1, and 2 (``vlan_tci`` includes the VLAN ID and PCP
fields and ``dl_vlan`` is just the VLAN ID).
``dl_src``
- In table 3
+ In table 1.
``dl_dst``
- In table 7.
+ In table 2.
* All of the fields matched by flows that had to be ruled out to
ensure that the ones that actually matched were the highest priority
@@ -865,12 +852,12 @@ The last one is important. Notice how the megaflow matches on
``dl_type`` (the Ethernet type). One reason is because of this flow
in OpenFlow table 1 (which shows up in ``dump-flows`` output)::
- table=1, priority=9099,dl_type=0x88cc actions=drop
+ table=1, priority=9099,dl_type=0x9000 actions=drop
This flow has higher priority than the flow in table 1 that actually
matched. This means that, to put it in the megaflow cache,
``ovs-vswitchd`` has to add a match on ``dl_type`` to ensure that the
-cache entry doesn't match LLDP packets (with Ethertype 0x88cc).
+cache entry doesn't match ECTP packets (with Ethertype 0x9000).
.. note::
@@ -935,59 +922,81 @@ each VLAN and define a router between them. The ``dps`` section is unchanged::
router-1:
vlans: [100, 200]
-Then we restart Faucet::
+Then we can tell Faucet to reload its configuration::
- $ docker restart faucet
-
-.. note::
-
- One should be able to tell Faucet to re-read its configuration file
- without restarting it. I sometimes saw anomalous behavior when I
- did this, although I didn't characterize it well enough to make a
- quality bug report. I found restarting the container to be
- reliable.
+ $ sudo docker exec faucet pkill -HUP -f faucet.faucet
OpenFlow Layer
~~~~~~~~~~~~~~
-Back in the OVS sandbox, let's see how the flow table has changed, with::
+Now that we have an additional feature enabled (routing) we will notice some
+additional OpenFlow tables if we check ``inst/faucet.log``::
- $ diff-flows flows1 br0
+ Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 0 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('eth_type', False), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: None name: vlan next_tables: ['eth_src'] output: True set_fields: ('vlan_vid',) size: 32 table_id: 0 vlan_port_scale: 1.5
+ Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 1 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('eth_src', False), ('eth_type', False), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: eth_dst name: eth_src next_tables: ['ipv4_fib', 'vip', 'eth_dst', 'flood'] output: True set_fields: ('vlan_vid', 'eth_dst') size: 32 table_id: 1 vlan_port_scale: 4.1
+ Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 2 table config dec_ttl: True exact_match: None match_types: (('eth_type', False), ('ipv4_dst', True), ('vlan_vid', False)) meter: None miss_goto: None name: ipv4_fib next_tables: ['vip', 'eth_dst', 'flood'] output: True set_fields: ('eth_dst', 'eth_src', 'vlan_vid') size: 32 table_id: 2 vlan_port_scale: 3.1
+ Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 3 table config dec_ttl: None exact_match: None match_types: (('arp_tpa', False), ('eth_dst', False), ('eth_type', False), ('icmpv6_type', False), ('ip_proto', False)) meter: None miss_goto: None name: vip next_tables: ['eth_dst', 'flood'] output: True set_fields: None size: 32 table_id: 3 vlan_port_scale: None
+ Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 4 table config dec_ttl: None exact_match: True match_types: (('eth_dst', False), ('vlan_vid', False)) meter: None miss_goto: flood name: eth_dst next_tables: [] output: True set_fields: None size: 41 table_id: 4 vlan_port_scale: 4.1
+ Sep 10 08:28:14 faucet.valve INFO DPID 1 (0x1) switch-1 table ID 5 table config dec_ttl: None exact_match: None match_types: (('eth_dst', True), ('in_port', False), ('vlan_vid', False)) meter: None miss_goto: None name: flood next_tables: [] output: True set_fields: None size: 32 table_id: 5 vlan_port_scale: 2.1
+
+So now we have additional FIB and VIP tables:
+
+Table 0 (vlan)
+ Ingress VLAN processing
+
+Table 1 (eth_src)
+ Ingress L2 processing, MAC learning
+
+Table 2 (ipv4_fib)
+ L3 forwarding for IPv4
-First, table 3 has new flows to direct ARP packets to table 6 (the
+Table 3 (vip)
+ Virtual IP processing, e.g. for router IP addresses implemented by Faucet
+
+Table 4 (eth_dst)
+ Egress L2 processing
+
+Table 5 (flood)
+ Flooding
+
+Back in the OVS sandbox, let's see what new flow rules have been added, with::
+
+ $ diff-flows flows1 br0 | grep +
+
+First, table 1 has new flows to direct ARP packets to table 3 (the
virtual IP processing table), presumably to handle ARP for the router
IPs. New flows also send IP packets destined to a particular Ethernet
-address to table 4 (the L3 forwarding table); we can make the educated
+address to table 2 (the L3 forwarding table); we can make the educated
guess that the Ethernet address is the one used by the Faucet router::
- +table=3 priority=9131,arp,dl_vlan=100 actions=goto_table:6
- +table=3 priority=9131,arp,dl_vlan=200 actions=goto_table:6
- +table=3 priority=9099,ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01 actions=goto_table:4
- +table=3 priority=9099,ip,dl_vlan=200,dl_dst=0e:00:00:00:00:01 actions=goto_table:4
-
-The new flows in table 4 appear to be verifying that the packets are
-indeed addressed to a network or IP address that Faucet knows how to
-route::
-
- +table=4 priority=9131,ip,dl_vlan=100,nw_dst=10.100.0.254 actions=goto_table:6
- +table=4 priority=9131,ip,dl_vlan=200,nw_dst=10.200.0.254 actions=goto_table:6
- +table=4 priority=9123,ip,dl_vlan=100,nw_dst=10.100.0.0/24 actions=goto_table:6
- +table=4 priority=9123,ip,dl_vlan=200,nw_dst=10.100.0.0/24 actions=goto_table:6
- +table=4 priority=9123,ip,dl_vlan=100,nw_dst=10.200.0.0/24 actions=goto_table:6
- +table=4 priority=9123,ip,dl_vlan=200,nw_dst=10.200.0.0/24 actions=goto_table:6
-
-Table 6 has a few different things going on. It sends ARP requests
-for the router IPs to the controller; presumably the controller will
-generate replies and send them back to the requester. It switches
-other ARP packets, either broadcasting them if they have a broadcast
+ +table=1 priority=9131,arp,dl_vlan=100 actions=goto_table:3
+ +table=1 priority=9131,arp,dl_vlan=200 actions=goto_table:3
+ +table=1 priority=9099,ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01 actions=goto_table:2
+ +table=1 priority=9099,ip,dl_vlan=200,dl_dst=0e:00:00:00:00:01 actions=goto_table:2
+
+In the new ``ipv4_fib`` table (table 2) there appear to be flows for verifying
+that the packets are indeed addressed to a network or IP address that Faucet
+knows how to route::
+
+ +table=2 priority=9131,ip,dl_vlan=100,nw_dst=10.100.0.254 actions=goto_table:3
+ +table=2 priority=9131,ip,dl_vlan=200,nw_dst=10.200.0.254 actions=goto_table:3
+ +table=2 priority=9123,ip,dl_vlan=200,nw_dst=10.100.0.0/24 actions=goto_table:3
+ +table=2 priority=9123,ip,dl_vlan=100,nw_dst=10.100.0.0/24 actions=goto_table:3
+ +table=2 priority=9123,ip,dl_vlan=200,nw_dst=10.200.0.0/24 actions=goto_table:3
+ +table=2 priority=9123,ip,dl_vlan=100,nw_dst=10.200.0.0/24 actions=goto_table:3
+
+In our new ``vip`` table (table 3) there are a few different things going on.
+It sends ARP requests for the router IPs to the controller; presumably the
+controller will generate replies and send them back to the requester.
+It switches other ARP packets, either broadcasting them if they have a broadcast
destination or attempting to unicast them otherwise. It sends all
other IP packets to the controller::
- +table=6 priority=9133,arp,arp_tpa=10.100.0.254 actions=CONTROLLER:128
- +table=6 priority=9133,arp,arp_tpa=10.200.0.254 actions=CONTROLLER:128
- +table=6 priority=9132,arp,dl_dst=ff:ff:ff:ff:ff:ff actions=goto_table:8
- +table=6 priority=9131,arp actions=goto_table:7
- +table=6 priority=9130,ip actions=CONTROLLER:128
+ +table=3 priority=9133,arp,arp_tpa=10.100.0.254 actions=CONTROLLER:128
+ +table=3 priority=9133,arp,arp_tpa=10.200.0.254 actions=CONTROLLER:128
+ +table=3 priority=9132,arp,dl_dst=ff:ff:ff:ff:ff:ff actions=goto_table:4
+ +table=3 priority=9131,arp actions=goto_table:4
+ +table=3 priority=9130,ip actions=CONTROLLER:128
Performance is clearly going to be poor if every packet that needs to
be routed has to go to the controller, but it's unlikely that's the
@@ -1039,27 +1048,27 @@ The important part of the output is where it shows that the packet was
recognized as an ARP request destined to the router gateway and
therefore sent to the controller::
- 6. arp,arp_tpa=10.100.0.254, priority 9133, cookie 0x5adc15c0
- CONTROLLER:128
+ 3. arp,arp_tpa=10.100.0.254, priority 9133, cookie 0x5adc15c0
+ CONTROLLER:128
The Faucet log shows that Faucet learned the host's MAC address,
its MAC-to-IP mapping, and responded to the ARP request::
- Jan 06 16:12:23 faucet.valve INFO DPID 1 (0x1) Adding new route 10.100.0.1/32 via 10.100.0.1 (00:01:02:03:04:05) on VLAN 100
- Jan 06 16:12:23 faucet.valve INFO DPID 1 (0x1) Responded to ARP request for 10.100.0.254 from 10.100.0.1 (00:01:02:03:04:05) on VLAN 100
- Jan 06 16:12:23 faucet.valve INFO DPID 1 (0x1) L2 learned 00:01:02:03:04:05 (L2 type 0x0806, L3 src 10.100.0.1) on Port 1 on VLAN 100 (1 hosts total)
+ Sep 10 08:52:46 faucet.valve INFO DPID 1 (0x1) switch-1 Adding new route 10.100.0.1/32 via 10.100.0.1 (00:01:02:03:04:05) on VLAN 100
+ Sep 10 08:52:46 faucet.valve INFO DPID 1 (0x1) switch-1 Resolve response to 10.100.0.254 from 00:01:02:03:04:05 (L2 type 0x0806, L3 src 10.100.0.1, L3 dst 10.100.0.254) Port 1 VLAN 100
+ Sep 10 08:52:46 faucet.valve INFO DPID 1 (0x1) switch-1 L2 learned 00:01:02:03:04:05 (L2 type 0x0806, L3 src 10.100.0.1, L3 dst 10.100.0.254) Port 1 VLAN 100 (1 hosts total)
We can also look at the changes to the flow tables::
$ diff-flows flows2 br0
- +table=3 priority=9098,in_port=1,dl_vlan=100,dl_src=00:01:02:03:04:05 hard_timeout=3600 actions=goto_table:7
- +table=4 priority=9131,ip,dl_vlan=100,nw_dst=10.100.0.1 actions=set_field:4196->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:01:02:03:04:05->eth_dst,dec_ttl,goto_table:7
- +table=4 priority=9131,ip,dl_vlan=200,nw_dst=10.100.0.1 actions=set_field:4196->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:01:02:03:04:05->eth_dst,dec_ttl,goto_table:7
- +table=7 priority=9099,dl_vlan=100,dl_dst=00:01:02:03:04:05 idle_timeout=3600 actions=pop_vlan,output:1
+ +table=1 priority=9098,in_port=1,dl_vlan=100,dl_src=00:01:02:03:04:05 hard_timeout=3605 actions=goto_table:4
+ +table=2 priority=9131,ip,dl_vlan=200,nw_dst=10.100.0.1 actions=set_field:4196->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:01:02:03:04:05->eth_dst,dec_ttl,goto_table:4
+ +table=2 priority=9131,ip,dl_vlan=100,nw_dst=10.100.0.1 actions=set_field:4196->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:01:02:03:04:05->eth_dst,dec_ttl,goto_table:4
+ +table=4 priority=9099,dl_vlan=100,dl_dst=00:01:02:03:04:05 idle_timeout=3605 actions=pop_vlan,output:1
-The new flows include one in table 3 and one in table 7 for the
+The new flows include one in table 1 and one in table 4 for the
learned MAC, which have the same forms we saw before. The new flows
-in table 4 are different. They matches packets directed to 10.100.0.1
+in table 2 are different.  They match packets directed to 10.100.0.1
(in two VLANs) and forward them to the host by updating the Ethernet
source and destination addresses appropriately, decrementing the TTL,
and skipping ahead to unicast output in table 4.  This means that
@@ -1083,7 +1092,7 @@ And dump the reply packet::
$ /usr/sbin/tcpdump -evvvr sandbox/p1.pcap
reading from file sandbox/p1.pcap, link-type EN10MB (Ethernet)
- 16:14:47.670727 0e:00:00:00:00:01 (oui Unknown) > 00:01:02:03:04:05 (oui Unknown), ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Reply 10.100.0.254 is-at 0e:00:00:00:00:01 (oui Unknown), length 46
+ 20:55:13.186932 0e:00:00:00:00:01 (oui Unknown) > 00:01:02:03:04:05 (oui Unknown), ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Reply 10.100.0.254 is-at 0e:00:00:00:00:01 (oui Unknown), length 46
We clearly see the ARP reply, which tells us that the Faucet router's
Ethernet address is 0e:00:00:00:00:01 (as we guessed before from the
@@ -1105,26 +1114,24 @@ this::
bridge("br0")
-------------
- 0. in_port=1, priority 9099, cookie 0x5adc15c0
- goto_table:1
- 1. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
+ 0. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
push_vlan:0x8100
set_field:4196->vlan_vid
+ goto_table:1
+ 1. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0
+ goto_table:2
+ 2. ip,dl_vlan=100,nw_dst=10.200.0.0/24, priority 9123, cookie 0x5adc15c0
goto_table:3
- 3. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0
- goto_table:4
- 4. ip,dl_vlan=100,nw_dst=10.200.0.0/24, priority 9123, cookie 0x5adc15c0
- goto_table:6
- 6. ip, priority 9130, cookie 0x5adc15c0
+ 3. ip, priority 9130, cookie 0x5adc15c0
CONTROLLER:128
Final flow: udp,in_port=1,dl_vlan=100,dl_vlan_pcp=0,vlan_tci1=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=0
Megaflow: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_dst=10.200.0.0/25,nw_frag=no
- Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,flags=0,recirc_id=6,rule_cookie=0x5adc15c0,controller_id=0,max_len=128))
+ Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=6,rule_cookie=0x5adc15c0,controller_id=0,max_len=128))
Observe that the packet gets recognized as destined to the router, in
-table 3, and then as properly destined to the 10.200.0.0/24 network,
-in table 4. In table 6, however, it gets sent to the controller.
+table 1, and then as properly destined to the 10.200.0.0/24 network,
+in table 2. In table 3, however, it gets sent to the controller.
Presumably, this is because Faucet has not yet resolved an Ethernet
address for the destination host 10.200.0.1. It probably sent out an
ARP request. Let's take a look in the next step.
@@ -1140,13 +1147,13 @@ Let's make sure::
$ /usr/sbin/tcpdump -evvvr sandbox/p4.pcap
reading from file sandbox/p4.pcap, link-type EN10MB (Ethernet)
- 16:17:43.174006 0e:00:00:00:00:01 (oui Unknown) > Broadcast, ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Request who-has 10.200.0.1 tell 10.200.0.254, length 46
+ 20:57:31.116097 0e:00:00:00:00:01 (oui Unknown) > Broadcast, ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Request who-has 10.200.0.1 tell 10.200.0.254, length 46
and::
$ /usr/sbin/tcpdump -evvvr sandbox/p5.pcap
reading from file sandbox/p5.pcap, link-type EN10MB (Ethernet)
- 16:17:43.174268 0e:00:00:00:00:01 (oui Unknown) > Broadcast, ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Request who-has 10.200.0.1 tell 10.200.0.254, length 46
+ 20:58:04.129735 0e:00:00:00:00:01 (oui Unknown) > Broadcast, ethertype ARP (0x0806), length 60: Ethernet (len 6), IPv4 (len 4), Request who-has 10.200.0.1 tell 10.200.0.254, length 46
For good measure, let's make sure that it wasn't sent to ``p3``::
@@ -1164,37 +1171,34 @@ reply::
bridge("br0")
-------------
- 0. in_port=4, priority 9099, cookie 0x5adc15c0
- goto_table:1
- 1. in_port=4,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
+ 0. in_port=4,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
push_vlan:0x8100
set_field:4296->vlan_vid
+ goto_table:1
+ 1. arp,dl_vlan=200, priority 9131, cookie 0x5adc15c0
goto_table:3
- 3. arp,dl_vlan=200, priority 9131, cookie 0x5adc15c0
- goto_table:6
- 6. arp,arp_tpa=10.200.0.254, priority 9133, cookie 0x5adc15c0
+ 3. arp,arp_tpa=10.200.0.254, priority 9133, cookie 0x5adc15c0
CONTROLLER:128
Final flow: arp,in_port=4,dl_vlan=200,dl_vlan_pcp=0,vlan_tci1=0x0000,dl_src=00:10:20:30:40:50,dl_dst=0e:00:00:00:00:01,arp_spa=10.200.0.1,arp_tpa=10.200.0.254,arp_op=2,arp_sha=00:10:20:30:40:50,arp_tha=0e:00:00:00:00:01
- Megaflow: recirc_id=0,eth,arp,in_port=4,vlan_tci=0x0000/0x1fff,dl_dst=0e:00:00:00:00:01,arp_tpa=10.200.0.254
- Datapath actions: push_vlan(vid=200,pcp=0),userspace(pid=0,controller(reason=1,flags=0,recirc_id=7,rule_cookie=0x5adc15c0,controller_id=0,max_len=128))
+ Megaflow: recirc_id=0,eth,arp,in_port=4,vlan_tci=0x0000/0x1fff,arp_tpa=10.200.0.254
+ Datapath actions: push_vlan(vid=200,pcp=0),userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=7,rule_cookie=0x5adc15c0,controller_id=0,max_len=128))
It shows up in ``inst/faucet.log``::
- Jan 06 03:20:11 faucet.valve INFO DPID 1 (0x1) Adding new route 10.200.0.1/32 via 10.200.0.1 (00:10:20:30:40:50) on VLAN 200
- Jan 06 03:20:11 faucet.valve INFO DPID 1 (0x1) ARP response 10.200.0.1 (00:10:20:30:40:50) on VLAN 200
- Jan 06 03:20:11 faucet.valve INFO DPID 1 (0x1) L2 learned 00:10:20:30:40:50 (L2 type 0x0806, L3 src 10.200.0.1) on Port 4 on VLAN 200 (1 hosts total)
+ Sep 10 08:59:02 faucet.valve INFO DPID 1 (0x1) switch-1 Adding new route 10.200.0.1/32 via 10.200.0.1 (00:10:20:30:40:50) on VLAN 200
+ Sep 10 08:59:02 faucet.valve INFO DPID 1 (0x1) switch-1 Received advert for 10.200.0.1 from 00:10:20:30:40:50 (L2 type 0x0806, L3 src 10.200.0.1, L3 dst 10.200.0.254) Port 4 VLAN 200
+ Sep 10 08:59:02 faucet.valve INFO DPID 1 (0x1) switch-1 L2 learned 00:10:20:30:40:50 (L2 type 0x0806, L3 src 10.200.0.1, L3 dst 10.200.0.254) Port 4 VLAN 200 (1 hosts total)
and in the OVS flow tables::
$ diff-flows flows2 br0
- +table=3 priority=9098,in_port=4,dl_vlan=200,dl_src=00:10:20:30:40:50 hard_timeout=3601 actions=goto_table:7
+ +table=1 priority=9098,in_port=4,dl_vlan=200,dl_src=00:10:20:30:40:50 hard_timeout=3598 actions=goto_table:4
...
- +table=4 priority=9131,ip,dl_vlan=200,nw_dst=10.200.0.1 actions=set_field:4296->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:10:20:30:40:50->eth_dst,dec_ttl,goto_table:7
- +table=4 priority=9131,ip,dl_vlan=100,nw_dst=10.200.0.1 actions=set_field:4296->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:10:20:30:40:50->eth_dst,dec_ttl,goto_table:7
+ +table=2 priority=9131,ip,dl_vlan=200,nw_dst=10.200.0.1 actions=set_field:4296->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:10:20:30:40:50->eth_dst,dec_ttl,goto_table:4
+ +table=2 priority=9131,ip,dl_vlan=100,nw_dst=10.200.0.1 actions=set_field:4296->vlan_vid,set_field:0e:00:00:00:00:01->eth_src,set_field:00:10:20:30:40:50->eth_dst,dec_ttl,goto_table:4
...
- +table=4 priority=9123,ip,dl_vlan=100,nw_dst=10.200.0.0/24 actions=goto_table:6
- +table=7 priority=9099,dl_vlan=200,dl_dst=00:10:20:30:40:50 idle_timeout=3601 actions=pop_vlan,output:4
+ +table=4 priority=9099,dl_vlan=200,dl_dst=00:10:20:30:40:50 idle_timeout=3598 actions=pop_vlan,output:4
Step 6: IP Packet Delivery
++++++++++++++++++++++++++
@@ -1213,25 +1217,23 @@ for the original sending host to re-send the packet. We can do that
by re-running the trace::
$ ovs-appctl ofproto/trace br0 in_port=p1,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,udp,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_ttl=64 -generate
- Flow: udp,in_port=1,vlan_tci=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=0
+ Flow: udp,in_port=1,vlan_tci=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=0
bridge("br0")
-------------
- 0. in_port=1, priority 9099, cookie 0x5adc15c0
- goto_table:1
- 1. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
+ 0. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
push_vlan:0x8100
set_field:4196->vlan_vid
- goto_table:3
- 3. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0
- goto_table:4
- 4. ip,dl_vlan=100,nw_dst=10.200.0.1, priority 9131, cookie 0x5adc15c0
+ goto_table:1
+ 1. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0
+ goto_table:2
+ 2. ip,dl_vlan=100,nw_dst=10.200.0.1, priority 9131, cookie 0x5adc15c0
set_field:4296->vlan_vid
set_field:0e:00:00:00:00:01->eth_src
set_field:00:10:20:30:40:50->eth_dst
dec_ttl
- goto_table:7
- 7. dl_vlan=200,dl_dst=00:10:20:30:40:50, priority 9099, cookie 0x5adc15c0
+ goto_table:4
+ 4. dl_vlan=200,dl_dst=00:10:20:30:40:50, priority 9099, cookie 0x5adc15c0
pop_vlan
output:4
@@ -1325,18 +1327,27 @@ the ways that OVS tries to optimize megaflows. Update
actions:
allow: 1
-Then restart Faucet::
+Then reload Faucet::
- $ docker restart faucet
+ $ sudo docker exec faucet pkill -HUP -f faucet.faucet
-On port 1, this new configuration blocks all traffic to TCP port 8080
-and allows all other traffic. The resulting change in the flow table
-shows this clearly too::
+We will now find Faucet has added a new table to the start of the pipeline
+for processing port ACLs. Let's take a look at our new table 0 with
+``dump-flows br0``::
- $ diff-flows flows2 br0
- -priority=9099,in_port=1 actions=goto_table:1
- +priority=9098,in_port=1 actions=goto_table:1
- +priority=9099,tcp,in_port=1,tp_dst=8080 actions=drop
+ priority=9099,tcp,in_port=p1,tp_dst=8080 actions=drop
+ priority=9098,in_port=p1 actions=goto_table:1
+ priority=9099,in_port=p2 actions=goto_table:1
+ priority=9099,in_port=p3 actions=goto_table:1
+ priority=9099,in_port=p4 actions=goto_table:1
+ priority=9099,in_port=p5 actions=goto_table:1
+ priority=0 actions=drop
+
+We now have a flow that just jumps to table 1 (vlan) for each configured port,
+and a low priority rule to drop other unrecognized packets. We also see a flow
+rule for dropping TCP port 8080 traffic on port 1. If we compare this rule to
+the ACL we configured, we can clearly see how Faucet has converted this ACL to
+fit into the OpenFlow pipeline.
The most interesting question here is performance. If you recall the
earlier discussion, when a packet through the flow table encounters a
@@ -1357,6 +1368,7 @@ Let's see what happens, by sending a packet to port 80 (instead of
8080)::
$ ovs-appctl ofproto/trace br0 in_port=p1,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,tcp,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_ttl=64,tp_dst=80 -generate
+ src=10.100.0.1,nw_dst=10.200.0.1,nw_ttl=64,tp_dst=80 -generate
Flow: tcp,in_port=1,vlan_tci=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
bridge("br0")
@@ -1366,17 +1378,17 @@ Let's see what happens, by sending a packet to port 80 (instead of
1. in_port=1,vlan_tci=0x0000/0x1fff, priority 9000, cookie 0x5adc15c0
push_vlan:0x8100
set_field:4196->vlan_vid
+ goto_table:2
+ 2. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0
goto_table:3
- 3. ip,dl_vlan=100,dl_dst=0e:00:00:00:00:01, priority 9099, cookie 0x5adc15c0
+ 3. ip,dl_vlan=100,nw_dst=10.200.0.0/24, priority 9123, cookie 0x5adc15c0
goto_table:4
- 4. ip,dl_vlan=100,nw_dst=10.200.0.0/24, priority 9123, cookie 0x5adc15c0
- goto_table:6
- 6. ip, priority 9130, cookie 0x5adc15c0
+ 4. ip, priority 9130, cookie 0x5adc15c0
CONTROLLER:128
Final flow: tcp,in_port=1,dl_vlan=100,dl_vlan_pcp=0,vlan_tci1=0x0000,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_src=10.100.0.1,nw_dst=10.200.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=0,tp_dst=80,tcp_flags=0
- Megaflow: recirc_id=0,eth,tcp,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_dst=10.200.0.1,nw_frag=no,tp_dst=0x0/0xf000
- Datapath actions: push_vlan(vid=100,pcp=0)
+ Megaflow: recirc_id=0,eth,tcp,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=00:01:02:03:04:05,dl_dst=0e:00:00:00:00:01,nw_dst=10.200.0.0/25,nw_frag=no,tp_dst=0x0/0xf000
+ Datapath actions: push_vlan(vid=100,pcp=0),userspace(pid=0,controller(reason=1,dont_send=0,continuation=0,recirc_id=8,rule_cookie=0x5adc15c0,controller_id=0,max_len=128))
Take a look at the Megaflow line and in particular the match on
``tp_dst``, which says ``tp_dst=0x0/0xf000``. What this means is that
@@ -1406,8 +1418,8 @@ Finishing Up
------------
When you're done, you probably want to exit the sandbox session, with
-Control+D or ``exit``, and stop the Faucet controller with ``docker
-stop faucet; docker rm faucet``.
+Control+D or ``exit``, and stop the Faucet controller with ``sudo docker
+stop faucet; sudo docker rm faucet``.
Further Directions
------------------
diff --git a/Documentation/tutorials/ipsec.rst b/Documentation/tutorials/ipsec.rst
index b4c3235132bc2dd0b7fde7c238d5be101ae1199f..ebc0ae429c197b6c515b42c719e44222d9df197c 100644
--- a/Documentation/tutorials/ipsec.rst
+++ b/Documentation/tutorials/ipsec.rst
@@ -42,7 +42,7 @@ Installing OVS and IPsec Packages
---------------------------------
OVS IPsec has .deb and .rpm packages. You should use the right package
-based on your Linux distribution. This tutorial uses Ubuntu 16.04 and Fedora 27
+based on your Linux distribution. This tutorial uses Ubuntu 16.04 and Fedora 32
as examples.
Ubuntu
@@ -59,8 +59,8 @@ Ubuntu
2. Install the related packages::
- $ apt-get install dkms strongswan
- $ dpkg -i libopenvswitch_*.deb openvswitch-common_*.deb \
+ # apt-get install dkms strongswan
+ # dpkg -i libopenvswitch_*.deb openvswitch-common_*.deb \
openvswitch-switch_*.deb openvswitch-datapath-dkms_*.deb \
python-openvswitch_*.deb openvswitch-pki_*.deb \
openvswitch-ipsec_*.deb
@@ -71,23 +71,25 @@ Ubuntu
Fedora
~~~~~~
-1. Follow :doc:`/intro/install/fedora` to build RPM packages.
+1. Install the related packages. Fedora 32 does not require installation of
+ the out-of-tree kernel module::
-2. Install the related packages::
+ # dnf install python3-openvswitch libreswan \
+ openvswitch openvswitch-ipsec
+
+2. Install firewall rules to allow ESP and IKE traffic::
- $ dnf install python2-openvswitch libreswan \
- "kernel-devel-uname-r == $(uname -r)"
- $ rpm -i openvswitch-*.rpm openvswitch-kmod-*.rpm \
- openvswitch-openvswitch-ipsec-*.rpm
+ # systemctl start firewalld
+ # firewall-cmd --add-service ipsec
-3. Install firewall rules to allow ESP and IKE traffic::
+ Or to make permanent::
- $ iptables -A IN_FedoraServer_allow -p esp -j ACCEPT
- $ iptables -A IN_FedoraServer_allow -p udp --dport 500 -j ACCEPT
+ # systemctl enable firewalld
+ # firewall-cmd --permanent --add-service ipsec
-4. Run the openvswitch-ipsec service::
+3. Run the openvswitch-ipsec service::
- $ systemctl start openvswitch-ipsec.service
+ # systemctl start openvswitch-ipsec.service
.. note::
@@ -97,47 +99,47 @@ Fedora
Configuring IPsec tunnel
------------------------
-Suppose you want to build IPsec tunnel between two hosts. Assume `host_1`'s
+Suppose you want to build an IPsec tunnel between two hosts. Assume `host_1`'s
external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure
`host_1` and `host_2` can ping each other via these external IPs.
0. Set up some variables to make life easier. On both hosts, set ``ip_1`` and
``ip_2`` variables, e.g.::
- $ ip_1=1.1.1.1
- $ ip_2=2.2.2.2
+ # ip_1=1.1.1.1
+ # ip_2=2.2.2.2
1. Set up OVS bridges in both hosts.
In `host_1`::
- $ ovs-vsctl add-br br-ipsec
- $ ip addr add 192.0.0.1/24 dev br-ipsec
- $ ip link set br-ipsec up
+ # ovs-vsctl add-br br-ipsec
+ # ip addr add 192.0.0.1/24 dev br-ipsec
+ # ip link set br-ipsec up
In `host_2`::
- $ ovs-vsctl add-br br-ipsec
- $ ip addr add 192.0.0.2/24 dev br-ipsec
- $ ip link set br-ipsec up
+ # ovs-vsctl add-br br-ipsec
+ # ip addr add 192.0.0.2/24 dev br-ipsec
+ # ip link set br-ipsec up
2. Set up IPsec tunnel.
- There are three authentication methods. You can choose one to set up your
- IPsec tunnel.
+ There are three authentication methods. Choose one method to set up your
+ IPsec tunnel and follow the steps below.
a) Using pre-shared key:
In `host_1`::
- $ ovs-vsctl add-port br-ipsec tun -- \
+ # ovs-vsctl add-port br-ipsec tun -- \
set interface tun type=gre \
options:remote_ip=$ip_2 \
options:psk=swordfish
In `host_2`::
- $ ovs-vsctl add-port br-ipsec tun -- \
+ # ovs-vsctl add-port br-ipsec tun -- \
set interface tun type=gre \
options:remote_ip=$ip_1 \
options:psk=swordfish
@@ -156,15 +158,15 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure
In `host_1`::
- $ ovs-pki req -u host_1
- $ ovs-pki self-sign host_1
- $ scp host_1-cert.pem $ip_2:/etc/keys/host_1-cert.pem
+ # ovs-pki req -u host_1
+ # ovs-pki self-sign host_1
+ # scp host_1-cert.pem $ip_2:/etc/keys/host_1-cert.pem
In `host_2`::
- $ ovs-pki req -u host_2
- $ ovs-pki self-sign host_2
- $ scp host_2-cert.pem $ip_1:/etc/keys/host_2-cert.pem
+ # ovs-pki req -u host_2
+ # ovs-pki self-sign host_2
+ # scp host_2-cert.pem $ip_1:/etc/keys/host_2-cert.pem
.. note::
@@ -176,20 +178,20 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure
In `host_1`::
- $ ovs-vsctl set Open_vSwitch . \
+ # ovs-vsctl set Open_vSwitch . \
other_config:certificate=/etc/keys/host_1-cert.pem \
other_config:private_key=/etc/keys/host_1-privkey.pem
- $ ovs-vsctl add-port br-ipsec tun -- \
+ # ovs-vsctl add-port br-ipsec tun -- \
set interface tun type=gre \
options:remote_ip=$ip_2 \
options:remote_cert=/etc/keys/host_2-cert.pem
In `host_2`::
- $ ovs-vsctl set Open_vSwitch . \
+ # ovs-vsctl set Open_vSwitch . \
other_config:certificate=/etc/keys/host_2-cert.pem \
other_config:private_key=/etc/keys/host_2-privkey.pem
- $ ovs-vsctl add-port br-ipsec tun -- \
+ # ovs-vsctl add-port br-ipsec tun -- \
set interface tun type=gre \
options:remote_ip=$ip_1 \
options:remote_cert=/etc/keys/host_1-cert.pem
@@ -207,29 +209,29 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure
In `host_1`::
- $ ovs-pki init
+ # ovs-pki init
Generate certificate requests and copy the certificate request of
`host_2` to `host_1`.
In `host_1`::
- $ ovs-pki req -u host_1
+ # ovs-pki req -u host_1
In `host_2`::
- $ ovs-pki req -u host_2
- $ scp host_2-req.pem $ip_1:/etc/keys/host_2-req.pem
+ # ovs-pki req -u host_2
+ # scp host_2-req.pem $ip_1:/etc/keys/host_2-req.pem
Sign the certificate requests with the CA key. Copy `host_2`'s signed
certificate and the CA certificate to `host_2`.
In `host_1`::
- $ ovs-pki sign host_1 switch
- $ ovs-pki sign host_2 switch
- $ scp host_2-cert.pem $ip_2:/etc/keys/host_2-cert.pem
- $ scp /var/lib/openvswitch/pki/switchca/cacert.pem \
+ # ovs-pki sign host_1 switch
+ # ovs-pki sign host_2 switch
+ # scp host_2-cert.pem $ip_2:/etc/keys/host_2-cert.pem
+ # scp /var/lib/openvswitch/pki/switchca/cacert.pem \
$ip_2:/etc/keys/cacert.pem
.. note::
@@ -243,22 +245,22 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure
In `host_1`::
- $ ovs-vsctl set Open_vSwitch . \
+ # ovs-vsctl set Open_vSwitch . \
other_config:certificate=/etc/keys/host_1-cert.pem \
other_config:private_key=/etc/keys/host_1-privkey.pem \
other_config:ca_cert=/etc/keys/cacert.pem
- $ ovs-vsctl add-port br-ipsec tun -- \
+ # ovs-vsctl add-port br-ipsec tun -- \
set interface tun type=gre \
options:remote_ip=$ip_2 \
options:remote_name=host_2
In `host_2`::
- $ ovs-vsctl set Open_vSwitch . \
+ # ovs-vsctl set Open_vSwitch . \
other_config:certificate=/etc/keys/host_2-cert.pem \
other_config:private_key=/etc/keys/host_2-privkey.pem \
other_config:ca_cert=/etc/keys/cacert.pem
- $ ovs-vsctl add-port br-ipsec tun -- \
+ # ovs-vsctl add-port br-ipsec tun -- \
set interface tun type=gre \
options:remote_ip=$ip_1 \
options:remote_name=host_1
@@ -276,8 +278,8 @@ external IP is 1.1.1.1, and `host_2`'s external IP is 2.2.2.2. Make sure
Now you should have an IPsec GRE tunnel running between two hosts. To verify
it, in `host_1`::
- $ ping 192.0.0.2 &
- $ tcpdump -ni any net $ip_2
+ # ping 192.0.0.2 &
+ # tcpdump -ni any net $ip_2
You should be able to see that ESP packets are being sent from `host_1` to
`host_2`.
@@ -289,7 +291,7 @@ The ``ovs-monitor-ipsec`` daemon manages and monitors the IPsec tunnel state.
Use the following ``ovs-appctl`` command to view ``ovs-monitor-ipsec`` internal
representation of tunnel configuration::
- $ ovs-appctl -t ovs-monitor-ipsec tunnels/show
+ # ovs-appctl -t ovs-monitor-ipsec tunnels/show
If there is misconfiguration, then ``ovs-appctl`` should indicate why.
For example::
@@ -324,7 +326,7 @@ For example::
If you don't see any active connections, try to run the following command to
refresh the ``ovs-monitor-ipsec`` daemon::
- $ ovs-appctl -t ovs-monitor-ipsec refresh
+ # ovs-appctl -t ovs-monitor-ipsec refresh
You can also check the logs of the ``ovs-monitor-ipsec`` daemon and the IKE
daemon to locate issues. ``ovs-monitor-ipsec`` outputs log messages to
diff --git a/Makefile.am b/Makefile.am
index b279303d186c6f621432bf9ce28ae7a7028cf0bd..691a005ad9776fbaf902fbebf45e8188cce0c302 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -46,7 +46,7 @@ AM_CPPFLAGS += -DNDEBUG
AM_CFLAGS += -fomit-frame-pointer
endif
-AM_CTAGSFLAGS = $(OVS_CTAGS_IDENTIFIERS_LIST)
+AM_CTAGSFLAGS = -I "$(OVS_CTAGS_IDENTIFIERS_LIST)"
if WIN32
psep=";"
@@ -76,12 +76,13 @@ EXTRA_DIST = \
MAINTAINERS.rst \
README.rst \
NOTICE \
+ .ci/linux-build.sh \
+ .ci/linux-prepare.sh \
+ .ci/osx-build.sh \
+ .ci/osx-prepare.sh \
.cirrus.yml \
+ .github/workflows/build-and-test.yml \
.travis.yml \
- .travis/linux-build.sh \
- .travis/linux-prepare.sh \
- .travis/osx-build.sh \
- .travis/osx-prepare.sh \
appveyor.yml \
boot.sh \
poc/builders/Vagrantfile \
@@ -276,7 +277,7 @@ static-check:
fi
.PHONY: static-check
-# Check that assert.h is not used outside a whitelist of files.
+# Check that assert.h is not used (outside a small set of files).
ALL_LOCAL += check-assert-h-usage
check-assert-h-usage:
@if test -e $(srcdir)/.git && (git --version) >/dev/null 2>&1 && \
@@ -323,7 +324,7 @@ check-tabs:
if test -e .git && (git --version) >/dev/null 2>&1 && \
grep -ln "^ " \
`git ls-files \
- | grep -v -f build-aux/initial-tab-whitelist` /dev/null \
+ | grep -v -f build-aux/initial-tab-allowed-files` /dev/null \
| $(EGREP) -v ':[ ]*/?\*'; \
then \
echo "See above for files that use tabs for indentation."; \
@@ -336,16 +337,16 @@ ALL_LOCAL += thread-safety-check
thread-safety-check:
@cd $(srcdir); \
if test -e .git && (git --version) >/dev/null 2>&1 && \
- grep -n -f build-aux/thread-safety-blacklist \
+ grep -n -f build-aux/thread-safety-forbidden \
`git ls-files | grep '\.[ch]$$' \
| $(EGREP) -v '^datapath|^lib/sflow|^third-party'` /dev/null \
| $(EGREP) -v ':[ ]*/?\*'; \
then \
echo "See above for list of calls to functions that are"; \
- echo "blacklisted due to thread safety issues"; \
+ echo "forbidden due to thread safety issues"; \
exit 1; \
fi
-EXTRA_DIST += build-aux/thread-safety-blacklist
+EXTRA_DIST += build-aux/thread-safety-forbidden
.PHONY: thread-safety-check
# Check that "ip" is used in preference to "ifconfig", because
@@ -412,8 +413,8 @@ flake8-check: $(FLAKE8_PYFILES)
endif
CLEANFILES += flake8-check
-include $(srcdir)/manpages.mk
-$(srcdir)/manpages.mk: $(MAN_ROOTS) build-aux/sodepends.py python/build/soutil.py
+-include manpages.mk
+manpages.mk: $(MAN_ROOTS) build-aux/sodepends.py python/build/soutil.py
@PYTHONPATH=$$PYTHONPATH$(psep)$(srcdir)/python $(PYTHON3) $(srcdir)/build-aux/sodepends.py -I. -I$(srcdir) $(MAN_ROOTS) >$(@F).tmp
@if cmp -s $(@F).tmp $@; then \
touch $@; \
@@ -421,6 +422,7 @@ $(srcdir)/manpages.mk: $(MAN_ROOTS) build-aux/sodepends.py python/build/soutil.p
else \
mv $(@F).tmp $@; \
fi
+CLEANFILES += manpages.mk
CLEANFILES += manpage-dep-check
if VSTUDIO_DDK
diff --git a/NEWS b/NEWS
index dab94e924dc6dc709ece8503d4936b0f1f2e0a82..d357da31d8778e49020d38c1d9d19f22c6c33d0d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,90 @@
+Post-v2.14.0
+---------------------
+ - OVSDB:
+ * New unixctl command 'ovsdb-server/get-db-storage-status' to show the
+ status of the storage that's backing a database.
+ * New unixctl command 'ovsdb-server/memory-trim-on-compaction on|off'.
+ If turned on, ovsdb-server will try to reclaim all the unused memory
+ after every DB compaction back to OS. Disabled by default.
+ * Maximum backlog on RAFT connections limited to 500 messages or 4GB.
+ Once threshold reached, connection is dropped (and re-established).
+ Use the 'cluster/set-backlog-threshold' command to change limits.
+ - DPDK:
+ * Removed support for vhost-user dequeue zero-copy.
+ * Add support for DPDK 20.11.
+ - Userspace datapath:
+ * Add the 'pmd' option to "ovs-appctl dpctl/dump-flows", which
+ restricts a flow dump to a single PMD thread if set.
+ * New 'options:dpdk-vf-mac' field for DPDK interface of VF ports,
+ that allows configuring the MAC address of a VF representor.
+ * Add generic IP protocol support to conntrack. With this change, all
+ traffic other than UDP, TCP, and ICMP will be treated as general L3
+ traffic, i.e. using 3-tuples.
+ - The environment variable OVS_UNBOUND_CONF, if set, is now used
+ as the DNS resolver's (unbound) configuration file.
+ - Linux datapath:
+ * Support for kernel versions up to 5.8.x.
+ - Terminology:
+ * The terms "master" and "slave" have been replaced by "primary" and
+ "secondary", respectively, for OpenFlow connection roles.
+ * The term "slave" has been replaced by "member", for bonds, LACP, and
+ OpenFlow bundle actions.
+ - Support for GitHub Actions based continuous integration builds has been
+ added.
+ - Bareudp Tunnel
+ * Bareudp device support is present in linux kernel from version 5.7
+ * Kernel bareudp device is not backported to ovs tree.
+ * Userspace datapath support is not added
+
+
+v2.14.0 - 17 Aug 2020
+---------------------
+ - ovs-vswitchd no longer deletes datapath flows on exit by default.
+ - OpenFlow:
+ * The OpenFlow ofp_desc/serial_num may now be configured by setting the
+ value of other-config:dp-sn in the Bridge table.
+ * Added support to watch CONTROLLER port status in fast failover group.
+ * New action "delete_field".
+ - DPDK:
+ * Deprecated DPDK pdump packet capture support removed.
+ * Deprecated DPDK ring ports (dpdkr) are no longer supported.
+ * Add hardware offload support for VLAN Push/Pop actions (experimental).
+ * Add hardware offload support for matching IPv6 protocol (experimental).
+ * Add hardware offload support for set of IPv6 src/dst/ttl
+ and tunnel push-output actions (experimental).
+ * OVS validated with DPDK 19.11.2. Due to the inclusion of fixes for
+ CVE-2020-10722, CVE-2020-10723, CVE-2020-10724, CVE-2020-10725 and
+ CVE-2020-10726, this DPDK version is strongly recommended.
+ * New 'ovs-appctl dpdk/log-list' and 'ovs-appctl dpdk/log-set' commands
+ to list and change log levels in DPDK components.
+ * Vhost-user Dequeue zero-copy support is deprecated and will be removed
+ in the next release.
+ - Linux datapath:
+ * Support for kernel versions up to 5.5.x.
+ - AF_XDP:
+ * New netdev class 'afxdp-nonpmd' for netdev-afxdp to save CPU cycles
+ by enabling interrupt mode.
+ - Userspace datapath:
+ * Removed artificial datapath flow limit that was 65536.
+ Now number of datapath flows is fully controlled by revalidators and the
+ 'other_config:flow-limit' knob.
+ * Add support for conntrack zone-based timeout policy.
+ * New configuration knob 'other_config:lb-output-action' for bond ports
+ that enables new datapath action 'lb_output' to avoid recirculation
+ in balance-tcp mode. Disabled by default.
+ * Add runtime CPU ISA detection to allow optimized ISA functions
+ * Add support for dynamically changing DPCLS subtable lookup functions
+ * Add ISA optimized DPCLS lookup function using AVX512
+ - New configuration knob 'other_config:bond-primary' for AB bonds
+ that specifies which interface is the preferred port if it is active.
+ - Tunnels: TC Flower offload
+ * Tunnel Local endpoint address masked match is supported.
+ * Tunnel Remote endpoint address masked match is supported.
+ - GTP-U Tunnel Protocol
+ * Add two new fields: tun_gtpu_flags, tun_gtpu_msgtype.
+ * Only support for userspace datapath.
+
+
v2.13.0 - 14 Feb 2020
---------------------
- OVN:
@@ -43,6 +130,9 @@ v2.13.0 - 14 Feb 2020
- 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
type filter supports new filters: "dpdk" and "partially-offloaded".
+ - Add new argument '--offload-stats' for command
+ 'ovs-appctl bridge/dump-flows',
+ so it can display offloaded packets statistics.
v2.12.0 - 03 Sep 2019
---------------------
@@ -117,9 +207,6 @@ v2.12.0 - 03 Sep 2019
* Add support for conntrack zone-based timeout policy.
- 'ovs-dpctl dump-flows' is no longer suitable for dumping offloaded flows.
'ovs-appctl dpctl/dump-flows' should be used instead.
- - Add new argument '--offload-stats' for command
- 'ovs-appctl bridge/dump-flows',
- so it can display offloaded packets statistics.
- Add L2 GRE tunnel over IPv6 support.
v2.11.0 - 19 Feb 2019
@@ -769,7 +856,7 @@ v2.4.0 - 20 Aug 2015
The implementation has been tested successfully against the Ixia Automated
Network Validation Library (ANVL).
- Stats are no longer updated on fake bond interface.
- - Keep active bond slave selection across OVS restart.
+ - Keep active bond interface selection across OVS restart.
- A simple wrapper script, 'ovs-docker', to integrate OVS with Docker
containers. If and when there is a native integration of Open vSwitch
with Docker, the wrapper script will be retired.
diff --git a/README.rst b/README.rst
index e06ddf2671d74a0f839ddf3282fa4e513b456ed9..319f705154985833086bc11427a57b43217d6272 100644
--- a/README.rst
+++ b/README.rst
@@ -6,6 +6,8 @@
Open vSwitch
============
+.. image:: https://github.com/openvswitch/ovs/workflows/Build%20and%20Test/badge.svg
+ :target: https://github.com/openvswitch/ovs/actions
.. image:: https://travis-ci.org/openvswitch/ovs.png
:target: https://travis-ci.org/openvswitch/ovs
.. image:: https://ci.appveyor.com/api/projects/status/github/openvswitch/ovs?branch=master&svg=true&retina=true
diff --git a/acinclude.m4 b/acinclude.m4
index c1470ccc6bc4eb74fb2517d06ae49145a97144d5..60871f67a5c75557325ef0a423f7ca3c5eceb3b5 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -14,6 +14,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+dnl Set OVS DPCLS Autovalidator as default subtable search at compile time?
+dnl This enables automatically running all unit tests with all DPCLS
+dnl implementations.
+AC_DEFUN([OVS_CHECK_DPCLS_AUTOVALIDATOR], [
+ AC_ARG_ENABLE([autovalidator],
+ [AC_HELP_STRING([--enable-autovalidator], [Enable DPCLS autovalidator as default subtable search implementation.])],
+ [autovalidator=yes],[autovalidator=no])
+ AC_MSG_CHECKING([whether DPCLS Autovalidator is default implementation])
+ if test "$autovalidator" != yes; then
+ AC_MSG_RESULT([no])
+ else
+ OVS_CFLAGS="$OVS_CFLAGS -DDPCLS_AUTOVALIDATOR_DEFAULT"
+ AC_MSG_RESULT([yes])
+ fi
+])
+
dnl OVS_ENABLE_WERROR
AC_DEFUN([OVS_ENABLE_WERROR],
[AC_ARG_ENABLE(
@@ -151,10 +167,10 @@ AC_DEFUN([OVS_CHECK_LINUX], [
AC_MSG_RESULT([$kversion])
if test "$version" -ge 5; then
- if test "$version" = 5 && test "$patchlevel" -le 0; then
+ if test "$version" = 5 && test "$patchlevel" -le 8; then
: # Linux 5.x
else
- AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 5.0.x is not supported (please refer to the FAQ for advice)])
+ AC_ERROR([Linux kernel in $KBUILD is version $kversion, but version newer than 5.8.x is not supported (please refer to the FAQ for advice)])
fi
elif test "$version" = 4; then
: # Linux 4.x
@@ -250,6 +266,18 @@ AC_DEFUN([OVS_CHECK_LINUX_SCTP_CT], [
[Define to 1 if SCTP_CONNTRACK_HEARTBEAT_SENT is available.])])
])
+dnl OVS_CHECK_LINUX_VIRTIO_TYPES
+dnl
+dnl Checks for kernels that need virtio_types definition.
+AC_DEFUN([OVS_CHECK_LINUX_VIRTIO_TYPES], [
+ AC_COMPILE_IFELSE([
+ AC_LANG_PROGRAM([#include <linux/virtio_types.h>], [
+ __virtio16 x = 0;
+ ])],
+ [AC_DEFINE([HAVE_VIRTIO_TYPES], [1],
+ [Define to 1 if __virtio16 is available.])])
+])
+
dnl OVS_FIND_DEPENDENCY(FUNCTION, SEARCH_LIBS, NAME_TO_PRINT)
dnl
dnl Check for a function in a library list.
@@ -306,8 +334,9 @@ dnl
dnl Configure DPDK source tree
AC_DEFUN([OVS_CHECK_DPDK], [
AC_ARG_WITH([dpdk],
- [AC_HELP_STRING([--with-dpdk=/path/to/dpdk],
- [Specify the DPDK build directory])],
+ [AC_HELP_STRING([--with-dpdk=static|shared|yes],
+ [Specify "static" or "shared" depending on the
+ DPDK libraries to use])],
[have_dpdk=true])
AC_MSG_CHECKING([whether dpdk is enabled])
@@ -317,35 +346,45 @@ AC_DEFUN([OVS_CHECK_DPDK], [
else
AC_MSG_RESULT([yes])
case "$with_dpdk" in
- yes)
- DPDK_AUTO_DISCOVER="true"
- PKG_CHECK_MODULES_STATIC([DPDK], [libdpdk], [
- DPDK_INCLUDE="$DPDK_CFLAGS"
- DPDK_LIB="$DPDK_LIBS"], [
- DPDK_INCLUDE="-I/usr/local/include/dpdk -I/usr/include/dpdk"
- DPDK_LIB="-ldpdk"])
- ;;
- *)
- DPDK_AUTO_DISCOVER="false"
- DPDK_INCLUDE_PATH="$with_dpdk/include"
- # If 'with_dpdk' is passed install directory, point to headers
- # installed in $DESTDIR/$prefix/include/dpdk
- if test -e "$DPDK_INCLUDE_PATH/rte_config.h"; then
- DPDK_INCLUDE="-I$DPDK_INCLUDE_PATH"
- elif test -e "$DPDK_INCLUDE_PATH/dpdk/rte_config.h"; then
- DPDK_INCLUDE="-I$DPDK_INCLUDE_PATH/dpdk"
- fi
- DPDK_LIB_DIR="$with_dpdk/lib"
- DPDK_LIB="-ldpdk"
- ;;
+ "shared")
+ PKG_CHECK_MODULES([DPDK], [libdpdk], [
+ DPDK_INCLUDE="$DPDK_CFLAGS"
+ DPDK_LIB="$DPDK_LIBS"], [
+ DPDK_INCLUDE="-I/usr/local/include/dpdk -I/usr/include/dpdk"
+ DPDK_LIB="-ldpdk"])
+ ;;
+ "static" | "yes")
+ PKG_CHECK_MODULES_STATIC([DPDK], [libdpdk], [
+ DPDK_INCLUDE="$DPDK_CFLAGS"
+ DPDK_LIB="$DPDK_LIBS"], [
+ DPDK_INCLUDE="-I/usr/local/include/dpdk -I/usr/include/dpdk"
+ DPDK_LIB="-ldpdk"])
+
+ dnl Statically linked private DPDK objects of form
+ dnl -l:file.a must be positioned between
+ dnl --whole-archive ... --no-whole-archive linker parameters.
+ dnl Old pkg-config versions misplace --no-whole-archive parameter
+ dnl and put it next to --whole-archive.
+ AC_MSG_CHECKING([for faulty pkg-config version])
+ echo "$DPDK_LIB" | grep -q 'whole-archive.*l:lib.*no-whole-archive'
+ status=$?
+ case $status in
+ 0)
+ AC_MSG_RESULT([no])
+ ;;
+ 1)
+ AC_MSG_RESULT([yes])
+ AC_MSG_ERROR([Please upgrade pkg-config])
+ ;;
+ *)
+ AC_MSG_ERROR([grep exited with status $status])
+ ;;
+ esac
esac
ovs_save_CFLAGS="$CFLAGS"
ovs_save_LDFLAGS="$LDFLAGS"
CFLAGS="$CFLAGS $DPDK_INCLUDE"
- if test "$DPDK_AUTO_DISCOVER" = "false"; then
- LDFLAGS="$LDFLAGS -L${DPDK_LIB_DIR}"
- fi
AC_CHECK_HEADERS([rte_config.h], [], [
AC_MSG_ERROR([unable to find rte_config.h in $with_dpdk])
@@ -355,31 +394,19 @@ AC_DEFUN([OVS_CHECK_DPDK], [
OVS_FIND_DEPENDENCY([get_mempolicy], [numa], [libnuma])
], [], [[#include <rte_config.h>]])
+ AC_CHECK_DECL([RTE_LIBRTE_PMD_PCAP], [
+ OVS_FIND_DEPENDENCY([pcap_dump_close], [pcap], [libpcap])
+ ], [], [[#include <rte_config.h>]])
+
+ AC_CHECK_DECL([RTE_LIBRTE_PMD_AF_XDP], [
+ LIBBPF_LDADD="-lbpf"
+ ], [], [[#include <rte_config.h>]])
+
AC_CHECK_DECL([RTE_LIBRTE_VHOST_NUMA], [
AC_DEFINE([VHOST_NUMA], [1], [NUMA Aware vHost support detected in DPDK.])
], [], [[#include <rte_config.h>]])
- AC_MSG_CHECKING([whether DPDK pdump support is enabled])
- AC_ARG_ENABLE(
- [dpdk-pdump],
- [AC_HELP_STRING([--enable-dpdk-pdump],
- [Enable DPDK pdump packet capture support])],
- [AC_MSG_RESULT([yes])
- AC_MSG_WARN([DPDK pdump is deprecated, consider using ovs-tcpdump instead])
- AC_CHECK_DECL([RTE_LIBRTE_PMD_PCAP], [
- OVS_FIND_DEPENDENCY([pcap_dump], [pcap], [libpcap])
- AC_CHECK_DECL([RTE_LIBRTE_PDUMP], [
- AC_DEFINE([DPDK_PDUMP], [1], [DPDK pdump enabled in OVS.])
- ], [
- AC_MSG_ERROR([RTE_LIBRTE_PDUMP is not defined in rte_config.h])
- ], [[#include <rte_config.h>]])
- ], [
- AC_MSG_ERROR([RTE_LIBRTE_PMD_PCAP is not defined in rte_config.h])
- ], [[#include <rte_config.h>]])],
- [AC_MSG_RESULT([no])])
-
AC_CHECK_DECL([RTE_LIBRTE_MLX5_PMD], [dnl found
- OVS_FIND_DEPENDENCY([mnl_attr_put], [mnl], [libmnl])
AC_CHECK_DECL([RTE_IBVERBS_LINK_DLOPEN], [], [dnl not found
OVS_FIND_DEPENDENCY([mlx5dv_create_wq], [mlx5], [libmlx5])
OVS_FIND_DEPENDENCY([verbs_init_cq], [ibverbs], [libibverbs])
@@ -406,20 +433,18 @@ AC_DEFUN([OVS_CHECK_DPDK], [
[AC_MSG_RESULT([yes])
DPDKLIB_FOUND=true],
[AC_MSG_RESULT([no])
- if test "$DPDK_AUTO_DISCOVER" = "true"; then
- AC_MSG_ERROR(m4_normalize([
- Could not find DPDK library in default search path, Use --with-dpdk
- to specify the DPDK library installed in non-standard location]))
- else
- AC_MSG_ERROR([Could not find DPDK libraries in $DPDK_LIB_DIR])
- fi
+ AC_MSG_ERROR(m4_normalize([
+ Could not find DPDK library in default search path, update
+ PKG_CONFIG_PATH for pkg-config to find the .pc file in
+ non-standard location]))
])
CFLAGS="$ovs_save_CFLAGS"
LDFLAGS="$ovs_save_LDFLAGS"
- if test "$DPDK_AUTO_DISCOVER" = "false"; then
- OVS_LDFLAGS="$OVS_LDFLAGS -L$DPDK_LIB_DIR"
- fi
+ # Stripping out possible instruction set specific configuration that DPDK
+ # forces in pkg-config since this could override user-specified options.
+ # It's enough to have -mssse3 to build with DPDK headers.
+ DPDK_INCLUDE=$(echo "$DPDK_INCLUDE" | sed 's/-march=[[^ ]]*//g')
OVS_CFLAGS="$OVS_CFLAGS $DPDK_INCLUDE"
OVS_ENABLE_OPTION([-mssse3])
@@ -428,17 +453,15 @@ AC_DEFUN([OVS_CHECK_DPDK], [
# This happens because the rest of the DPDK code doesn't use any symbol in
# the pmd driver objects, and the drivers register themselves using an
# __attribute__((constructor)) function.
- #
- # These options are specified inside a single -Wl directive to prevent
- # autotools from reordering them.
- #
- # OTOH newer versions of dpdk pkg-config (generated with Meson)
- # will already have flagged just the right set of libs with
- # --whole-archive - in those cases do not wrap it once more.
- case "$DPDK_LIB" in
- *whole-archive*) DPDK_vswitchd_LDFLAGS=$DPDK_LIB;;
- *) DPDK_vswitchd_LDFLAGS=-Wl,--whole-archive,$DPDK_LIB,--no-whole-archive
- esac
+ # Wrap the DPDK libraries inside a single -Wl directive
+ # after comma separation to prevent autotools from reordering them.
+ DPDK_vswitchd_LDFLAGS=$(echo "$DPDK_LIB"| tr -s ' ' ',' | sed 's/-Wl,//g')
+ # Replace -pthread with -lpthread for LD and remove the last extra comma.
+ DPDK_vswitchd_LDFLAGS=$(echo "$DPDK_vswitchd_LDFLAGS"| sed 's/,$//' | \
+ sed 's/-pthread/-lpthread/g')
+ # Prepend "-Wl,".
+ DPDK_vswitchd_LDFLAGS="-Wl,$DPDK_vswitchd_LDFLAGS"
+
AC_SUBST([DPDK_vswitchd_LDFLAGS])
AC_DEFINE([DPDK_NETDEV], [1], [System uses the DPDK module.])
fi
@@ -539,6 +562,37 @@ AC_DEFUN([OVS_FIND_PARAM_IFELSE], [
fi
])
+dnl OVS_FIND_OP_PARAM_IFELSE(FILE, OP, REGEX, [IF-MATCH], [IF-NO-MATCH])
+dnl
+dnl Looks for OP in FILE. If it is found, greps for REGEX within the
+dnl OP definition. If this is successful, runs IF-MATCH, otherwise
+dnl IF-NO-MATCH. If IF-MATCH is empty then it defaults to
+dnl OVS_DEFINE(HAVE_<OP>_WITH_<REGEX>), with <OP> and <REGEX>
+dnl translated to uppercase.
+AC_DEFUN([OVS_FIND_OP_PARAM_IFELSE], [
+ AC_MSG_CHECKING([whether $2 has member $3 in $1])
+ if test -f $1; then
+ awk '/$2[[ \t\n]]*\)\(/,/;/' $1 2>/dev/null | grep '$3' >/dev/null
+ status=$?
+ case $status in
+ 0)
+ AC_MSG_RESULT([yes])
+ m4_if([$4], [], [OVS_DEFINE([HAVE_]m4_toupper([$2])[_WITH_]m4_toupper([$3]))], [$4])
+ ;;
+ 1)
+ AC_MSG_RESULT([no])
+ $5
+ ;;
+ *)
+ AC_MSG_ERROR([grep exited with status $status])
+ ;;
+ esac
+ else
+ AC_MSG_RESULT([file not found])
+ $5
+ fi
+])
+
dnl OVS_DEFINE(NAME)
dnl
dnl Defines NAME to 1 in kcompat.h.
@@ -567,9 +621,14 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/net/ip6_fib.h], [rt6_get_cookie],
[OVS_DEFINE([HAVE_RT6_GET_COOKIE])])
+ OVS_FIND_FIELD_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_stub],
+ [dst_entry])
OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup.*net],
[OVS_DEFINE([HAVE_IPV6_DST_LOOKUP_NET])])
+ OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup_flow.*net],
+ [OVS_DEFINE([HAVE_IPV6_DST_LOOKUP_FLOW_NET])])
OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_stub])
+ OVS_GREP_IFELSE([$KSRC/include/net/addrconf.h], [ipv6_dst_lookup_flow])
OVS_GREP_IFELSE([$KSRC/include/linux/err.h], [ERR_CAST])
OVS_GREP_IFELSE([$KSRC/include/linux/err.h], [IS_ERR_OR_NULL])
@@ -579,6 +638,9 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
[OVS_DEFINE([HAVE_UPSTREAM_STATIC_KEY])])
OVS_GREP_IFELSE([$KSRC/include/linux/jump_label.h], [DEFINE_STATIC_KEY_FALSE],
[OVS_DEFINE([HAVE_DEFINE_STATIC_KEY])])
+ OVS_GREP_IFELSE([$KSRC/include/linux/jump_label.h],
+ [DECLARE_STATIC_KEY_FALSE],
+ [OVS_DEFINE([HAVE_DECLARE_STATIC_KEY])])
OVS_GREP_IFELSE([$KSRC/include/linux/etherdevice.h], [eth_hw_addr_random])
OVS_GREP_IFELSE([$KSRC/include/linux/etherdevice.h], [ether_addr_copy])
@@ -765,6 +827,10 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
[prandom_u32[[\(]]],
[OVS_DEFINE([HAVE_PRANDOM_U32])])
OVS_GREP_IFELSE([$KSRC/include/linux/random.h], [prandom_u32_max])
+ OVS_GREP_IFELSE([$KSRC/include/linux/prandom.h],
+ [prandom_u32[[\(]]],
+ [OVS_DEFINE([HAVE_PRANDOM_U32])])
+ OVS_GREP_IFELSE([$KSRC/include/linux/prandom.h], [prandom_u32_max])
OVS_GREP_IFELSE([$KSRC/include/net/rtnetlink.h], [get_link_net])
OVS_GREP_IFELSE([$KSRC/include/net/rtnetlink.h], [name_assign_type])
@@ -818,8 +884,6 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_clear_hash])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [int.skb_zerocopy(],
[OVS_DEFINE([HAVE_SKB_ZEROCOPY])])
- OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [u8.*l4_rxhash],
- [OVS_DEFINE([HAVE_L4_RXHASH])])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_ensure_writable])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_vlan_pop])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [__skb_vlan_pop])
@@ -830,8 +894,6 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_nfct])
OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_put_zero])
- OVS_GREP_IFELSE([$KSRC/include/linux/types.h], [bool],
- [OVS_DEFINE([HAVE_BOOL_TYPE])])
OVS_GREP_IFELSE([$KSRC/include/linux/types.h], [__wsum],
[OVS_DEFINE([HAVE_CSUM_TYPES])])
OVS_GREP_IFELSE([$KSRC/include/uapi/linux/types.h], [__wsum],
@@ -918,8 +980,6 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/net/sock.h], [sk_no_check_tx])
OVS_GREP_IFELSE([$KSRC/include/linux/udp.h], [no_check6_tx])
- OVS_GREP_IFELSE([$KSRC/include/linux/utsrelease.h], [el6],
- [OVS_DEFINE([HAVE_RHEL6_PER_CPU])])
OVS_FIND_PARAM_IFELSE([$KSRC/include/net/protocol.h],
[udp_add_offload], [net],
[OVS_DEFINE([HAVE_UDP_ADD_OFFLOAD_TAKES_NET])])
@@ -1069,6 +1129,21 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
[OVS_DEFINE([HAVE_RBTREE_RB_LINK_NODE_RCU])])
OVS_GREP_IFELSE([$KSRC/include/net/dst_ops.h], [bool confirm_neigh],
[OVS_DEFINE([HAVE_DST_OPS_CONFIRM_NEIGH])])
+ OVS_GREP_IFELSE([$KSRC/include/net/inet_frag.h], [fqdir],
+ [OVS_DEFINE([HAVE_INET_FRAG_FQDIR])])
+ OVS_FIND_FIELD_IFELSE([$KSRC/include/net/genetlink.h], [genl_ops],
+ [policy],
+ [OVS_DEFINE([HAVE_GENL_OPS_POLICY])])
+ OVS_GREP_IFELSE([$KSRC/include/net/netlink.h],
+ [nla_parse_deprecated_strict],
+ [OVS_DEFINE([HAVE_NLA_PARSE_DEPRECATED_STRICT])])
+ OVS_FIND_OP_PARAM_IFELSE([$KSRC/include/net/rtnetlink.h],
+ [validate], [extack],
+ [OVS_DEFINE([HAVE_RTNLOP_VALIDATE_WITH_EXTACK])])
+ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+ [__skb_set_hash])
+ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [sw_hash])
+ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h], [skb_get_hash_raw])
if cmp -s datapath/linux/kcompat.h.new \
datapath/linux/kcompat.h >/dev/null 2>&1; then
@@ -1294,11 +1369,11 @@ AC_DEFUN([OVS_ENABLE_SPARSE],
dnl OVS_CTAGS_IDENTIFIERS
dnl
-dnl ctags ignores symbols with extras identifiers. This builds a list of
-dnl specially handled identifiers to be ignored.
+dnl ctags ignores symbols with extra identifiers. This is a list of
+dnl specially handled identifiers to be ignored. [ctags(1) -I <list>].
AC_DEFUN([OVS_CTAGS_IDENTIFIERS],
AC_SUBST([OVS_CTAGS_IDENTIFIERS_LIST],
- [`printf %s '-I "'; sed -n 's/^#define \(OVS_[A-Z_]\+\)(\.\.\.)$/\1+/p' ${srcdir}/include/openvswitch/compiler.h | tr \\\n ' ' ; printf '"'`] ))
+ ["OVS_LOCKABLE OVS_NO_THREAD_SAFETY_ANALYSIS OVS_REQ_RDLOCK+ OVS_ACQ_RDLOCK+ OVS_REQ_WRLOCK+ OVS_ACQ_WRLOCK+ OVS_REQUIRES+ OVS_ACQUIRES+ OVS_TRY_WRLOCK+ OVS_TRY_RDLOCK+ OVS_TRY_LOCK+ OVS_GUARDED_BY+ OVS_EXCLUDED+ OVS_RELEASES+ OVS_ACQ_BEFORE+ OVS_ACQ_AFTER+"]))
dnl OVS_PTHREAD_SET_NAME
dnl
diff --git a/appveyor.yml b/appveyor.yml
index fa6754ce20ea6681a795c37cc3cb4c0bb7ec344a..25c3f69fb48784661582b185641445ed6f5ab831 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,48 +1,60 @@
version: 1.0.{build}
+image: Visual Studio 2019
branches:
only:
- master
-clone_folder: C:\openvswitch
+configuration:
+ - Debug
+ - Release
+clone_folder: C:\openvswitch_compile
init:
- ps: $env:PATH ="C:\Python37;"+$env:PATH
- ps: New-Item -Type HardLink -Path "C:\Python37\python3.exe" -Value "C:\Python37\python.exe"
- ps: >-
- mkdir C:\pthreads-win32
-
mkdir C:\ovs-build-downloads
- $source = "ftp://sourceware.org/pub/pthreads-win32/pthreads-w32-2-9-1-release.zip"
-
- $destination = "C:\pthreads-win32\pthreads-win32.zip"
-
- Invoke-WebRequest $source -OutFile $destination
+ mkdir C:\openvswitch\driver
- $source = "https://slproweb.com/download/Win32OpenSSL-1_0_2t.exe"
+ $source = "https://slproweb.com/download/Win64OpenSSL-1_0_2u.exe"
- $destination = "C:\ovs-build-downloads\Win32OpenSSL-1_0_2t.exe"
+ $destination = "C:\ovs-build-downloads\Win64OpenSSL-1_0_2u.exe"
Invoke-WebRequest $source -OutFile $destination
- cd C:\pthreads-win32
-
- 7z x C:\pthreads-win32\pthreads-win32.zip
-
cd C:\ovs-build-downloads
- .\Win32OpenSSL-1_0_2t.exe /silent /verysilent /sp- /suppressmsgboxes
+ .\Win64OpenSSL-1_0_2u.exe /silent /verysilent /sp- /suppressmsgboxes
Start-Sleep -s 30
cd C:\openvswitch
+ git clone https://git.code.sf.net/p/pthreads4w/code c:\pthreads4w-code
+
python3 -m pip install pypiwin32 --disable-pip-version-check
+ cd C:\openvswitch_compile
+
build_script:
-- '"C:\Program Files (x86)\Microsoft Visual Studio 12.0\Common7\Tools\VsDevCmd"'
+- '"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"'
- C:\MinGW\msys\1.0\bin\bash -lc "echo \"C:/MinGW /mingw\" > /etc/fstab"
-- C:\MinGW\msys\1.0\bin\bash -lc "cp /c/pthreads-win32/Pre-built.2/dll/x86/*.dll /c/openvswitch/."
- C:\MinGW\msys\1.0\bin\bash -lc "mv /bin/link.exe /bin/link_copy.exe"
-- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./boot.sh"
-- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -lShlwapi -liphlpapi -lwbemuuid -lole32 -loleaut32\" --with-pthread=C:/pthreads-win32/Pre-built.2 --with-openssl=C:/OpenSSL-Win32 --with-vstudiotarget=\"Debug\"
-- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make"
-- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch && make datapath_windows_analyze"
+# Build pthreads
+- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/pthreads4w-code && nmake all install"
+- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && ./boot.sh"
+- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && ./configure CC=build-aux/cccl LD=\"`which link`\" LIBS=\"-lws2_32 -lShlwapi -liphlpapi -lwbemuuid -lole32 -loleaut32\" --prefix=C:/openvswitch/usr --localstatedir=C:/openvswitch/var --sysconfdir=C:/openvswitch/etc --with-pthread=c:/PTHREADS-BUILT/ --enable-ssl --with-openssl=C:/OpenSSL-Win64 --with-vstudiotarget=\"%CONFIGURATION%\""
+- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && make -j 4"
+- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && make datapath_windows_analyze"
+- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && make install"
+- C:\MinGW\msys\1.0\bin\bash -lc "cd /c/openvswitch_compile && make windows_installer"
+- cp C:\PTHREADS-BUILT\bin\pthreadVC3.dll C:\openvswitch\usr\bin
+- cp C:\PTHREADS-BUILT\bin\pthreadVC3.dll C:\openvswitch\usr\sbin
+- ps: cp C:\openvswitch_compile\datapath-windows\x64\Win10$env:CONFIGURATION\package\* C:\openvswitch\driver
+- ps: cp C:\openvswitch_compile\datapath-windows\x64\Win10$env:CONFIGURATION\package.cer C:\openvswitch\driver
+- ps: cp C:\openvswitch_compile\datapath-windows\misc\* C:\openvswitch\driver
+- cp c:\openvswitch_compile\windows\ovs-windows-installer\bin\x64\Release\OpenvSwitch.msi c:\OpenvSwitch-%CONFIGURATION%.msi
+
+after_build:
+ - ps: 7z a C:\ovs-master-$env:CONFIGURATION.zip C:\openvswitch
+ - ps: Push-AppveyorArtifact C:\ovs-master-$env:CONFIGURATION.zip
+ - ps: Push-AppveyorArtifact C:\OpenvSwitch-$env:CONFIGURATION.msi
diff --git a/build-aux/automake.mk b/build-aux/automake.mk
index 9007ecda9c19e820bdb46f208053e0e546341b5c..6267ccd7cf43fe61ca74bec37bd2ee2cfe9ab95a 100644
--- a/build-aux/automake.mk
+++ b/build-aux/automake.mk
@@ -5,7 +5,7 @@ EXTRA_DIST += \
build-aux/dist-docs \
build-aux/dpdkstrip.py \
build-aux/generate-dhparams-c \
- build-aux/initial-tab-whitelist \
+ build-aux/initial-tab-allowed-files \
build-aux/sodepends.py \
build-aux/soexpand.py \
build-aux/text2c \
diff --git a/build-aux/initial-tab-whitelist b/build-aux/initial-tab-allowed-files
similarity index 100%
rename from build-aux/initial-tab-whitelist
rename to build-aux/initial-tab-allowed-files
diff --git a/build-aux/thread-safety-blacklist b/build-aux/thread-safety-forbidden
similarity index 100%
rename from build-aux/thread-safety-blacklist
rename to build-aux/thread-safety-forbidden
diff --git a/configure.ac b/configure.ac
index 92b52f67127e70fa72d82a71e77cd55ff4fe0b68..126a1d9d187568fef9a623dba32e23f17a0441e4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
# limitations under the License.
AC_PREREQ(2.63)
-AC_INIT(openvswitch, 2.13.0, bugs@openvswitch.org)
+AC_INIT(openvswitch, 2.14.90, bugs@openvswitch.org)
AC_CONFIG_SRCDIR([datapath/datapath.c])
AC_CONFIG_MACRO_DIR([m4])
AC_CONFIG_AUX_DIR([build-aux])
@@ -100,6 +100,7 @@ OVS_CHECK_IF_DL
OVS_CHECK_STRTOK_R
OVS_CHECK_LINUX_AF_XDP
AC_CHECK_DECLS([sys_siglist], [], [], [[#include <signal.h>]])
+AC_CHECK_DECLS([malloc_trim], [], [], [[#include <malloc.h>]])
AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec],
[], [], [[#include <sys/stat.h>]])
AC_CHECK_MEMBERS([struct ifreq.ifr_flagshigh], [], [], [[#include <net/if.h>]])
@@ -178,9 +179,13 @@ OVS_ENABLE_OPTION([-Wno-null-pointer-arithmetic])
OVS_ENABLE_OPTION([-Warray-bounds-pointer-arithmetic])
OVS_CONDITIONAL_CC_OPTION([-Wno-unused], [HAVE_WNO_UNUSED])
OVS_CONDITIONAL_CC_OPTION([-Wno-unused-parameter], [HAVE_WNO_UNUSED_PARAMETER])
+OVS_CONDITIONAL_CC_OPTION([-mavx512f], [HAVE_AVX512F])
+OVS_CHECK_CC_OPTION([-mavx512f], [CFLAGS="$CFLAGS -DHAVE_AVX512F"])
OVS_ENABLE_WERROR
OVS_ENABLE_SPARSE
OVS_CTAGS_IDENTIFIERS
+OVS_CHECK_DPCLS_AUTOVALIDATOR
+OVS_CHECK_BINUTILS_AVX512
AC_ARG_VAR(KARCH, [Kernel Architecture String])
AC_SUBST(KARCH)
@@ -188,6 +193,7 @@ OVS_CHECK_LINUX
OVS_CHECK_LINUX_NETLINK
OVS_CHECK_LINUX_TC
OVS_CHECK_LINUX_SCTP_CT
+OVS_CHECK_LINUX_VIRTIO_TYPES
OVS_CHECK_DPDK
OVS_CHECK_PRAGMA_MESSAGE
AC_SUBST([OVS_CFLAGS])
diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index b8cf5dd954eabc99ef766da7c4cd3178c30c8615..60b3d603395d14ccae4e8fd66e5cfb5a1a798bd8 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -90,3 +90,7 @@ datapath_windows_analyze: all
MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win10Analyze"
MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8.1Analyze"
MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win8Analyze"
+
+datapath_windows: all
+ MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win10Debug"
+ MSBuild.exe //nologo //maxcpucount datapath-windows/ovsext.sln /target:Build /property:Configuration="Win10Release"
diff --git a/datapath-windows/ovsext/Actions.c b/datapath-windows/ovsext/Actions.c
index 5c9b5c3a0c5c22c05af4c7c9a9888de531973b19..4f43369844e3023fc13c4249cd047e22d5483c29 100644
--- a/datapath-windows/ovsext/Actions.c
+++ b/datapath-windows/ovsext/Actions.c
@@ -1259,6 +1259,7 @@ OvsActionMplsPush(OvsForwardingContext *ovsFwdCtx,
*/
static __inline NDIS_STATUS
OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
+ OvsFlowKey *key,
const struct ovs_key_ethernet *ethAttr)
{
PNET_BUFFER curNb;
@@ -1285,9 +1286,11 @@ OvsUpdateEthHeader(OvsForwardingContext *ovsFwdCtx,
}
ethHdr = (EthHdr *)(bufferStart + NET_BUFFER_CURRENT_MDL_OFFSET(curNb));
- RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst,
- sizeof ethHdr->Destination);
- RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, sizeof ethHdr->Source);
+ RtlCopyMemory(ethHdr->Destination, ethAttr->eth_dst, ETH_ADDR_LENGTH);
+ RtlCopyMemory(ethHdr->Source, ethAttr->eth_src, ETH_ADDR_LENGTH);
+ /* Update l2 flow key */
+ RtlCopyMemory(key->l2.dlDst, ethAttr->eth_dst, ETH_ADDR_LENGTH);
+ RtlCopyMemory(key->l2.dlSrc, ethAttr->eth_src, ETH_ADDR_LENGTH);
return NDIS_STATUS_SUCCESS;
}
@@ -1376,6 +1379,7 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
*/
NDIS_STATUS
OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
+ OvsFlowKey *key,
const struct ovs_key_udp *udpAttr)
{
PUINT8 bufferStart;
@@ -1400,15 +1404,19 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
udpHdr->check = ChecksumUpdate16(udpHdr->check, udpHdr->source,
udpAttr->udp_src);
udpHdr->source = udpAttr->udp_src;
+ key->ipKey.l4.tpSrc = udpAttr->udp_src;
}
if (udpHdr->dest != udpAttr->udp_dst) {
udpHdr->check = ChecksumUpdate16(udpHdr->check, udpHdr->dest,
udpAttr->udp_dst);
udpHdr->dest = udpAttr->udp_dst;
+ key->ipKey.l4.tpDst = udpAttr->udp_dst;
}
} else {
udpHdr->source = udpAttr->udp_src;
+ key->ipKey.l4.tpSrc = udpAttr->udp_src;
udpHdr->dest = udpAttr->udp_dst;
+ key->ipKey.l4.tpDst = udpAttr->udp_dst;
}
return NDIS_STATUS_SUCCESS;
@@ -1423,6 +1431,7 @@ OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
*/
NDIS_STATUS
OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
+ OvsFlowKey *key,
const struct ovs_key_tcp *tcpAttr)
{
PUINT8 bufferStart;
@@ -1447,11 +1456,13 @@ OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
tcpHdr->check = ChecksumUpdate16(tcpHdr->check, tcpHdr->source,
tcpAttr->tcp_src);
tcpHdr->source = tcpAttr->tcp_src;
+ key->ipKey.l4.tpSrc = tcpAttr->tcp_src;
}
if (tcpHdr->dest != tcpAttr->tcp_dst) {
tcpHdr->check = ChecksumUpdate16(tcpHdr->check, tcpHdr->dest,
tcpAttr->tcp_dst);
tcpHdr->dest = tcpAttr->tcp_dst;
+ key->ipKey.l4.tpDst = tcpAttr->tcp_dst;
}
return NDIS_STATUS_SUCCESS;
@@ -1579,6 +1590,7 @@ OvsUpdateAddressAndPort(OvsForwardingContext *ovsFwdCtx,
*/
NDIS_STATUS
OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
+ OvsFlowKey *key,
const struct ovs_key_ipv4 *ipAttr)
{
PUINT8 bufferStart;
@@ -1632,6 +1644,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
ipAttr->ipv4_src);
}
ipHdr->saddr = ipAttr->ipv4_src;
+ key->ipKey.nwSrc = ipAttr->ipv4_src;
}
if (ipHdr->daddr != ipAttr->ipv4_dst) {
if (tcpHdr) {
@@ -1647,6 +1660,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
ipAttr->ipv4_dst);
}
ipHdr->daddr = ipAttr->ipv4_dst;
+ key->ipKey.nwDst = ipAttr->ipv4_dst;
}
if (ipHdr->protocol != ipAttr->ipv4_proto) {
UINT16 oldProto = (ipHdr->protocol << 16) & 0xff00;
@@ -1661,6 +1675,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
ipHdr->check = ChecksumUpdate16(ipHdr->check, oldProto, newProto);
}
ipHdr->protocol = ipAttr->ipv4_proto;
+ key->ipKey.nwProto = ipAttr->ipv4_proto;
}
if (ipHdr->ttl != ipAttr->ipv4_ttl) {
UINT16 oldTtl = (ipHdr->ttl) & 0xff;
@@ -1669,6 +1684,7 @@ OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
ipHdr->check = ChecksumUpdate16(ipHdr->check, oldTtl, newTtl);
}
ipHdr->ttl = ipAttr->ipv4_ttl;
+ key->ipKey.nwTtl = ipAttr->ipv4_ttl;
}
return NDIS_STATUS_SUCCESS;
@@ -1691,12 +1707,12 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
switch (type) {
case OVS_KEY_ATTR_ETHERNET:
- status = OvsUpdateEthHeader(ovsFwdCtx,
+ status = OvsUpdateEthHeader(ovsFwdCtx, key,
NlAttrGetUnspec(a, sizeof(struct ovs_key_ethernet)));
break;
case OVS_KEY_ATTR_IPV4:
- status = OvsUpdateIPv4Header(ovsFwdCtx,
+ status = OvsUpdateIPv4Header(ovsFwdCtx, key,
NlAttrGetUnspec(a, sizeof(struct ovs_key_ipv4)));
break;
@@ -1709,16 +1725,17 @@ OvsExecuteSetAction(OvsForwardingContext *ovsFwdCtx,
status = SUCCEEDED(convertStatus) ? NDIS_STATUS_SUCCESS : NDIS_STATUS_FAILURE;
ASSERT(status == NDIS_STATUS_SUCCESS);
RtlCopyMemory(&ovsFwdCtx->tunKey, &tunKey, sizeof ovsFwdCtx->tunKey);
+ RtlCopyMemory(&key->tunKey, &tunKey, sizeof key->tunKey);
break;
}
case OVS_KEY_ATTR_UDP:
- status = OvsUpdateUdpPorts(ovsFwdCtx,
+ status = OvsUpdateUdpPorts(ovsFwdCtx, key,
NlAttrGetUnspec(a, sizeof(struct ovs_key_udp)));
break;
case OVS_KEY_ATTR_TCP:
- status = OvsUpdateTcpPorts(ovsFwdCtx,
+ status = OvsUpdateTcpPorts(ovsFwdCtx, key,
NlAttrGetUnspec(a, sizeof(struct ovs_key_tcp)));
break;
@@ -1815,10 +1832,12 @@ OvsOutputUserspaceAction(OvsForwardingContext *ovsFwdCtx,
{
NTSTATUS status = NDIS_STATUS_SUCCESS;
PNL_ATTR userdataAttr;
- PNL_ATTR queueAttr;
+ PNL_ATTR egrTunAttr = NULL;
POVS_PACKET_QUEUE_ELEM elem;
POVS_PACKET_HDR_INFO layers = &ovsFwdCtx->layers;
BOOLEAN isRecv = FALSE;
+ OVS_FWD_INFO fwdInfo;
+ OvsIPv4TunnelKey tunKey;
POVS_VPORT_ENTRY vport = OvsFindVportByPortNo(ovsFwdCtx->switchContext,
ovsFwdCtx->srcVportNo);
@@ -1830,13 +1849,29 @@ OvsOutputUserspaceAction(OvsForwardingContext *ovsFwdCtx,
}
}
- queueAttr = NlAttrFindNested(attr, OVS_USERSPACE_ATTR_PID);
userdataAttr = NlAttrFindNested(attr, OVS_USERSPACE_ATTR_USERDATA);
+ /* Indicate the packet is from egress-tunnel direction */
+ egrTunAttr = NlAttrFindNested(attr, OVS_USERSPACE_ATTR_EGRESS_TUN_PORT);
+
+ /* Fill tunnel key to export to userspace to calculate the template id */
+ if (egrTunAttr) {
+ RtlZeroMemory(&tunKey, sizeof tunKey);
+ RtlCopyMemory(&tunKey, &ovsFwdCtx->tunKey, sizeof tunKey);
+ if (!tunKey.src) {
+ status = OvsLookupIPFwdInfo(tunKey.src, tunKey.dst, &fwdInfo);
+ if (status == NDIS_STATUS_SUCCESS && tunKey.dst == fwdInfo.dstIpAddr) {
+ tunKey.src = fwdInfo.srcIpAddr;
+ }
+ }
+ tunKey.flow_hash = tunKey.flow_hash ? tunKey.flow_hash : MAXINT16;
+ }
elem = OvsCreateQueueNlPacket(NlAttrData(userdataAttr),
NlAttrGetSize(userdataAttr),
OVS_PACKET_CMD_ACTION,
- vport, key, ovsFwdCtx->curNbl,
+ vport, key,
+ egrTunAttr ? &(tunKey) : NULL,
+ ovsFwdCtx->curNbl,
NET_BUFFER_LIST_FIRST_NB(ovsFwdCtx->curNbl),
isRecv,
layers);
diff --git a/datapath-windows/ovsext/Actions.h b/datapath-windows/ovsext/Actions.h
index fd050d5dd8c9db481ec4adc035341d0bbecfddae..bc12e1166df6cd98d575e424f57a2d2511c3f0a2 100644
--- a/datapath-windows/ovsext/Actions.h
+++ b/datapath-windows/ovsext/Actions.h
@@ -115,14 +115,17 @@ PUINT8 OvsGetHeaderBySize(OvsForwardingContext *ovsFwdCtx,
NDIS_STATUS
OvsUpdateUdpPorts(OvsForwardingContext *ovsFwdCtx,
+ OvsFlowKey *key,
const struct ovs_key_udp *udpAttr);
NDIS_STATUS
OvsUpdateTcpPorts(OvsForwardingContext *ovsFwdCtx,
+ OvsFlowKey *key,
const struct ovs_key_tcp *tcpAttr);
NDIS_STATUS
OvsUpdateIPv4Header(OvsForwardingContext *ovsFwdCtx,
+ OvsFlowKey *key,
const struct ovs_key_ipv4 *ipAttr);
NDIS_STATUS
diff --git a/datapath-windows/ovsext/Conntrack-other.c b/datapath-windows/ovsext/Conntrack-other.c
index 962cc8ac65523bee8683f1a76580afce1a6074e8..8580415a6b38720e940f1045b41efe4082575b5a 100644
--- a/datapath-windows/ovsext/Conntrack-other.c
+++ b/datapath-windows/ovsext/Conntrack-other.c
@@ -49,17 +49,19 @@ OvsConntrackUpdateOtherEntry(OVS_CT_ENTRY *conn_,
{
ASSERT(conn_);
struct conn_other *conn = OvsCastConntrackEntryToOtherEntry(conn_);
+ enum CT_UPDATE_RES ret = CT_UPDATE_VALID;
if (reply && conn->state != OTHERS_BIDIR) {
conn->state = OTHERS_BIDIR;
} else if (conn->state == OTHERS_FIRST) {
conn->state = OTHERS_MULTIPLE;
+ ret = CT_UPDATE_VALID_NEW;
}
OvsConntrackUpdateExpiration(&conn->up, now,
other_timeouts[conn->state]);
- return CT_UPDATE_VALID;
+ return ret;
}
OVS_CT_ENTRY *
diff --git a/datapath-windows/ovsext/Conntrack-related.c b/datapath-windows/ovsext/Conntrack-related.c
index 950be98e99eee0edf2a20b0424696ec25dc9e76d..a5bba5cf802de66141f1e95111e65b64b38f73aa 100644
--- a/datapath-windows/ovsext/Conntrack-related.c
+++ b/datapath-windows/ovsext/Conntrack-related.c
@@ -47,8 +47,11 @@ OvsCtRelatedKeyAreSame(OVS_CT_KEY incomingKey, OVS_CT_KEY entryKey)
}
/* FTP ACTIVE - Server initiates the connection */
+ /* Some FTP servers, such as pyftpdlib, may use a random (>1024) data port
+ * instead of 20. In this case, the incomingKey's src port differs from
+ * the entryKey's src port, so the src port is not compared.
+ */
if ((incomingKey.src.addr.ipv4 == entryKey.src.addr.ipv4) &&
- (incomingKey.src.port == entryKey.src.port) &&
(incomingKey.dst.addr.ipv4 == entryKey.dst.addr.ipv4) &&
(incomingKey.dst.port == entryKey.dst.port) &&
(incomingKey.dl_type == entryKey.dl_type) &&
diff --git a/datapath-windows/ovsext/Conntrack-tcp.c b/datapath-windows/ovsext/Conntrack-tcp.c
index eda42ac82310a59f694688583aea5369276668fb..a468c3e6bcf23ee54acf8e3b730e17d6ce1569ab 100644
--- a/datapath-windows/ovsext/Conntrack-tcp.c
+++ b/datapath-windows/ovsext/Conntrack-tcp.c
@@ -213,11 +213,17 @@ OvsConntrackUpdateTcpEntry(OVS_CT_ENTRY* conn_,
return CT_UPDATE_INVALID;
}
- if (((tcp_flags & (TCP_SYN|TCP_ACK)) == TCP_SYN)
- && dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
+ if ((tcp_flags & (TCP_SYN|TCP_ACK)) == TCP_SYN) {
+ if (dst->state >= CT_DPIF_TCPS_FIN_WAIT_2
&& src->state >= CT_DPIF_TCPS_FIN_WAIT_2) {
- src->state = dst->state = CT_DPIF_TCPS_CLOSED;
- return CT_UPDATE_NEW;
+ src->state = dst->state = CT_DPIF_TCPS_CLOSED;
+ return CT_UPDATE_NEW;
+ } else if (src->state <= CT_DPIF_TCPS_SYN_SENT) {
+ src->state = CT_DPIF_TCPS_SYN_SENT;
+ OvsConntrackUpdateExpiration(&conn->up, now,
+ 30 * CT_INTERVAL_SEC);
+ return CT_UPDATE_VALID_NEW;
+ }
}
if (src->wscale & CT_WSCALE_FLAG
diff --git a/datapath-windows/ovsext/Conntrack.c b/datapath-windows/ovsext/Conntrack.c
index ba5611697a11246b17dc83e51aa82bb2dc6ce027..2610d626a0cb296194a555c29934332f863d5b75 100644
--- a/datapath-windows/ovsext/Conntrack.c
+++ b/datapath-windows/ovsext/Conntrack.c
@@ -246,7 +246,6 @@ OvsPostCtEventEntry(POVS_CT_ENTRY entry, UINT8 type)
{
OVS_CT_EVENT_ENTRY ctEventEntry = {0};
NdisMoveMemory(&ctEventEntry.entry, entry, sizeof(OVS_CT_ENTRY));
- ctEventEntry.entry.parent = NULL;
ctEventEntry.type = type;
OvsPostCtEvent(&ctEventEntry);
}
@@ -480,6 +479,9 @@ OvsCtEntryDelete(POVS_CT_ENTRY entry, BOOLEAN forceDelete)
RemoveEntryList(&entry->link);
OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
NdisFreeSpinLock(&(entry->lock));
+ if (entry->helper_name) {
+ OvsFreeMemoryWithTag(entry->helper_name, OVS_CT_POOL_TAG);
+ }
OvsFreeMemoryWithTag(entry, OVS_CT_POOL_TAG);
NdisInterlockedDecrement((PLONG)&ctTotalEntries);
return;
@@ -753,6 +755,9 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
return NULL;
}
break;
+ case CT_UPDATE_VALID_NEW:
+ state |= OVS_CS_F_NEW;
+ break;
}
}
if (entry) {
@@ -784,60 +789,82 @@ OvsProcessConntrackEntry(OvsForwardingContext *fwdCtx,
static __inline VOID
OvsConntrackSetMark(OvsFlowKey *key,
POVS_CT_ENTRY entry,
- UINT32 value,
- UINT32 mask,
+ MD_MARK *mark,
BOOLEAN *markChanged)
{
- UINT32 newMark;
- newMark = value | (entry->mark & ~(mask));
- if (entry->mark != newMark) {
+ POVS_CT_ENTRY parent = entry->parent;
+ BOOLEAN changed = FALSE;
+ UINT32 newMark = 0;
+
+ if (parent && parent->mark) {
+ newMark = parent->mark;
+ changed = TRUE;
+ } else if (mark) {
+ newMark = mark->value | (entry->mark & ~(mark->mask));
+ changed = TRUE;
+ }
+
+ if (changed && entry->mark != newMark) {
entry->mark = newMark;
key->ct.mark = newMark;
*markChanged = TRUE;
}
}
+static __inline BOOLEAN
+OvsConntrackIsLabelsNonZero(const struct ovs_key_ct_labels *labels)
+{
+ UINT8 i;
+
+ for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) {
+ if (labels->ct_labels_32[i]) {
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
static __inline void
OvsConntrackSetLabels(OvsFlowKey *key,
POVS_CT_ENTRY entry,
- struct ovs_key_ct_labels *val,
- struct ovs_key_ct_labels *mask,
+ MD_LABELS *labels,
BOOLEAN *labelChanged)
{
- ovs_u128 v, m, pktMdLabel = {0};
- memcpy(&v, val, sizeof v);
- memcpy(&m, mask, sizeof m);
- memcpy(&pktMdLabel, &entry->labels, sizeof(struct ovs_key_ct_labels));
+ POVS_CT_ENTRY parent = entry->parent;
+
+ /* Inherit master's labels at labels initialization, if any. */
+ if (!OvsConntrackIsLabelsNonZero(&entry->labels) &&
+ parent && OvsConntrackIsLabelsNonZero(&parent->labels)) {
+ RtlCopyMemory(&entry->labels, &parent->labels, OVS_CT_LABELS_LEN);
+ *labelChanged = TRUE;
+ }
- pktMdLabel.u64.lo = v.u64.lo | (pktMdLabel.u64.lo & ~(m.u64.lo));
- pktMdLabel.u64.hi = v.u64.hi | (pktMdLabel.u64.hi & ~(m.u64.hi));
+ /* Update labels according to value of ct_label in ct commit */
+ if (labels && OvsConntrackIsLabelsNonZero(&labels->mask)) {
+ UINT8 i;
+ UINT32 *dst = entry->labels.ct_labels_32;
+ for (i = 0; i < OVS_CT_LABELS_LEN_32; i++) {
+ dst[i] = (dst[i] & ~(labels->mask.ct_labels_32[i])) |
+ (labels->value.ct_labels_32[i] & labels->mask.ct_labels_32[i]);
+ }
- if (!NdisEqualMemory(&entry->labels, &pktMdLabel,
- sizeof(struct ovs_key_ct_labels))) {
*labelChanged = TRUE;
}
- NdisMoveMemory(&entry->labels, &pktMdLabel,
- sizeof(struct ovs_key_ct_labels));
- NdisMoveMemory(&key->ct.labels, &pktMdLabel,
- sizeof(struct ovs_key_ct_labels));
+
+ /* Update flow key's ct labels */
+ NdisMoveMemory(&key->ct.labels, &entry->labels, OVS_CT_LABELS_LEN);
}
static void
OvsCtSetMarkLabel(OvsFlowKey *key,
- POVS_CT_ENTRY entry,
- MD_MARK *mark,
- MD_LABELS *labels,
- BOOLEAN *triggerUpdateEvent)
+ POVS_CT_ENTRY entry,
+ MD_MARK *mark,
+ MD_LABELS *labels,
+ BOOLEAN *triggerUpdateEvent)
{
- if (mark) {
- OvsConntrackSetMark(key, entry, mark->value, mark->mask,
- triggerUpdateEvent);
- }
-
- if (labels) {
- OvsConntrackSetLabels(key, entry, &labels->value, &labels->mask,
- triggerUpdateEvent);
- }
+ OvsConntrackSetMark(key, entry, mark, triggerUpdateEvent);
+ OvsConntrackSetLabels(key, entry, labels, triggerUpdateEvent);
}
/*
@@ -880,6 +907,7 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
BOOLEAN triggerUpdateEvent = FALSE;
BOOLEAN entryCreated = FALSE;
POVS_CT_ENTRY entry = NULL;
+ POVS_CT_ENTRY parent = NULL;
PNET_BUFFER_LIST curNbl = fwdCtx->curNbl;
OvsConntrackKeyLookupCtx ctx = { 0 };
LOCK_STATE_EX lockStateTable;
@@ -956,8 +984,6 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
if (OvsDetectFtpPacket(key)) {
/* FTP parser will always be loaded */
- UNREFERENCED_PARAMETER(helper);
-
status = OvsCtHandleFtp(curNbl, key, layers, currentTime, entry,
(ntohs(key->ipKey.l4.tpDst) == IPPORT_FTP));
if (status != NDIS_STATUS_SUCCESS) {
@@ -965,10 +991,25 @@ OvsCtExecute_(OvsForwardingContext *fwdCtx,
}
}
+ parent = entry->parent;
+ /* The entry should have the same helper name as its parent. */
+ if (!entry->helper_name &&
+ (helper || (parent && parent->helper_name))) {
+
+ helper = helper ? helper : parent->helper_name;
+ entry->helper_name = OvsAllocateMemoryWithTag(strlen(helper) + 1,
+ OVS_CT_POOL_TAG);
+ if (!entry->helper_name) {
+ OVS_LOG_ERROR("Error while allocating memory");
+ OVS_RELEASE_SPIN_LOCK(&(entry->lock), irql);
+ return NDIS_STATUS_RESOURCES;
+ }
+ memcpy(entry->helper_name, helper, strlen(helper) + 1);
+ }
+
/* Add original tuple information to flow Key */
if (entry->key.dl_type == ntohs(ETH_TYPE_IPV4)) {
- if (entry->parent != NULL) {
- POVS_CT_ENTRY parent = entry->parent;
+ if (parent != NULL) {
OVS_ACQUIRE_SPIN_LOCK(&(parent->lock), irql);
OvsCtUpdateTuple(key, &parent->key);
OVS_RELEASE_SPIN_LOCK(&(parent->lock), irql);
@@ -1039,8 +1080,8 @@ OvsExecuteConntrackAction(OvsForwardingContext *fwdCtx,
if (helper == NULL) {
return NDIS_STATUS_INVALID_PARAMETER;
}
- if (strcmp("ftp", helper) != 0) {
- /* Only support FTP */
+ if (strcmp("ftp", helper) != 0 && strcmp("tftp", helper) != 0) {
+ /* Only support FTP/TFTP */
return NDIS_STATUS_NOT_SUPPORTED;
}
break;
@@ -1680,6 +1721,26 @@ OvsCreateNlMsgFromCtEntry(POVS_CT_ENTRY entry,
}
}
+ if (entry->helper_name) {
+ UINT32 offset;
+ offset = NlMsgStartNested(&nlBuf, CTA_HELP);
+ if (!offset) {
+ return NDIS_STATUS_FAILURE;
+ }
+ if (!NlMsgPutTailString(&nlBuf, CTA_HELP_NAME, entry->helper_name)) {
+ return STATUS_INVALID_BUFFER_SIZE;
+ }
+ NlMsgEndNested(&nlBuf, offset);
+ }
+
+ if (entry->parent) {
+ status = MapCtKeyTupleToNl(&nlBuf, CTA_TUPLE_MASTER,
+ &((POVS_CT_ENTRY)entry->parent)->key);
+ if (status != NDIS_STATUS_SUCCESS) {
+ return STATUS_UNSUCCESSFUL;
+ }
+ }
+
/* CTA_STATUS is required but not implemented. Default to 0 */
if (!NlMsgPutTailU32(&nlBuf, CTA_STATUS, 0)) {
return STATUS_INVALID_BUFFER_SIZE;
diff --git a/datapath-windows/ovsext/Conntrack.h b/datapath-windows/ovsext/Conntrack.h
index bc6580d7083cd7a15e9334ea25c9daa4418b50e6..bbbf49c115ef663de04b84de73c64484abdd35cd 100644
--- a/datapath-windows/ovsext/Conntrack.h
+++ b/datapath-windows/ovsext/Conntrack.h
@@ -56,6 +56,7 @@ typedef enum CT_UPDATE_RES {
CT_UPDATE_INVALID,
CT_UPDATE_VALID,
CT_UPDATE_NEW,
+ CT_UPDATE_VALID_NEW,
} CT_UPDATE_RES;
/* Metadata mark for masked write to conntrack mark */
@@ -108,6 +109,7 @@ typedef struct OVS_CT_ENTRY {
struct ovs_key_ct_labels labels;
NAT_ACTION_INFO natInfo;
PVOID parent; /* Points to main connection */
+ PCHAR helper_name;
} OVS_CT_ENTRY, *POVS_CT_ENTRY;
typedef struct OVS_CT_REL_ENTRY {
diff --git a/datapath-windows/ovsext/Flow.c b/datapath-windows/ovsext/Flow.c
index fdb10105157c1928afe079aa8ef878186acb3221..ac0582c18f2f4f9bdf8ee442df4fb3891d9ee0bd 100644
--- a/datapath-windows/ovsext/Flow.c
+++ b/datapath-windows/ovsext/Flow.c
@@ -1094,6 +1094,18 @@ MapFlowTunKeyToNlKey(PNL_BUFFER nlBuf,
goto done;
}
+ if (!NlMsgPutTailU16(nlBuf, OVS_TUNNEL_KEY_ATTR_TP_SRC,
+ tunKey->flow_hash)) {
+ rc = STATUS_UNSUCCESSFUL;
+ goto done;
+ }
+
+ if (!NlMsgPutTailU16(nlBuf, OVS_TUNNEL_KEY_ATTR_TP_DST,
+ tunKey->dst_port)) {
+ rc = STATUS_UNSUCCESSFUL;
+ goto done;
+ }
+
done:
NlMsgEndNested(nlBuf, offset);
error_nested_start:
diff --git a/datapath-windows/ovsext/Tunnel.c b/datapath-windows/ovsext/Tunnel.c
index ad2c254f56dcd6f3fa268d069e9fb2c19eff4557..5d1be80f4c57c36a053f5d97be6ea49c1ef6283f 100644
--- a/datapath-windows/ovsext/Tunnel.c
+++ b/datapath-windows/ovsext/Tunnel.c
@@ -308,7 +308,7 @@ OvsInjectPacketThroughActions(PNET_BUFFER_LIST pNbl,
datapath->misses++;
elem = OvsCreateQueueNlPacket(NULL, 0, OVS_PACKET_CMD_MISS,
- vport, &key, pNbl, curNb,
+ vport, &key, NULL, pNbl, curNb,
TRUE, &layers);
if (elem) {
/* Complete the packet since it was copied to user buffer. */
diff --git a/datapath-windows/ovsext/User.c b/datapath-windows/ovsext/User.c
index ed1fcbea8a185c6585b41d98913695e08cd43ee1..ee0e38d99ca3e3e6b00ba1e341fad08e0378a0eb 100644
--- a/datapath-windows/ovsext/User.c
+++ b/datapath-windows/ovsext/User.c
@@ -830,7 +830,7 @@ OvsCreateAndAddPackets(PVOID userData,
nb = NET_BUFFER_LIST_FIRST_NB(nbl);
while (nb) {
elem = OvsCreateQueueNlPacket(userData, userDataLen,
- cmd, vport, key, nbl, nb,
+ cmd, vport, key, NULL, nbl, nb,
isRecv, hdrInfo);
if (elem) {
InsertTailList(list, &elem->link);
@@ -1013,6 +1013,7 @@ OvsCreateQueueNlPacket(PVOID userData,
UINT32 cmd,
POVS_VPORT_ENTRY vport,
OvsFlowKey *key,
+ OvsIPv4TunnelKey *tunnelKey,
PNET_BUFFER_LIST nbl,
PNET_BUFFER nb,
BOOLEAN isRecv,
@@ -1025,7 +1026,6 @@ OvsCreateQueueNlPacket(PVOID userData,
NDIS_TCP_IP_CHECKSUM_NET_BUFFER_LIST_INFO csumInfo;
PNDIS_NET_BUFFER_LIST_8021Q_INFO vlanInfo = NULL;
PVOID vlanTag;
- OvsIPv4TunnelKey *tunnelKey = (OvsIPv4TunnelKey *)&key->tunKey;
UINT32 pid;
UINT32 nlMsgSize;
NL_BUFFER nlBuf;
@@ -1127,7 +1127,13 @@ OvsCreateQueueNlPacket(PVOID userData,
}
}
- /* XXX must send OVS_PACKET_ATTR_EGRESS_TUN_KEY if set by vswtchd */
+ /* Set OVS_PACKET_ATTR_EGRESS_TUN_KEY attribute */
+ if (tunnelKey) {
+ if (MapFlowTunKeyToNlKey(&nlBuf, tunnelKey,
+ OVS_PACKET_ATTR_EGRESS_TUN_KEY) != STATUS_SUCCESS) {
+ goto fail;
+ }
+ }
if (userData){
if (!NlMsgPutTailUnspec(&nlBuf, OVS_PACKET_ATTR_USERDATA,
userData, (UINT16)userDataLen)) {
diff --git a/datapath-windows/ovsext/User.h b/datapath-windows/ovsext/User.h
index 3a42888945278a357784bdb76cd00def6fc48c1a..ccca0ba5f9ae35e4889c3bf179c3f890a3ce4ca4 100644
--- a/datapath-windows/ovsext/User.h
+++ b/datapath-windows/ovsext/User.h
@@ -75,6 +75,7 @@ POVS_PACKET_QUEUE_ELEM OvsCreateQueueNlPacket(PVOID userData,
UINT32 cmd,
POVS_VPORT_ENTRY vport,
OvsFlowKey *key,
+ OvsIPv4TunnelKey *tunnelKey,
PNET_BUFFER_LIST nbl,
PNET_BUFFER nb,
BOOLEAN isRecv,
diff --git a/datapath-windows/ovsext/Vxlan.c b/datapath-windows/ovsext/Vxlan.c
index 09809d3979bd45a5808539db88ef312934720272..04df9f6c9fa39f9310430ed5d54cdab62c43ddfa 100644
--- a/datapath-windows/ovsext/Vxlan.c
+++ b/datapath-windows/ovsext/Vxlan.c
@@ -19,7 +19,6 @@
#include "Atomic.h"
#include "Debug.h"
#include "Flow.h"
-#include "Flow.h"
#include "IpHelper.h"
#include "NetProto.h"
#include "Offload.h"
diff --git a/datapath/Makefile.am b/datapath/Makefile.am
index f2a85bc3bc3c50f22a9c2a949af387f122cbea59..e4dd0c7044d4653b3f83b525953178225f148622 100644
--- a/datapath/Makefile.am
+++ b/datapath/Makefile.am
@@ -42,7 +42,7 @@ COMPAT_EXPORTS := $(shell $(COMPAT_GET_EXPORTS))
# Checks that all EXPORT_SYMBOL_GPL() export 'rpl_' or 'ovs_' prefixed functions.
check-export-symbol:
@for fun_ in $(COMPAT_FUNCTIONS); do \
- if ! grep -- $${fun_} $(top_srcdir)/datapath/linux/compat/build-aux/export-check-whitelist > /dev/null; then \
+ if ! grep -- $${fun_} $(top_srcdir)/datapath/linux/compat/build-aux/export-check-allow-list > /dev/null; then \
if ! echo $${fun_} | grep -q -E '^(rpl|ovs)_'; then \
echo "error: $${fun_}() needs to be prefixed with 'rpl_' or 'ovs_'."; \
exit 1; \
diff --git a/datapath/conntrack.c b/datapath/conntrack.c
index 838cf63c908ff49ccc651344b4e93a87459efce0..50b4d7bd6aa0aa1c630c6438495068c56c5fb0b6 100644
--- a/datapath/conntrack.c
+++ b/datapath/conntrack.c
@@ -844,6 +844,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
}
}
/* Non-ICMP, fall thru to initialize if needed. */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
@@ -977,6 +978,17 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key,
}
err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype);
+ if (err == NF_ACCEPT &&
+ ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) {
+ if (maniptype == NF_NAT_MANIP_SRC)
+ maniptype = NF_NAT_MANIP_DST;
+ else
+ maniptype = NF_NAT_MANIP_SRC;
+
+ err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range,
+ maniptype);
+ }
+
/* Mark NAT done if successful and update the flow key. */
if (err == NF_ACCEPT)
ovs_nat_update_key(key, skb, maniptype);
@@ -1972,7 +1984,8 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node)
+ hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
+ lockdep_ovsl_is_held())
kfree_rcu(ct_limit, rcu);
}
kfree(ovs_net->ct_limit_info->limits);
@@ -2312,7 +2325,9 @@ static struct genl_ops ct_limit_genl_ops[] = {
#endif
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
* privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = ct_limit_policy,
+#endif
.doit = ovs_ct_limit_cmd_set,
},
{ .cmd = OVS_CT_LIMIT_CMD_DEL,
@@ -2321,7 +2336,9 @@ static struct genl_ops ct_limit_genl_ops[] = {
#endif
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
* privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = ct_limit_policy,
+#endif
.doit = ovs_ct_limit_cmd_del,
},
{ .cmd = OVS_CT_LIMIT_CMD_GET,
@@ -2329,7 +2346,9 @@ static struct genl_ops ct_limit_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = 0, /* OK for unprivileged users. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = ct_limit_policy,
+#endif
.doit = ovs_ct_limit_cmd_get,
},
};
@@ -2343,6 +2362,9 @@ struct genl_family dp_ct_limit_genl_family __ro_after_init = {
.name = OVS_CT_LIMIT_FAMILY,
.version = OVS_CT_LIMIT_VERSION,
.maxattr = OVS_CT_LIMIT_ATTR_MAX,
+#ifndef HAVE_GENL_OPS_POLICY
+ .policy = ct_limit_policy,
+#endif
.netnsok = true,
.parallel_ops = true,
.ops = ct_limit_genl_ops,
diff --git a/datapath/datapath.c b/datapath/datapath.c
index 853bfb5af13b0b15566f17f3a0d25d2b29f0f3ff..b88d16107cbfc74ec8f931285278f9fcbcd9028c 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -240,6 +240,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
struct dp_stats_percpu *stats;
u64 *stats_counter;
u32 n_mask_hit;
+ int error;
stats = this_cpu_ptr(dp->stats_percpu);
@@ -248,7 +249,6 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
&n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
- int error;
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
@@ -265,7 +265,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
ovs_flow_stats_update(flow, key->tp.flags, skb);
sf_acts = rcu_dereference(flow->sf_acts);
- ovs_execute_actions(dp, skb, sf_acts, key);
+ error = ovs_execute_actions(dp, skb, sf_acts, key);
+ if (unlikely(error))
+ net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
+ ovs_dp_name(dp), error);
stats_counter = &stats->n_hit;
@@ -340,8 +343,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
}
#endif
/* Queue all of the segments. */
- skb = segs;
- do {
+ skb_list_walk_safe(segs, skb, nskb) {
*OVS_CB(skb) = ovs_cb;
#ifdef HAVE_SKB_GSO_UDP
if (gso_type & SKB_GSO_UDP && skb != segs)
@@ -351,17 +353,15 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
if (err)
break;
- } while ((skb = skb->next));
+ }
/* Free all of the segments. */
- skb = segs;
- do {
- nskb = skb->next;
+ skb_list_walk_safe(segs, skb, nskb) {
if (err)
kfree_skb(skb);
else
consume_skb(skb);
- } while ((skb = nskb));
+ }
return err;
}
@@ -371,7 +371,8 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
+ nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
- + nla_total_size(sizeof(unsigned int)); /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
+ + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */
/* OVS_PACKET_ATTR_USERDATA */
if (upcall_info->userdata)
@@ -414,6 +415,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
size_t len;
unsigned int hlen;
int err, dp_ifindex;
+ u64 hash;
dp_ifindex = get_dpifindex(dp);
if (!dp_ifindex)
@@ -467,7 +469,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
upcall->dp_ifindex = dp_ifindex;
err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
- BUG_ON(err);
+ if (err)
+ goto out;
if (upcall_info->userdata)
__nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
@@ -484,7 +487,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
}
err = ovs_nla_put_tunnel_info(user_skb,
upcall_info->egress_tun_info);
- BUG_ON(err);
+ if (err)
+ goto out;
+
nla_nest_end(user_skb, nla);
}
@@ -504,23 +509,32 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
}
/* Add OVS_PACKET_ATTR_MRU */
- if (upcall_info->mru) {
- if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
- upcall_info->mru)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (upcall_info->mru &&
+ nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Add OVS_PACKET_ATTR_LEN when packet is truncated */
- if (cutlen > 0) {
- if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
- skb->len)) {
- err = -ENOBUFS;
- goto out;
- }
- pad_packet(dp, user_skb);
+ if (cutlen > 0 &&
+ nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
+ err = -ENOBUFS;
+ goto out;
+ }
+
+ /* Add OVS_PACKET_ATTR_HASH */
+ hash = skb_get_hash_raw(skb);
+#ifdef HAVE_SW_HASH
+ if (skb->sw_hash)
+ hash |= OVS_PACKET_HASH_SW_BIT;
+#endif
+
+ if (skb->l4_hash)
+ hash |= OVS_PACKET_HASH_L4_BIT;
+
+ if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof (u64), &hash)) {
+ err = -ENOBUFS;
+ goto out;
}
/* Only reserve room for attribute header, packet data is added
@@ -563,6 +577,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *input_vport;
u16 mru = 0;
+ u64 hash;
int len;
int err;
bool log = !a[OVS_PACKET_ATTR_PROBE];
@@ -588,6 +603,14 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
}
OVS_CB(packet)->mru = mru;
+ if (a[OVS_PACKET_ATTR_HASH]) {
+ hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);
+
+ __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
+ !!(hash & OVS_PACKET_HASH_SW_BIT),
+ !!(hash & OVS_PACKET_HASH_L4_BIT));
+ }
+
/* Build an sw_flow for sending this packet. */
flow = ovs_flow_alloc();
err = PTR_ERR(flow);
@@ -649,6 +672,7 @@ static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
[OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
[OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
+ [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};
static struct genl_ops dp_packet_genl_ops[] = {
@@ -657,7 +681,9 @@ static struct genl_ops dp_packet_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = packet_policy,
+#endif
.doit = ovs_packet_cmd_execute
}
};
@@ -667,6 +693,9 @@ static struct genl_family dp_packet_genl_family __ro_after_init = {
.name = OVS_PACKET_FAMILY,
.version = OVS_PACKET_VERSION,
.maxattr = OVS_PACKET_ATTR_MAX,
+#ifndef HAVE_GENL_OPS_POLICY
+ .policy = packet_policy,
+#endif
.netnsok = true,
.parallel_ops = true,
.ops = dp_packet_genl_ops,
@@ -727,9 +756,13 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
{
size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));
- /* OVS_FLOW_ATTR_UFID */
+ /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback
+ * see ovs_nla_put_identifier()
+ */
if (sfid && ovs_identifier_is_ufid(sfid))
len += nla_total_size(sfid->ufid_len);
+ else
+ len += nla_total_size(ovs_key_attr_size());
/* OVS_FLOW_ATTR_KEY */
if (!sfid || should_fill_key(sfid, ufid_flags))
@@ -906,7 +939,10 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
info->snd_portid, info->snd_seq, 0,
cmd, ufid_flags);
- BUG_ON(retval < 0);
+ if (WARN_ON_ONCE(retval < 0)) {
+ kfree_skb(skb);
+ skb = ERR_PTR(retval);
+ }
return skb;
}
@@ -1363,7 +1399,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
&flow->id, info, false, ufid_flags);
if (likely(reply)) {
- if (likely(!IS_ERR(reply))) {
+ if (!IS_ERR(reply)) {
rcu_read_lock(); /*To keep RCU checker happy. */
err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
reply, info->snd_portid,
@@ -1371,7 +1407,10 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
OVS_FLOW_CMD_DEL,
ufid_flags);
rcu_read_unlock();
- BUG_ON(err < 0);
+ if (WARN_ON_ONCE(err < 0)) {
+ kfree_skb(reply);
+ goto out_free;
+ }
ovs_notify(&dp_flow_genl_family, &ovs_dp_flow_multicast_group, reply, info);
} else {
genl_set_err(&dp_flow_genl_family, sock_net(skb->sk), 0,
@@ -1380,6 +1419,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
}
}
+out_free:
ovs_flow_free(flow, true);
return 0;
unlock:
@@ -1396,8 +1436,8 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
u32 ufid_flags;
int err;
- err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a,
- OVS_FLOW_ATTR_MAX, flow_policy, NULL);
+ err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
+ OVS_FLOW_ATTR_MAX, flow_policy, NULL);
if (err)
return err;
ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
@@ -1449,7 +1489,9 @@ static const struct genl_ops dp_flow_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = flow_policy,
+#endif
.doit = ovs_flow_cmd_new
},
{ .cmd = OVS_FLOW_CMD_DEL,
@@ -1457,7 +1499,9 @@ static const struct genl_ops dp_flow_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = flow_policy,
+#endif
.doit = ovs_flow_cmd_del
},
{ .cmd = OVS_FLOW_CMD_GET,
@@ -1465,7 +1509,9 @@ static const struct genl_ops dp_flow_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = 0, /* OK for unprivileged users. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = flow_policy,
+#endif
.doit = ovs_flow_cmd_get,
.dumpit = ovs_flow_cmd_dump
},
@@ -1474,7 +1520,9 @@ static const struct genl_ops dp_flow_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = flow_policy,
+#endif
.doit = ovs_flow_cmd_set,
},
};
@@ -1484,6 +1532,9 @@ static struct genl_family dp_flow_genl_family __ro_after_init = {
.name = OVS_FLOW_FAMILY,
.version = OVS_FLOW_VERSION,
.maxattr = OVS_FLOW_ATTR_MAX,
+#ifndef HAVE_GENL_OPS_POLICY
+ .policy = flow_policy,
+#endif
.netnsok = true,
.parallel_ops = true,
.ops = dp_flow_genl_ops,
@@ -1582,10 +1633,59 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
dp->user_features = 0;
}
-static void ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
+/* Validate and apply OVS_DP_ATTR_USER_FEATURES from @a to @dp.
+ * A missing attribute clears all features; unknown bits give -EOPNOTSUPP.
+ */
+static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
+{
+	const u32 supported = OVS_DP_F_VPORT_PIDS | OVS_DP_F_UNALIGNED |
+			      OVS_DP_F_TC_RECIRC_SHARING;
+	u32 requested = 0;
+
+	if (a[OVS_DP_ATTR_USER_FEATURES])
+		requested = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+
+	if (requested & ~supported)
+		return -EOPNOTSUPP;
+#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	if (requested & OVS_DP_F_TC_RECIRC_SHARING)
+		return -EOPNOTSUPP;
+#endif
+
+	dp->user_features = requested;
+	if (requested & OVS_DP_F_TC_RECIRC_SHARING)
+		static_branch_enable(&tc_recirc_sharing_support);
+	else
+		static_branch_disable(&tc_recirc_sharing_support);
+
+	return 0;
+}
+
+/* Allocate @dp's per-CPU stats block; returns 0, or -ENOMEM on failure. */
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+	dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+	if (dp->stats_percpu)
+		return 0;
+	return -ENOMEM;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
{
- if (a[OVS_DP_ATTR_USER_FEATURES])
- dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
+ int i;
+
+ dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+ if (!dp->ports)
+ return -ENOMEM;
+ /* Every bucket must start out as an empty list head. */
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ INIT_HLIST_HEAD(&dp->ports[i]);
+
+ return 0;
}
static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -1596,7 +1696,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
- int err, i;
+ int err;
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1609,35 +1709,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
- goto err_free_reply;
+ goto err_destroy_reply;
ovs_dp_set_net(dp, sock_net(skb->sk));
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
if (err)
- goto err_free_dp;
+ goto err_destroy_dp;
- dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
- if (!dp->stats_percpu) {
- err = -ENOMEM;
+ err = ovs_dp_stats_init(dp);
+ if (err)
goto err_destroy_table;
- }
-
- dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
- sizeof(struct hlist_head),
- GFP_KERNEL);
- if (!dp->ports) {
- err = -ENOMEM;
- goto err_destroy_percpu;
- }
- for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
- INIT_HLIST_HEAD(&dp->ports[i]);
+ err = ovs_dp_vport_init(dp);
+ if (err)
+ goto err_destroy_stats;
err = ovs_meters_init(dp);
if (err)
- goto err_destroy_ports_array;
+ goto err_destroy_ports;
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1647,7 +1738,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
- ovs_dp_change(dp, a);
+ err = ovs_dp_change(dp, a);
+ if (err)
+ goto err_destroy_meters;
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -1667,6 +1760,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
+ ovs_unlock();
goto err_destroy_meters;
}
@@ -1683,17 +1777,16 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
return 0;
err_destroy_meters:
- ovs_unlock();
ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
free_percpu(dp->stats_percpu);
err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
kfree(dp);
-err_free_reply:
+err_destroy_reply:
kfree_skb(reply);
err:
return err;
@@ -1772,7 +1865,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
if (IS_ERR(dp))
goto err_unlock_free;
- ovs_dp_change(dp, info->attrs);
+ err = ovs_dp_change(dp, info->attrs);
+ if (err)
+ goto err_unlock_free;
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
info->snd_seq, 0, OVS_DP_CMD_GET);
@@ -1853,7 +1948,9 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = datapath_policy,
+#endif
.doit = ovs_dp_cmd_new
},
{ .cmd = OVS_DP_CMD_DEL,
@@ -1861,7 +1958,9 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = datapath_policy,
+#endif
.doit = ovs_dp_cmd_del
},
{ .cmd = OVS_DP_CMD_GET,
@@ -1869,7 +1968,9 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = 0, /* OK for unprivileged users. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = datapath_policy,
+#endif
.doit = ovs_dp_cmd_get,
.dumpit = ovs_dp_cmd_dump
},
@@ -1878,7 +1979,9 @@ static const struct genl_ops dp_datapath_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = datapath_policy,
+#endif
.doit = ovs_dp_cmd_set,
},
};
@@ -1888,6 +1991,9 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
.name = OVS_DATAPATH_FAMILY,
.version = OVS_DATAPATH_VERSION,
.maxattr = OVS_DP_ATTR_MAX,
+#ifndef HAVE_GENL_OPS_POLICY
+ .policy = datapath_policy,
+#endif
.netnsok = true,
.parallel_ops = true,
.ops = dp_datapath_genl_ops,
@@ -1900,7 +2006,7 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = {
/* Called with ovs_mutex or RCU read lock. */
static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
struct net *net, u32 portid, u32 seq,
- u32 flags, u8 cmd)
+ u32 flags, u8 cmd, gfp_t gfp)
{
struct ovs_header *ovs_header;
struct ovs_vport_stats vport_stats;
@@ -1922,7 +2028,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
#ifdef HAVE_PEERNET2ID_ALLOC
if (!net_eq(net, dev_net(vport->dev))) {
- int id = peernet2id_alloc(net, dev_net(vport->dev));
+ int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
goto nla_put_failure;
@@ -1964,11 +2070,12 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
struct sk_buff *skb;
int retval;
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!skb)
return ERR_PTR(-ENOMEM);
- retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd);
+ retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
+ GFP_KERNEL);
BUG_ON(retval < 0);
return skb;
@@ -2011,10 +2118,9 @@ static struct vport *lookup_vport(struct net *net,
}
-/* Called with ovs_mutex */
-static void update_headroom(struct datapath *dp)
+static unsigned int ovs_get_max_headroom(struct datapath *dp)
{
- unsigned dev_headroom, max_headroom = 0;
+ unsigned int dev_headroom, max_headroom = 0;
struct net_device *dev;
struct vport *vport;
int i;
@@ -2028,10 +2134,19 @@ static void update_headroom(struct datapath *dp)
}
}
- dp->max_headroom = max_headroom;
+ return max_headroom;
+}
+
+/* Store @new_headroom in @dp and apply it to every vport; needs ovs_mutex. */
+static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
+{
+ struct vport *vport;
+ int i;
+
+ dp->max_headroom = new_headroom;
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
- netdev_set_rx_headroom(vport->dev, max_headroom);
+ netdev_set_rx_headroom(vport->dev, new_headroom);
}
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
@@ -2042,6 +2157,7 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *reply;
struct vport *vport;
struct datapath *dp;
+ unsigned int new_headroom;
u32 port_no;
int err;
@@ -2101,11 +2217,13 @@ restart:
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_NEW);
+ OVS_VPORT_CMD_NEW, GFP_KERNEL);
BUG_ON(err < 0);
- if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
- update_headroom(dp);
+ new_headroom = netdev_get_fwd_headroom(vport->dev);
+
+ if (new_headroom > dp->max_headroom)
+ ovs_update_headroom(dp, new_headroom);
else
netdev_set_rx_headroom(vport->dev, dp->max_headroom);
@@ -2159,7 +2277,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_SET);
+ OVS_VPORT_CMD_SET, GFP_KERNEL);
BUG_ON(err < 0);
ovs_unlock();
@@ -2174,11 +2292,12 @@ exit_unlock_free:
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
- bool must_update_headroom = false;
+ bool update_headroom = false;
struct nlattr **a = info->attrs;
struct sk_buff *reply;
struct datapath *dp;
struct vport *vport;
+ unsigned int new_headroom;
int err;
reply = ovs_vport_cmd_alloc_info();
@@ -2198,19 +2317,23 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_DEL);
+ OVS_VPORT_CMD_DEL, GFP_KERNEL);
BUG_ON(err < 0);
/* the vport deletion may trigger dp headroom update */
dp = vport->dp;
if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
- must_update_headroom = true;
+ update_headroom = true;
+
netdev_reset_rx_headroom(vport->dev);
ovs_dp_detach_port(vport);
- if (must_update_headroom)
- update_headroom(dp);
+ if (update_headroom) {
+ new_headroom = ovs_get_max_headroom(dp);
+ if (new_headroom < dp->max_headroom)
+ ovs_update_headroom(dp, new_headroom);
+ }
ovs_unlock();
ovs_notify(&dp_vport_genl_family, &ovs_dp_vport_multicast_group, reply, info);
@@ -2241,7 +2364,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
goto exit_unlock_free;
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
info->snd_portid, info->snd_seq, 0,
- OVS_VPORT_CMD_GET);
+ OVS_VPORT_CMD_GET, GFP_ATOMIC);
BUG_ON(err < 0);
rcu_read_unlock();
@@ -2277,7 +2400,8 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
- OVS_VPORT_CMD_GET) < 0)
+ OVS_VPORT_CMD_GET,
+ GFP_ATOMIC) < 0)
goto out;
j++;
@@ -2310,7 +2434,9 @@ static const struct genl_ops dp_vport_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = vport_policy,
+#endif
.doit = ovs_vport_cmd_new
},
{ .cmd = OVS_VPORT_CMD_DEL,
@@ -2318,7 +2444,9 @@ static const struct genl_ops dp_vport_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = vport_policy,
+#endif
.doit = ovs_vport_cmd_del
},
{ .cmd = OVS_VPORT_CMD_GET,
@@ -2326,7 +2454,9 @@ static const struct genl_ops dp_vport_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = 0, /* OK for unprivileged users. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = vport_policy,
+#endif
.doit = ovs_vport_cmd_get,
.dumpit = ovs_vport_cmd_dump
},
@@ -2335,7 +2465,9 @@ static const struct genl_ops dp_vport_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = vport_policy,
+#endif
.doit = ovs_vport_cmd_set,
},
};
@@ -2345,6 +2477,9 @@ struct genl_family dp_vport_genl_family __ro_after_init = {
.name = OVS_VPORT_FAMILY,
.version = OVS_VPORT_VERSION,
.maxattr = OVS_VPORT_ATTR_MAX,
+#ifndef HAVE_GENL_OPS_POLICY
+ .policy = vport_policy,
+#endif
.netnsok = true,
.parallel_ops = true,
.ops = dp_vport_genl_ops,
@@ -2437,8 +2572,10 @@ static void __net_exit ovs_exit_net(struct net *dnet)
ovs_netns_frags6_exit(dnet);
ovs_netns_frags_exit(dnet);
- ovs_ct_exit(dnet);
ovs_lock();
+
+ ovs_ct_exit(dnet);
+
list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
__dp_destroy(dp);
@@ -2477,7 +2614,7 @@ static int __init dp_init(void)
{
int err;
- BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_field(struct sk_buff, cb));
pr_info("Open vSwitch switching datapath %s\n", VERSION);
diff --git a/datapath/datapath.h b/datapath/datapath.h
index 3bffa1dcb77d0aad00aa8d6840891ba3b456f280..c377e9b240c52e8393af05ff01170215bd1d978a 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -159,6 +159,18 @@ struct ovs_net {
#endif
};
+/**
+ * enum ovs_pkt_hash_types - flag bits carried alongside the packet hash
+ * in the 64-bit OVS_PACKET_ATTR_HASH value; the low 32 bits hold the hash.
+ * @OVS_PACKET_HASH_SW_BIT: indicates hash was computed in software stack.
+ * @OVS_PACKET_HASH_L4_BIT: indicates hash is a canonical 4-tuple hash
+ * over transport ports.
+ */
+enum ovs_pkt_hash_types {
+ OVS_PACKET_HASH_SW_BIT = (1ULL << 32),
+ OVS_PACKET_HASH_L4_BIT = (1ULL << 33),
+};
+
extern unsigned int ovs_net_id;
void ovs_lock(void);
void ovs_unlock(void);
@@ -239,6 +251,8 @@ extern struct notifier_block ovs_dp_device_notifier;
extern struct genl_family dp_vport_genl_family;
extern const struct genl_multicast_group ovs_dp_vport_multicast_group;
+DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
+
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_dp_detach_port(struct vport *);
int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/datapath/flow.c b/datapath/flow.c
index 6dc7402d50902a0c310c870c39620d2414a295b8..5a00c238ca9f7478dfab3be1726e9740b29b4455 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -874,6 +874,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ struct tc_skb_ext *tc_ext;
+#endif
int res, err;
/* Extract metadata from packet. */
@@ -904,7 +907,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
if (res < 0)
return res;
key->mac_proto = res;
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (static_branch_unlikely(&tc_recirc_sharing_support)) {
+ tc_ext = skb_ext_find(skb, TC_SKB_EXT);
+ key->recirc_id = tc_ext ? tc_ext->chain : 0;
+ } else {
+ key->recirc_id = 0;
+ }
+#else
key->recirc_id = 0;
+#endif
err = key_extract(skb, key);
if (!err)
diff --git a/datapath/flow.h b/datapath/flow.h
index 4ad5363e3450088b634b4ea0f0819cc86030e86b..584d9f565acd69c99ef9d912f9c5cb170c439dd1 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -50,7 +50,7 @@ enum sw_flow_mac_proto {
* matching for small options.
*/
#define TUN_METADATA_OFFSET(opt_len) \
- (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
+ (sizeof_field(struct sw_flow_key, tun_opts) - opt_len)
#define TUN_METADATA_OPTS(flow_key, opt_len) \
((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
@@ -65,7 +65,7 @@ struct vlan_head {
#define OVS_SW_FLOW_KEY_METADATA_SIZE \
(offsetof(struct sw_flow_key, recirc_id) + \
- FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+ sizeof_field(struct sw_flow_key, recirc_id))
struct ovs_key_nsh {
struct ovs_nsh_key_base base;
diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c
index 9fc1a19221d0e0a03f1dcce49ace62dd2f97139c..996041602cd0bb075684734b3df2282a27e9f4a7 100644
--- a/datapath/flow_netlink.c
+++ b/datapath/flow_netlink.c
@@ -2700,10 +2700,6 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
switch (key_type) {
- const struct ovs_key_ipv4 *ipv4_key;
- const struct ovs_key_ipv6 *ipv6_key;
- int err;
-
case OVS_KEY_ATTR_PRIORITY:
case OVS_KEY_ATTR_SKB_MARK:
case OVS_KEY_ATTR_CT_MARK:
@@ -2715,7 +2711,9 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
- case OVS_KEY_ATTR_TUNNEL:
+ case OVS_KEY_ATTR_TUNNEL: {
+ int err;
+
#ifndef USE_UPSTREAM_TUNNEL
if (eth_p_mpls(eth_type))
return -EINVAL;
@@ -2728,8 +2726,10 @@ static int validate_set(const struct nlattr *a,
if (err)
return err;
break;
+ }
+ case OVS_KEY_ATTR_IPV4: {
+ const struct ovs_key_ipv4 *ipv4_key;
- case OVS_KEY_ATTR_IPV4:
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
@@ -2749,8 +2749,10 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
}
break;
+ }
+ case OVS_KEY_ATTR_IPV6: {
+ const struct ovs_key_ipv6 *ipv6_key;
- case OVS_KEY_ATTR_IPV6:
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
@@ -2777,7 +2779,7 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
break;
-
+ }
case OVS_KEY_ATTR_TCP:
if ((eth_type != htons(ETH_P_IP) &&
eth_type != htons(ETH_P_IPV6)) ||
@@ -2859,8 +2861,8 @@ static int validate_userspace(const struct nlattr *attr)
struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
int error;
- error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
- userspace_policy, NULL);
+ error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr,
+ userspace_policy, NULL);
if (error)
return error;
@@ -2891,8 +2893,9 @@ static int validate_and_copy_check_pkt_len(struct net *net,
int nested_acts_start;
int start, err;
- err = nla_parse_nested(a, OVS_CHECK_PKT_LEN_ATTR_MAX, attr,
- cpl_policy, NULL);
+ err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX,
+ nla_data(attr), nla_len(attr),
+ cpl_policy, NULL);
if (err)
return err;
diff --git a/datapath/flow_table.c b/datapath/flow_table.c
index 76b390e9cd1c3b3aeabd5217ecaae204eef88a61..650338fb05809a6c947e067c2c7486e5cf252dc5 100644
--- a/datapath/flow_table.c
+++ b/datapath/flow_table.c
@@ -234,6 +234,74 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
return 0;
}
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+ struct sw_flow_mask *new)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int err, ma_count = READ_ONCE(ma->count);
+
+ if (ma_count >= ma->max) {
+ err = tbl_mask_array_realloc(tbl, ma->max +
+ MASK_ARRAY_SIZE_MIN);
+ if (err)
+ return err;
+
+ ma = ovsl_dereference(tbl->mask_array);
+ }
+
+ BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+ rcu_assign_pointer(ma->masks[ma_count], new);
+ WRITE_ONCE(ma->count, ma_count +1);
+
+ return 0;
+}
+
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+ struct sw_flow_mask *mask)
+{
+ struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+ int i, ma_count = READ_ONCE(ma->count);
+
+ /* Remove the deleted mask pointers from the array */
+ for (i = 0; i < ma_count; i++) {
+ if (mask == ovsl_dereference(ma->masks[i]))
+ goto found;
+ }
+
+ BUG();
+ return;
+
+found:
+ WRITE_ONCE(ma->count, ma_count -1);
+
+ rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
+ RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+
+ kfree_rcu(mask, rcu);
+
+ /* Shrink the mask array if necessary. */
+ if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+ ma_count <= (ma->max / 3))
+ tbl_mask_array_realloc(tbl, ma->max / 2);
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+ if (mask) {
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count)
+ tbl_mask_array_del_mask(tbl, mask);
+ }
+}
+
int ovs_flow_tbl_init(struct flow_table *table)
{
struct table_instance *ti, *ufid_ti;
@@ -280,7 +348,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
__table_instance_destroy(ti);
}
-static void table_instance_destroy(struct table_instance *ti,
+static void table_instance_flow_free(struct flow_table *table,
+ struct table_instance *ti,
+ struct table_instance *ufid_ti,
+ struct sw_flow *flow,
+ bool count)
+{
+ hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
+ if (count)
+ table->count--;
+
+ if (ovs_identifier_is_ufid(&flow->id)) {
+ hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+
+ if (count)
+ table->ufid_count--;
+ }
+
+ flow_mask_remove(table, flow->mask);
+}
+
+static void table_instance_destroy(struct flow_table *table,
+ struct table_instance *ti,
struct table_instance *ufid_ti,
bool deferred)
{
@@ -297,13 +386,12 @@ static void table_instance_destroy(struct table_instance *ti,
struct sw_flow *flow;
struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n;
- int ver = ti->node_ver;
- int ufid_ver = ufid_ti->node_ver;
- hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
- hlist_del_rcu(&flow->flow_table.node[ver]);
- if (ovs_identifier_is_ufid(&flow->id))
- hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
+ hlist_for_each_entry_safe(flow, n, head,
+ flow_table.node[ti->node_ver]) {
+
+ table_instance_flow_free(table, ti, ufid_ti,
+ flow, false);
ovs_flow_free(flow, deferred);
}
}
@@ -328,7 +416,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
free_percpu(table->mask_cache);
kfree(rcu_dereference_raw(table->mask_array));
- table_instance_destroy(ti, ufid_ti, false);
+ table_instance_destroy(table, ti, ufid_ti, false);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -397,12 +485,12 @@ static void flow_table_copy_flows(struct table_instance *old,
struct hlist_head *head = &old->buckets[i];
if (ufid)
- hlist_for_each_entry(flow, head,
- ufid_table.node[old_ver])
+ hlist_for_each_entry_rcu(flow, head,
+ ufid_table.node[old_ver])
ufid_table_instance_insert(new, flow);
else
- hlist_for_each_entry(flow, head,
- flow_table.node[old_ver])
+ hlist_for_each_entry_rcu(flow, head,
+ flow_table.node[old_ver])
table_instance_insert(new, flow);
}
@@ -444,7 +532,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
flow_table->count = 0;
flow_table->ufid_count = 0;
- table_instance_destroy(old_ti, old_ufid_ti, true);
+ table_instance_destroy(flow_table, old_ti, old_ufid_ti, true);
return 0;
err_free_ti:
@@ -455,13 +543,10 @@ err_free_ti:
static u32 flow_hash(const struct sw_flow_key *key,
const struct sw_flow_key_range *range)
{
- int key_start = range->start;
- int key_end = range->end;
- const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
- int hash_u32s = (key_end - key_start) >> 2;
+ const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
/* Make sure number of hash bytes are multiple of u32. */
- BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+ int hash_u32s = range_n_bytes(range) >> 2;
return jhash2(hash_key, hash_u32s, 0);
}
@@ -540,11 +625,11 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
u32 *n_mask_hit,
u32 *index)
{
- struct sw_flow_mask *mask;
struct sw_flow *flow;
+ struct sw_flow_mask *mask;
int i;
- if (*index < ma->max) {
+ if (likely(*index < ma->max)) {
mask = rcu_dereference_ovsl(ma->masks[*index]);
if (mask) {
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
@@ -559,8 +644,8 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
continue;
mask = rcu_dereference_ovsl(ma->masks[i]);
- if (!mask)
- continue;
+ if (unlikely(!mask))
+ break;
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) { /* Found */
@@ -716,7 +801,7 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table)
struct mask_array *ma;
ma = rcu_dereference_ovsl(table->mask_array);
- return ma->count;
+ return READ_ONCE(ma->count);
}
static struct table_instance *table_instance_expand(struct table_instance *ti,
@@ -725,49 +810,6 @@ static struct table_instance *table_instance_expand(struct table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
}
-static void tbl_mask_array_delete_mask(struct mask_array *ma,
- struct sw_flow_mask *mask)
-{
- int i;
-
- /* Remove the deleted mask pointers from the array */
- for (i = 0; i < ma->max; i++) {
- if (mask == ovsl_dereference(ma->masks[i])) {
- RCU_INIT_POINTER(ma->masks[i], NULL);
- ma->count--;
- kfree_rcu(mask, rcu);
- return;
- }
- }
- BUG();
-}
-
-/* Remove 'mask' from the mask list, if it is not needed any more. */
-static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
-{
- if (mask) {
- /* ovs-lock is required to protect mask-refcount and
- * mask list.
- */
- ASSERT_OVSL();
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- struct mask_array *ma;
-
- ma = ovsl_dereference(tbl->mask_array);
- tbl_mask_array_delete_mask(ma, mask);
-
- /* Shrink the mask array if necessary. */
- if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
- ma->count <= (ma->max / 3))
- tbl_mask_array_realloc(tbl, ma->max / 2);
-
- }
- }
-}
-
/* Must be called with OVS mutex held. */
void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
{
@@ -775,17 +817,7 @@ void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
BUG_ON(table->count == 0);
- hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
- table->count--;
- if (ovs_identifier_is_ufid(&flow->id)) {
- hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
- table->ufid_count--;
- }
-
- /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
- * accessible as long as the RCU read lock is held.
- */
- flow_mask_remove(table, flow->mask);
+ table_instance_flow_free(table, ti, ufid_ti, flow, true);
}
static struct sw_flow_mask *mask_alloc(void)
@@ -836,9 +868,6 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
mask = flow_mask_find(tbl, new);
if (!mask) {
- struct mask_array *ma;
- int i;
-
/* Allocate a new mask if none exsits. */
mask = mask_alloc();
if (!mask)
@@ -848,28 +877,9 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
mask->range = new->range;
/* Add mask to mask-list. */
- ma = ovsl_dereference(tbl->mask_array);
- if (ma->count >= ma->max) {
- int err;
-
- err = tbl_mask_array_realloc(tbl, ma->max +
- MASK_ARRAY_SIZE_MIN);
- if (err) {
- kfree(mask);
- return err;
- }
- ma = ovsl_dereference(tbl->mask_array);
- }
-
- for (i = 0; i < ma->max; i++) {
- struct sw_flow_mask *t;
-
- t = ovsl_dereference(ma->masks[i]);
- if (!t) {
- rcu_assign_pointer(ma->masks[i], mask);
- ma->count++;
- break;
- }
+ if (tbl_mask_array_add_mask(tbl, mask)) {
+ kfree(mask);
+ return -ENOMEM;
}
} else {
diff --git a/datapath/linux/Kbuild.in b/datapath/linux/Kbuild.in
index 9e3259f1919da46a05fdf420ceb46e729894f34c..395b0cbc00a92a6b5b97d3e3fd8a11ff392310c2 100644
--- a/datapath/linux/Kbuild.in
+++ b/datapath/linux/Kbuild.in
@@ -16,7 +16,7 @@ ccflags-y += -include $(builddir)/kcompat.h
# These include directories have to go before -I$(KSRC)/include.
# NOSTDINC_FLAGS just happens to be a variable that goes in the
# right place, even though it's conceptually incorrect.
-NOSTDINC_FLAGS += -I$(top_srcdir)/include -I$(srcdir)/compat -I$(srcdir)/compat/include
+NOSTDINC_FLAGS += -include $(builddir)/kcompat.h -I$(top_srcdir)/include -I$(srcdir)/compat -I$(srcdir)/compat/include
obj-m := $(subst _,-,$(patsubst %,%.o,$(build_modules)))
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index 63a5cbae40ccd3de72d30fc00c8a66d34c0e280b..37224398800c9d6a3e67455098c41ddd60c2bddd 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -120,4 +120,4 @@ openvswitch_headers += \
linux/compat/include/linux/netfilter.h \
linux/compat/include/linux/overflow.h \
linux/compat/include/linux/rbtree.h
-EXTRA_DIST += linux/compat/build-aux/export-check-whitelist
+EXTRA_DIST += linux/compat/build-aux/export-check-allow-list
diff --git a/datapath/linux/compat/build-aux/export-check-whitelist b/datapath/linux/compat/build-aux/export-check-allow-list
similarity index 100%
rename from datapath/linux/compat/build-aux/export-check-whitelist
rename to datapath/linux/compat/build-aux/export-check-allow-list
diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
index c044b148969f5f808f93663a3967f4ca6da76a52..02c6403e646ed0b54b1798e7f0662aa72a8fb1b1 100644
--- a/datapath/linux/compat/geneve.c
+++ b/datapath/linux/compat/geneve.c
@@ -962,14 +962,26 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
return dst;
}
-#ifdef HAVE_IPV6_DST_LOOKUP_NET
- if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
+#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW)
+#ifdef HAVE_IPV6_DST_LOOKUP_FLOW_NET
+ dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6,
+ NULL);
#else
-#ifdef HAVE_IPV6_STUB
+ dst = ipv6_stub->ipv6_dst_lookup_flow(gs6->sock->sk, fl6,
+ NULL);
+#endif
+ if (IS_ERR(dst)) {
+#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET)
+ if (ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, &dst,
+ fl6)) {
+#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW)
+ if (ipv6_stub->ipv6_dst_lookup_flow(gs6->sock->sk, &dst, fl6)) {
+#elif defined(HAVE_IPV6_DST_LOOKUP_NET)
+ if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
+#elif defined(HAVE_IPV6_STUB)
if (ipv6_stub->ipv6_dst_lookup(gs6->sock->sk, &dst, fl6)) {
#else
if (ip6_dst_lookup(gs6->sock->sk, &dst, fl6)) {
-#endif
#endif
netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
return ERR_PTR(-ENETUNREACH);
@@ -1407,7 +1419,7 @@ static void geneve_setup(struct net_device *dev)
static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_ID] = { .type = NLA_U32 },
- [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
[IFLA_GENEVE_TTL] = { .type = NLA_U8 },
[IFLA_GENEVE_TOS] = { .type = NLA_U8 },
@@ -1419,7 +1431,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
};
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
#else
diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c
index 48a56b9f5d5fa5659fbccf79fc4ef958d70e5eb1..65da5d87604dd043b9da92ea545cc9b99208a3f1 100644
--- a/datapath/linux/compat/gso.c
+++ b/datapath/linux/compat/gso.c
@@ -171,7 +171,7 @@ static struct sk_buff *tnl_skb_gso_segment(struct sk_buff *skb,
__be16 proto = skb->protocol;
char cb[sizeof(skb->cb)];
- BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct ovs_gso_cb) > sizeof_field(struct sk_buff, cb));
OVS_GSO_CB(skb)->ipv6 = (sa_family == AF_INET6);
/* setup whole inner packet to get protocol. */
__skb_pull(skb, mac_offset);
diff --git a/datapath/linux/compat/include/linux/compiler.h b/datapath/linux/compat/include/linux/compiler.h
index 65f3ba6f4dcab354ff62caf3e7fce1de6cfc8366..59b506fd4d1e75d32fbddf0db3c2cfe9d6361e56 100644
--- a/datapath/linux/compat/include/linux/compiler.h
+++ b/datapath/linux/compat/include/linux/compiler.h
@@ -15,4 +15,12 @@
#define READ_ONCE(x) (x)
#endif
+#ifndef WRITE_ONCE
+#define WRITE_ONCE(x, val) \
+do { \
+ *(volatile typeof(x) *)&(x) = (val); \
+} while (0)
+#endif
+
+
#endif
diff --git a/datapath/linux/compat/include/linux/kernel.h b/datapath/linux/compat/include/linux/kernel.h
index 2e81abc2fbd98202bbddf792a967ad519838fb36..106b5940a15f37b503e8137586b73ac5791a39bb 100644
--- a/datapath/linux/compat/include/linux/kernel.h
+++ b/datapath/linux/compat/include/linux/kernel.h
@@ -32,4 +32,8 @@
#define U32_MAX ((u32)~0U)
#endif
+#ifndef sizeof_field
+#define sizeof_field(t, f) (sizeof(((t*)0)->f))
+#endif
+
#endif /* linux/kernel.h */
diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index 2f0c6559eaf58ec8c7c5805feb8b71178c7184cb..875de20250ceb6f9313302c4f56e2fcd715f1340 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -245,6 +245,8 @@ enum ovs_vport_type {
OVS_VPORT_TYPE_ERSPAN = 107, /* ERSPAN tunnel. */
OVS_VPORT_TYPE_IP6ERSPAN = 108, /* ERSPAN tunnel. */
OVS_VPORT_TYPE_IP6GRE = 109,
+ OVS_VPORT_TYPE_GTPU = 110,
+ OVS_VPORT_TYPE_BAREUDP = 111, /* Bareudp tunnel. */
__OVS_VPORT_TYPE_MAX
};
@@ -307,6 +309,14 @@ enum {
#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)
+enum {
+ OVS_BAREUDP_EXT_UNSPEC,
+ OVS_BAREUDP_EXT_MULTIPROTO_MODE,
+ __OVS_BAREUDP_EXT_MAX,
+};
+
+#define OVS_BAREUDP_EXT_MAX (__OVS_BAREUDP_EXT_MAX - 1)
+
/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
*/
enum {
@@ -404,6 +414,10 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */
OVS_TUNNEL_KEY_ATTR_PAD,
OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* struct erspan_metadata */
+#ifndef __KERNEL__
+ /* Only used within userspace data path. */
+ OVS_TUNNEL_KEY_ATTR_GTPU_OPTS, /* struct gtpu_metadata */
+#endif
__OVS_TUNNEL_KEY_ATTR_MAX
};
@@ -1021,6 +1035,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_TUNNEL_PUSH, /* struct ovs_action_push_tnl*/
OVS_ACTION_ATTR_TUNNEL_POP, /* u32 port number. */
OVS_ACTION_ATTR_DROP, /* u32 xlate_error. */
+ OVS_ACTION_ATTR_LB_OUTPUT, /* u32 bond-id. */
#endif
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
diff --git a/datapath/linux/compat/include/linux/percpu.h b/datapath/linux/compat/include/linux/percpu.h
index 7c346aa31ab9d048bff38130a301828bc258334f..a039142e222333f8eda1afaac2fe5d65803f53e8 100644
--- a/datapath/linux/compat/include/linux/percpu.h
+++ b/datapath/linux/compat/include/linux/percpu.h
@@ -7,12 +7,6 @@
#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, smp_processor_id())
#endif
-#ifdef HAVE_RHEL6_PER_CPU
-#undef this_cpu_read
-#undef this_cpu_inc
-#undef this_cpu_dec
-#endif
-
#if !defined this_cpu_read
#define this_cpu_read(ptr) percpu_read(ptr)
#endif
diff --git a/datapath/linux/compat/include/linux/rculist.h b/datapath/linux/compat/include/linux/rculist.h
index 8df8ad8a2774f674ec62a899bbf72f2e52783a9e..40fd5e1710969eec91bd0bdc3e66774eefb0c5e8 100644
--- a/datapath/linux/compat/include/linux/rculist.h
+++ b/datapath/linux/compat/include/linux/rculist.h
@@ -9,9 +9,28 @@
#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev)))
#endif
+/*
+ * Check during list traversal that we are within an RCU reader
+ */
+
+#define check_arg_count_one(dummy)
+
+#ifdef CONFIG_PROVE_RCU_LIST
+#define __list_check_rcu(dummy, cond, extra...) \
+ ({ \
+ check_arg_count_one(extra); \
+ RCU_LOCKDEP_WARN(!cond && !rcu_read_lock_any_held(), \
+ "RCU-list traversed in non-reader section!"); \
+ })
+#else
+#define __list_check_rcu(dummy, cond, extra...) \
+ ({ check_arg_count_one(extra); })
+#endif
+
#undef hlist_for_each_entry_rcu
-#define hlist_for_each_entry_rcu(pos, head, member) \
- for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\
+#define hlist_for_each_entry_rcu(pos, head, member, cond...) \
+ for (__list_check_rcu(dummy, ## cond, 0), \
+ pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\
typeof(*(pos)), member); \
pos; \
pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\
diff --git a/datapath/linux/compat/include/linux/skbuff.h b/datapath/linux/compat/include/linux/skbuff.h
index 63972891bf4892249654228d050da8c616f1a703..396a5e40699fdbdedfd9bb37545f16704c763cdf 100644
--- a/datapath/linux/compat/include/linux/skbuff.h
+++ b/datapath/linux/compat/include/linux/skbuff.h
@@ -278,9 +278,7 @@ static inline void skb_clear_hash(struct sk_buff *skb)
#ifdef HAVE_RXHASH
skb->rxhash = 0;
#endif
-#if defined(HAVE_L4_RXHASH) && !defined(HAVE_RHEL_OVS_HOOK)
- skb->l4_rxhash = 0;
-#endif
+ skb->l4_hash = 0;
}
#endif
@@ -371,7 +369,7 @@ static inline void skb_pop_mac_header(struct sk_buff *skb)
#ifndef HAVE_SKB_CLEAR_HASH_IF_NOT_L4
static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
{
- if (!skb->l4_rxhash)
+ if (!skb->l4_hash)
skb_clear_hash(skb);
}
#endif
@@ -456,4 +454,38 @@ static inline void skb_set_inner_ipproto(struct sk_buff *skb,
#define nf_reset_ct nf_reset
#endif
+#ifndef HAVE___SKB_SET_HASH
+static inline void
+__skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4)
+{
+#ifdef HAVE_RXHASH
+ skb->rxhash = hash;
+#else
+ skb->hash = hash;
+#endif
+ skb->l4_hash = is_l4;
+#ifdef HAVE_SW_HASH
+ skb->sw_hash = is_sw;
+#endif
+}
+#endif
+
+#ifndef HAVE_SKB_GET_HASH_RAW
+static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
+{
+#ifdef HAVE_RXHASH
+ return skb->rxhash;
+#else
+ return skb->hash;
+#endif
+}
+#endif
+
+#ifndef skb_list_walk_safe
+/* Iterate through singly-linked GSO fragments of an skb. */
+#define skb_list_walk_safe(first, skb, next_skb) \
+ for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \
+ (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL)
+#endif
+
#endif
diff --git a/datapath/linux/compat/include/linux/static_key.h b/datapath/linux/compat/include/linux/static_key.h
index 7e43a49e84f122b36ea468454ea3333848bf7727..432feccb9b228cbd30f4cccfc14ea93cd07033d7 100644
--- a/datapath/linux/compat/include/linux/static_key.h
+++ b/datapath/linux/compat/include/linux/static_key.h
@@ -74,6 +74,13 @@ static inline void rpl_static_key_disable(struct static_key *key)
#define static_branch_enable(x) rpl_static_key_enable(&(x)->key)
#define static_branch_disable(x) rpl_static_key_disable(&(x)->key)
+#ifndef HAVE_DECLARE_STATIC_KEY
+#define DECLARE_STATIC_KEY_TRUE(name) \
+ extern struct static_key_true name
+#define DECLARE_STATIC_KEY_FALSE(name) \
+ extern struct static_key_false name
+#endif
+
#endif /* HAVE_UPSTREAM_STATIC_KEY */
#endif /* _STATIC_KEY_WRAPPER_H */
diff --git a/datapath/linux/compat/include/linux/stddef.h b/datapath/linux/compat/include/linux/stddef.h
index f2b7c319aa857f23b6f79ad877a9d65db5651875..5b44c0dee4eeac54eab9509d5a05da277f3b8c2c 100644
--- a/datapath/linux/compat/include/linux/stddef.h
+++ b/datapath/linux/compat/include/linux/stddef.h
@@ -5,13 +5,6 @@
#ifdef __KERNEL__
-#ifndef HAVE_BOOL_TYPE
-enum {
- false = 0,
- true = 1
-};
-#endif /* !HAVE_BOOL_TYPE */
-
#ifndef offsetofend
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
diff --git a/datapath/linux/compat/include/linux/types.h b/datapath/linux/compat/include/linux/types.h
index b989d96c38ef18dccf0fee557d89484e185bdd23..a58623e70b8ff9d6faf44d07c5dbe237e1f9aa9e 100644
--- a/datapath/linux/compat/include/linux/types.h
+++ b/datapath/linux/compat/include/linux/types.h
@@ -8,8 +8,4 @@ typedef __u16 __bitwise __sum16;
typedef __u32 __bitwise __wsum;
#endif
-#ifndef HAVE_BOOL_TYPE
-typedef _Bool bool;
-#endif /* !HAVE_BOOL_TYPE */
-
#endif
diff --git a/datapath/linux/compat/include/net/inet_frag.h b/datapath/linux/compat/include/net/inet_frag.h
index 124c8bea7c5698fa08fb1909e1bc8f671d866a7a..00784da2b7297efa14dcae8dce087e8882961660 100644
--- a/datapath/linux/compat/include/net/inet_frag.h
+++ b/datapath/linux/compat/include/net/inet_frag.h
@@ -12,6 +12,7 @@
#define qp_flags(qp) (qp->q.flags)
#endif
+#ifndef HAVE_CORRECT_MRU_HANDLING
#ifndef HAVE_INET_FRAG_EVICTING
static inline bool inet_frag_evicting(struct inet_frag_queue *q)
{
@@ -22,6 +23,7 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q)
#endif /* HAVE_INET_FRAG_QUEUE_WITH_LIST_EVICTOR */
}
#endif /* HAVE_INET_FRAG_EVICTING */
+#endif /* HAVE_CORRECT_MRU_HANDLING */
/* Upstream commit 3fd588eb90bf ("inet: frag: remove lru list") dropped this
* function, but we call it from our compat code. Provide a noop version. */
@@ -29,6 +31,10 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q)
#define inet_frag_lru_move(q)
#endif
+#ifdef HAVE_INET_FRAG_FQDIR
+#define netns_frags fqdir
+#endif
+
#ifndef HAVE_SUB_FRAG_MEM_LIMIT_ARG_STRUCT_NETNS_FRAGS
#ifdef HAVE_FRAG_PERCPU_COUNTER_BATCH
static inline void rpl_sub_frag_mem_limit(struct netns_frags *nf, int i)
@@ -45,13 +51,21 @@ static inline void rpl_add_frag_mem_limit(struct netns_frags *nf, int i)
#else /* !frag_percpu_counter_batch */
static inline void rpl_sub_frag_mem_limit(struct netns_frags *nf, int i)
{
+#ifdef HAVE_INET_FRAG_FQDIR
+ atomic_long_sub(i, &nf->mem);
+#else
atomic_sub(i, &nf->mem);
+#endif
}
#define sub_frag_mem_limit rpl_sub_frag_mem_limit
static inline void rpl_add_frag_mem_limit(struct netns_frags *nf, int i)
{
+#ifdef HAVE_INET_FRAG_FQDIR
+ atomic_long_add(i, &nf->mem);
+#else
atomic_add(i, &nf->mem);
+#endif
}
#define add_frag_mem_limit rpl_add_frag_mem_limit
#endif /* frag_percpu_counter_batch */
diff --git a/datapath/linux/compat/include/net/ip_tunnels.h b/datapath/linux/compat/include/net/ip_tunnels.h
index da64a94adc08e0fd80d3c9e8fa6ff36b4bc95df1..617a753c7c013570548a00d7bf7920ee3fd747b4 100644
--- a/datapath/linux/compat/include/net/ip_tunnels.h
+++ b/datapath/linux/compat/include/net/ip_tunnels.h
@@ -139,8 +139,8 @@ struct tnl_ptk_info {
/* Used to memset ipv4 address padding. */
#define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst)
#define IP_TUNNEL_KEY_IPV4_PAD_LEN \
- (FIELD_SIZEOF(struct ip_tunnel_key, u) - \
- FIELD_SIZEOF(struct ip_tunnel_key, u.ipv4))
+ (sizeof_field(struct ip_tunnel_key, u) - \
+ sizeof_field(struct ip_tunnel_key, u.ipv4))
struct ip_tunnel_key {
__be64 tun_id;
diff --git a/datapath/linux/compat/include/net/netlink.h b/datapath/linux/compat/include/net/netlink.h
index 34fc3460dc81fe3ab742f950c32d64858aaa634d..84e073974bdd26dbfd350eb6a2c139d7ac22c93b 100644
--- a/datapath/linux/compat/include/net/netlink.h
+++ b/datapath/linux/compat/include/net/netlink.h
@@ -143,6 +143,11 @@ static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value,
#endif
+#ifndef HAVE_NLA_PARSE_DEPRECATED_STRICT
+#define nla_parse_nested_deprecated nla_parse_nested
+#define nla_parse_deprecated_strict nla_parse
+#define genlmsg_parse_deprecated genlmsg_parse
+
#ifndef HAVE_NETLINK_EXT_ACK
struct netlink_ext_ack;
@@ -153,7 +158,8 @@ static inline int rpl_nla_parse_nested(struct nlattr *tb[], int maxtype,
{
return nla_parse_nested(tb, maxtype, nla, policy);
}
-#define nla_parse_nested rpl_nla_parse_nested
+#undef nla_parse_nested_deprecated
+#define nla_parse_nested_deprecated rpl_nla_parse_nested
static inline int rpl_nla_parse(struct nlattr **tb, int maxtype,
const struct nlattr *head, int len,
@@ -162,8 +168,10 @@ static inline int rpl_nla_parse(struct nlattr **tb, int maxtype,
{
return nla_parse(tb, maxtype, head, len, policy);
}
-#define nla_parse rpl_nla_parse
+#undef nla_parse_deprecated_strict
+#define nla_parse_deprecated_strict rpl_nla_parse
#endif
+#endif /* HAVE_NLA_PARSE_DEPRECATED_STRICT */
#ifndef HAVE_NLA_NEST_START_NOFLAG
static inline struct nlattr *rpl_nla_nest_start_noflag(struct sk_buff *skb,
diff --git a/datapath/linux/compat/ip6_gre.c b/datapath/linux/compat/ip6_gre.c
index 7fd34530999cef8c57c84ece14092e82d1e5aa8a..3aa9844b370c4a7b05ae87e9e5e82732a976a535 100644
--- a/datapath/linux/compat/ip6_gre.c
+++ b/datapath/linux/compat/ip6_gre.c
@@ -1687,7 +1687,7 @@ static struct pernet_operations ip6gre_net_ops = {
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int rpl_ip6gre_tunnel_validate(struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1713,7 +1713,7 @@ static int rpl_ip6gre_tunnel_validate(struct nlattr *tb[],
}
#define ip6gre_tunnel_validate rpl_ip6gre_tunnel_validate
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int rpl_ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
#else
@@ -1739,7 +1739,7 @@ static int rpl_ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
}
out:
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
return ip6gre_tunnel_validate(tb, data, extack);
#else
return ip6gre_tunnel_validate(tb, data);
@@ -1747,7 +1747,7 @@ out:
}
#define ip6gre_tap_validate rpl_ip6gre_tap_validate
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int rpl_ip6erspan_tap_validate(struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1762,7 +1762,7 @@ static int rpl_ip6erspan_tap_validate(struct nlattr *tb[],
if (!data)
return 0;
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
ret = ip6gre_tap_validate(tb, data, extack);
#else
ret = ip6gre_tap_validate(tb, data);
@@ -2311,8 +2311,8 @@ static const struct nla_policy ip6gre_policy[RPL_IFLA_GRE_MAX + 1] = {
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct ipv6hdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct ipv6hdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
[IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
diff --git a/datapath/linux/compat/ip6_tunnel.c b/datapath/linux/compat/ip6_tunnel.c
index 9f4bae7dd3d1406499e58fdd8c37570884822ae2..984a51bfb942d2dd80843ac20b926c976f7d7189 100644
--- a/datapath/linux/compat/ip6_tunnel.c
+++ b/datapath/linux/compat/ip6_tunnel.c
@@ -1754,7 +1754,7 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
return 0;
}
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int rpl_ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
#else
diff --git a/datapath/linux/compat/ip_gre.c b/datapath/linux/compat/ip_gre.c
index 04f994f9772a06f316ae56bf1f3adafce1790c15..c194ffe0064453b271cb11b3c01fa4806ec6f986 100644
--- a/datapath/linux/compat/ip_gre.c
+++ b/datapath/linux/compat/ip_gre.c
@@ -623,7 +623,7 @@ static const struct gre_protocol ipgre_protocol = {
.err_handler = __gre_err,
};
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
#else
@@ -646,7 +646,7 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
}
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
#else
@@ -672,7 +672,7 @@ static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
}
out:
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
return ipgre_tunnel_validate(tb, data, NULL);
#else
return ipgre_tunnel_validate(tb, data);
@@ -707,7 +707,7 @@ enum {
#define RPL_IFLA_GRE_MAX (IFLA_GRE_ERSPAN_HWID + 1)
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
#else
@@ -720,7 +720,7 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[])
if (!data)
return 0;
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
ret = ipgre_tap_validate(tb, data, NULL);
#else
ret = ipgre_tap_validate(tb, data);
@@ -1096,8 +1096,8 @@ static const struct nla_policy ipgre_policy[RPL_IFLA_GRE_MAX + 1] = {
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
diff --git a/datapath/linux/compat/lisp.c b/datapath/linux/compat/lisp.c
index 58144adf6a363ccd932c5496d54628a55b4b4a50..49c60f4edb3c57b0bb6f827f38027ec7c1ddcf12 100644
--- a/datapath/linux/compat/lisp.c
+++ b/datapath/linux/compat/lisp.c
@@ -38,7 +38,6 @@
#include "datapath.h"
#include "gso.h"
#include "vport.h"
-#include "gso.h"
#include "vport-netdev.h"
#define LISP_UDP_PORT 4341
@@ -612,7 +611,7 @@ static const struct nla_policy lisp_policy[IFLA_LISP_MAX + 1] = {
[IFLA_LISP_PORT] = { .type = NLA_U16 },
};
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int lisp_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack __always_unused *extack)
#else
diff --git a/datapath/linux/compat/nf_conntrack_reasm.c b/datapath/linux/compat/nf_conntrack_reasm.c
index ced9fba98b9af23025778e487b6cc2c0e796e843..77b4b25485ffeace2ebcd51da2eb27838976b9fc 100644
--- a/datapath/linux/compat/nf_conntrack_reasm.c
+++ b/datapath/linux/compat/nf_conntrack_reasm.c
@@ -57,10 +57,13 @@
#include
#include "datapath.h"
-#ifdef OVS_NF_DEFRAG6_BACKPORT
+#if defined(HAVE_INET_FRAGS_WITH_FRAGS_WORK) || !defined(HAVE_INET_FRAGS_RND)
static const char nf_frags_cache_name[] = "ovs-frag6";
+#endif
+
+#ifdef OVS_NF_DEFRAG6_BACKPORT
struct nf_ct_frag6_skb_cb
{
struct inet6_skb_parm h;
diff --git a/datapath/linux/compat/stt.c b/datapath/linux/compat/stt.c
index 21fef09f43299d53dd2bc2865c0f664d6d020077..39a294764ec8fb053b5360fb813c61a2f6392571 100644
--- a/datapath/linux/compat/stt.c
+++ b/datapath/linux/compat/stt.c
@@ -13,7 +13,6 @@
#include
#include
-#include
#include
#include
#include
@@ -136,7 +135,7 @@ struct pkt_frag {
};
struct stt_percpu {
- struct flex_array *frag_hash;
+ struct pkt_frag *frag_hash;
struct list_head frag_lru;
unsigned int frag_mem_used;
@@ -1079,8 +1078,7 @@ static struct pkt_frag *lookup_frag(struct net *net,
int i;
for (i = 0; i < FRAG_HASH_SEGS; i++) {
- frag = flex_array_get(stt_percpu->frag_hash,
- hash & (FRAG_HASH_ENTRIES - 1));
+ frag = &stt_percpu->frag_hash[hash & (FRAG_HASH_ENTRIES - 1)];
if (frag->skbs &&
time_before(jiffies, frag->timestamp + FRAG_EXP_TIME) &&
@@ -1533,7 +1531,7 @@ static void clean_percpu(struct work_struct *work)
for (j = 0; j < FRAG_HASH_ENTRIES; j++) {
struct pkt_frag *frag;
- frag = flex_array_get(stt_percpu->frag_hash, j);
+ frag = &stt_percpu->frag_hash[j];
if (!frag->skbs ||
time_before(jiffies, frag->timestamp + FRAG_EXP_TIME))
continue;
@@ -1561,7 +1559,7 @@ static void clean_percpu(struct work_struct *work)
#endif
#ifdef HAVE_NF_HOOK_STATE
-#if RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,0)
+#if RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(7,0) && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(8,0)
/* RHEL nfhook hacks. */
#ifndef __GENKSYMS__
#define LAST_PARAM const struct net_device *in, const struct net_device *out, \
@@ -1631,26 +1629,20 @@ static int stt_start(struct net *net)
for_each_possible_cpu(i) {
struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i);
- struct flex_array *frag_hash;
+ struct pkt_frag *frag_hash;
spin_lock_init(&stt_percpu->lock);
INIT_LIST_HEAD(&stt_percpu->frag_lru);
get_random_bytes(&per_cpu(pkt_seq_counter, i), sizeof(u32));
- frag_hash = flex_array_alloc(sizeof(struct pkt_frag),
- FRAG_HASH_ENTRIES,
- GFP_KERNEL | __GFP_ZERO);
+ frag_hash = kvmalloc_array(sizeof(struct pkt_frag),
+ FRAG_HASH_ENTRIES,
+ GFP_KERNEL | __GFP_ZERO);
if (!frag_hash) {
err = -ENOMEM;
goto free_percpu;
}
stt_percpu->frag_hash = frag_hash;
-
- err = flex_array_prealloc(stt_percpu->frag_hash, 0,
- FRAG_HASH_ENTRIES,
- GFP_KERNEL | __GFP_ZERO);
- if (err)
- goto free_percpu;
}
schedule_clean_percpu();
n_tunnels++;
@@ -1691,7 +1683,7 @@ free_percpu:
struct stt_percpu *stt_percpu = per_cpu_ptr(stt_percpu_data, i);
if (stt_percpu->frag_hash)
- flex_array_free(stt_percpu->frag_hash);
+ kvfree(stt_percpu->frag_hash);
}
free_percpu(stt_percpu_data);
@@ -1718,11 +1710,11 @@ static void stt_cleanup(struct net *net)
for (j = 0; j < FRAG_HASH_ENTRIES; j++) {
struct pkt_frag *frag;
- frag = flex_array_get(stt_percpu->frag_hash, j);
+ frag = &stt_percpu->frag_hash[j];
kfree_skb_list(frag->skbs);
}
- flex_array_free(stt_percpu->frag_hash);
+ kvfree(stt_percpu->frag_hash);
}
free_percpu(stt_percpu_data);
@@ -1912,7 +1904,7 @@ static const struct nla_policy stt_policy[IFLA_STT_MAX + 1] = {
[IFLA_STT_PORT] = { .type = NLA_U16 },
};
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int stt_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack __always_unused *extack)
#else
diff --git a/datapath/linux/compat/vxlan.c b/datapath/linux/compat/vxlan.c
index 23118e8b63167783ba5921d57f2116a0f4228cde..e65d955e94133360991365dd77c6a5b94b75c627 100644
--- a/datapath/linux/compat/vxlan.c
+++ b/datapath/linux/compat/vxlan.c
@@ -967,7 +967,10 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
bool use_cache = (dst_cache && ip_tunnel_dst_cache_usable(skb, info));
struct dst_entry *ndst;
struct flowi6 fl6;
+#if !defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) || \
+ !defined(HAVE_IPV6_DST_LOOKUP_FLOW)
int err;
+#endif
if (!sock6)
return ERR_PTR(-EIO);
@@ -990,20 +993,35 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
fl6.fl6_dport = dport;
fl6.fl6_sport = sport;
-#ifdef HAVE_IPV6_DST_LOOKUP_NET
- err = ipv6_stub->ipv6_dst_lookup(vxlan->net,
- sock6->sock->sk,
- &ndst, &fl6);
+#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW)
+#ifdef HAVE_IPV6_DST_LOOKUP_FLOW_NET
+ ndst = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
+ &fl6, NULL);
#else
-#ifdef HAVE_IPV6_STUB
+ ndst = ipv6_stub->ipv6_dst_lookup_flow(sock6->sock->sk, &fl6, NULL);
+#endif
+ if (unlikely(IS_ERR(ndst))) {
+#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW_NET)
+ err = ipv6_stub->ipv6_dst_lookup_flow(vxlan->net, sock6->sock->sk,
+ &ndst, &fl6);
+#elif defined(HAVE_IPV6_DST_LOOKUP_FLOW)
+ err = ipv6_stub->ipv6_dst_lookup_flow(sock6->sock->sk, &ndst, &fl6);
+#elif defined(HAVE_IPV6_DST_LOOKUP_NET)
+ err = ipv6_stub->ipv6_dst_lookup(vxlan->net, sock6->sock->sk,
+ &ndst, &fl6);
+#elif defined(HAVE_IPV6_STUB)
err = ipv6_stub->ipv6_dst_lookup(vxlan->vn6_sock->sock->sk,
&ndst, &fl6);
#else
err = ip6_dst_lookup(vxlan->vn6_sock->sock->sk, &ndst, &fl6);
#endif
-#endif
+#if defined(HAVE_IPV6_STUB_WITH_DST_ENTRY) && defined(HAVE_IPV6_DST_LOOKUP_FLOW)
+ return ERR_PTR(-ENETUNREACH);
+ }
+#else
if (err < 0)
return ERR_PTR(err);
+#endif
*saddr = fl6.saddr;
if (use_cache)
@@ -1680,10 +1698,10 @@ static void vxlan_raw_setup(struct net_device *dev)
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_ID] = { .type = NLA_U32 },
- [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_VXLAN_GROUP] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_LINK] = { .type = NLA_U32 },
- [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+ [IFLA_VXLAN_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
[IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_TOS] = { .type = NLA_U8 },
[IFLA_VXLAN_TTL] = { .type = NLA_U8 },
@@ -1708,7 +1726,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
};
-#ifdef HAVE_EXT_ACK_IN_RTNL_LINKOPS
+#ifdef HAVE_RTNLOP_VALIDATE_WITH_EXTACK
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
#else
diff --git a/datapath/meter.c b/datapath/meter.c
index 7d8f51a8fcd1f39549e3e302a9d42c9c5a43b5f9..92c9c3671dbf4f6b0da1c0262274d6ec923d15af 100644
--- a/datapath/meter.c
+++ b/datapath/meter.c
@@ -239,9 +239,11 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
struct nlattr *attr[OVS_BAND_ATTR_MAX + 1];
u32 band_max_delta_t;
- err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX,
- nla_data(nla), nla_len(nla), band_policy,
- NULL);
+ err = nla_parse_deprecated_strict((struct nlattr **)&attr,
+ OVS_BAND_ATTR_MAX,
+ nla_data(nla),
+ nla_len(nla),
+ band_policy, NULL);
if (err)
goto exit_free_meter;
@@ -542,7 +544,9 @@ static struct genl_ops dp_meter_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = 0, /* OK for unprivileged users. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = meter_policy,
+#endif
.doit = ovs_meter_cmd_features
},
{ .cmd = OVS_METER_CMD_SET,
@@ -552,7 +556,9 @@ static struct genl_ops dp_meter_genl_ops[] = {
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
* privilege.
*/
+#ifdef HAVE_GENL_OPS_POLICY
.policy = meter_policy,
+#endif
.doit = ovs_meter_cmd_set,
},
{ .cmd = OVS_METER_CMD_GET,
@@ -560,7 +566,9 @@ static struct genl_ops dp_meter_genl_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
#endif
.flags = 0, /* OK for unprivileged users. */
+#ifdef HAVE_GENL_OPS_POLICY
.policy = meter_policy,
+#endif
.doit = ovs_meter_cmd_get,
},
{ .cmd = OVS_METER_CMD_DEL,
@@ -570,7 +578,9 @@ static struct genl_ops dp_meter_genl_ops[] = {
.flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN
* privilege.
*/
+#ifdef HAVE_GENL_OPS_POLICY
.policy = meter_policy,
+#endif
.doit = ovs_meter_cmd_del
},
};
@@ -584,6 +594,9 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
.name = OVS_METER_FAMILY,
.version = OVS_METER_VERSION,
.maxattr = OVS_METER_ATTR_MAX,
+#ifndef HAVE_GENL_OPS_POLICY
+ .policy = meter_policy,
+#endif
.netnsok = true,
.parallel_ops = true,
.ops = dp_meter_genl_ops,
diff --git a/datapath/vport-stt.c b/datapath/vport-stt.c
index 35c4942c5f51ef924e86c74a10658040365e7e20..71bbeda637ba902e507cd3c5284d8fc70d626b54 100644
--- a/datapath/vport-stt.c
+++ b/datapath/vport-stt.c
@@ -23,7 +23,6 @@
#include
#include
#include
-#include
#include "datapath.h"
#include "vport.h"
diff --git a/datapath/vport-vxlan.c b/datapath/vport-vxlan.c
index 70ed376e3869cbc99621c4bf7001ccef21d2b5c0..79331c968351af36f5108ed30a51e2154c1d94b6 100644
--- a/datapath/vport-vxlan.c
+++ b/datapath/vport-vxlan.c
@@ -99,8 +99,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
if (nla_len(attr) < sizeof(struct nlattr))
return -EINVAL;
- err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy,
- NULL);
+ err = nla_parse_nested_deprecated(exts, OVS_VXLAN_EXT_MAX, attr,
+ exts_policy, NULL);
if (err < 0)
return err;
diff --git a/datapath/vport.c b/datapath/vport.c
index f929282dcec1850942c8d0cef42dda1045b71626..bd62c5612398022542b087bded992e3f5cc8b161 100644
--- a/datapath/vport.c
+++ b/datapath/vport.c
@@ -507,8 +507,9 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
ids = rcu_dereference(vport->upcall_portids);
- if (ids->n_ids == 1 && ids->ids[0] == 0)
- return 0;
+ /* If there is only one portid, select it in the fast-path. */
+ if (ids->n_ids == 1)
+ return ids->ids[0];
hash = skb_get_hash(skb);
ids_index = hash - ids->n_ids * reciprocal_divide(hash, ids->rn_ids);
diff --git a/debian/changelog b/debian/changelog
index d2058b7eaa598c7e80729383562c92aad191c08e..f0361edb8bf34b4a1044db518cf2a0aa1fac8a8b 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,13 @@
+openvswitch (2.15.0~git20210104.def6eb1ea+dfsg1-1) UNRELEASED; urgency=medium
+
+ * Merge branch 'master-dfsg' into 2.15
+ * Bump dependency on libdpdk-dev >= 20.11 (Closes: #974588)
+ * Refresh py3-compat.patch for v2.15
+ * Realign armhf test skip range due to upstream changes
+ * Use new --with-dpdk=shared configure flag value
+
+ -- Luca Boccassi Sat, 09 Jan 2021 15:21:29 +0000
+
openvswitch (2.13.0+dfsg1-16) unstable; urgency=medium
* Generating postinst at build time to avoid using dpkg-architecture at
diff --git a/debian/control b/debian/control
index ca258c60b34797131799b31c6857f081cd41422c..08aa82f891c0cd4cf5a03c7b4f94a98d62b19ac5 100644
--- a/debian/control
+++ b/debian/control
@@ -14,7 +14,7 @@ Build-Depends:
dh-python,
graphviz,
libcap-ng-dev,
- libdpdk-dev (>= 19.11) [amd64 i386 ppc64el arm64],
+ libdpdk-dev (>= 20.11) [amd64 i386 ppc64el arm64],
libnuma-dev [amd64 i386 ppc64el arm64],
libpcap-dev [amd64 i386 ppc64el arm64],
libssl-dev,
diff --git a/debian/openvswitch-common.postinst.in b/debian/openvswitch-common.postinst.in
index 079fff915228230e447810d3cc963ead2f662a86..226a2a52a6be5bf9ff771ce77f3007b1af60eed1 100644
--- a/debian/openvswitch-common.postinst.in
+++ b/debian/openvswitch-common.postinst.in
@@ -4,7 +4,7 @@ set -e
if [ "${1}" = "configure" ] ; then
update-alternatives --install /usr/sbin/ovs-vswitchd ovs-vswitchd /usr/lib/openvswitch-common/ovs-vswitchd 100 \
- --slave /usr/lib/%%MULTIARCH_TRIPLETT%%/libopenvswitch-2.13.so.0.0.0 libopenvswitch.so /usr/lib/openvswitch-common/libopenvswitch-2.13.so.0.0.0
+ --slave /usr/lib/%%MULTIARCH_TRIPLETT%%/libopenvswitch-2.14.so.0.0.90 libopenvswitch.so /usr/lib/openvswitch-common/libopenvswitch-2.14.so.0.0.90
fi
#DEBHELPER#
diff --git a/debian/openvswitch-switch-dpdk.postinst.in b/debian/openvswitch-switch-dpdk.postinst.in
index b5cd6d0dc0bd126d4ccfa9a7a335a3ed3d39d85f..e119e4c97f910cee4208dfe40f98226b782f4ce2 100644
--- a/debian/openvswitch-switch-dpdk.postinst.in
+++ b/debian/openvswitch-switch-dpdk.postinst.in
@@ -4,7 +4,7 @@ set -e
if [ "${1}" = "configure" ] ; then
update-alternatives --install /usr/sbin/ovs-vswitchd ovs-vswitchd /usr/lib/openvswitch-switch-dpdk/ovs-vswitchd-dpdk 200 \
- --slave /usr/lib/%%MULTIARCH_TRIPLETT%%/libopenvswitch-2.13.so.0.0.0 libopenvswitch.so /usr/lib/openvswitch-switch-dpdk/libopenvswitch-2.13.so.0.0.0
+ --slave /usr/lib/%%MULTIARCH_TRIPLETT%%/libopenvswitch-2.14.so.0.0.90 libopenvswitch.so /usr/lib/openvswitch-switch-dpdk/libopenvswitch-2.14.so.0.0.90
fi
#DEBHELPER#
diff --git a/debian/patches/py3-compat.patch b/debian/patches/py3-compat.patch
index 3ce28880efc4db765512584de072ae2bf84cca4a..343f23bcb4c68d55031dd2663b968abb2eda4e6b 100644
--- a/debian/patches/py3-compat.patch
+++ b/debian/patches/py3-compat.patch
@@ -126,93 +126,6 @@ Signed-off-by: James Page
def ovs_vsctl_add_bridge(bridge):
---- a/utilities/bugtool/ovs-bugtool.in
-+++ b/utilities/bugtool/ovs-bugtool.in
-@@ -33,8 +33,6 @@
- # or func_output().
- #
-
--import StringIO
--import commands
- import fcntl
- import getopt
- import hashlib
-@@ -48,10 +46,12 @@ import warnings
- import zipfile
- from select import select
- from signal import SIGTERM
--from subprocess import PIPE, Popen
-+from subprocess import PIPE, Popen, check_output
-
- from xml.dom.minidom import getDOMImplementation, parse
-
-+from six.moves import StringIO
-+
- warnings.filterwarnings(action="ignore", category=DeprecationWarning)
-
- OS_RELEASE = platform.release()
-@@ -782,7 +782,7 @@ def dump_scsi_hosts(cap):
-
-
- def module_info(cap):
-- output = StringIO.StringIO()
-+ output = StringIO()
- modules = open(PROC_MODULES, 'r')
- procs = []
-
-@@ -806,7 +806,7 @@ def multipathd_topology(cap):
-
-
- def dp_list():
-- output = StringIO.StringIO()
-+ output = StringIO()
- procs = [ProcOutput([OVS_DPCTL, 'dump-dps'],
- caps[CAP_NETWORK_STATUS][MAX_TIME], output)]
-
-@@ -828,7 +828,7 @@ def collect_ovsdb():
- if os.path.isfile(OPENVSWITCH_COMPACT_DB):
- os.unlink(OPENVSWITCH_COMPACT_DB)
-
-- output = StringIO.StringIO()
-+ output = StringIO()
- max_time = 5
- procs = [ProcOutput(['ovsdb-tool', 'compact',
- OPENVSWITCH_CONF_DB, OPENVSWITCH_COMPACT_DB],
-@@ -871,7 +871,7 @@ def fd_usage(cap):
-
-
- def dump_rdac_groups(cap):
-- output = StringIO.StringIO()
-+ output = StringIO()
- procs = [ProcOutput([MPPUTIL, '-a'], caps[cap][MAX_TIME], output)]
-
- run_procs([procs])
-@@ -1095,7 +1095,7 @@ def make_inventory(inventory, subdir):
- s.setAttribute('date', time.strftime('%c'))
- s.setAttribute('hostname', platform.node())
- s.setAttribute('uname', ' '.join(platform.uname()))
-- s.setAttribute('uptime', commands.getoutput(UPTIME))
-+ s.setAttribute('uptime', check_output([UPTIME]))
- document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(s)
-
- map(lambda k_v: inventory_entry(document, subdir, k_v[0], k_v[1]),
-@@ -1391,13 +1391,13 @@ def get_free_disk_space(path):
- return s.f_frsize * s.f_bfree
-
-
--class StringIOmtime(StringIO.StringIO):
-+class StringIOmtime(StringIO):
- def __init__(self, buf=''):
-- StringIO.StringIO.__init__(self, buf)
-+ StringIO.__init__(self, buf)
- self.mtime = time.time()
-
- def write(self, s):
-- StringIO.StringIO.write(self, s)
-+ StringIO.write(self, s)
- self.mtime = time.time()
-
-
--- a/utilities/ovs-check-dead-ifs.in
+++ b/utilities/ovs-check-dead-ifs.in
@@ -1,5 +1,7 @@
@@ -252,60 +165,15 @@ Signed-off-by: James Page
(field, stats, action) = (results[0], results[1:-1], results[-1])
-@@ -592,7 +594,7 @@ def flows_read(ihdl, flow_db):
-
- try:
- flow_db.flow_line_add(line)
-- except ValueError, arg:
-+ except ValueError as arg:
- logging.error(arg)
-
- return flow_db
-@@ -958,7 +960,7 @@ class FlowDB:
- change order of fields of the same flow.
- """
+@@ -963,7 +965,7 @@ class FlowDB:
+ if not isinstance(line, str):
+ line = str(line)
- line = line.rstrip("\n")
+ line = line.rstrip(b"\n")
(fields, stats, _) = flow_line_split(line)
try:
-@@ -988,7 +990,7 @@ class FlowDB:
-
- self.flow_event(fields_dict, stats_old_dict, stats_dict)
-
-- except ValueError, arg:
-+ except ValueError as arg:
- logging.error(arg)
- self._error_count += 1
- raise
-@@ -1192,7 +1194,7 @@ def flows_top(args):
- flows_read(ihdl, flow_db)
- finally:
- ihdl.close()
-- except OSError, arg:
-+ except OSError as arg:
- logging.critical(arg)
- break
-
-@@ -1220,7 +1222,7 @@ def flows_top(args):
-
- # repeat output
- for (count, line) in lines:
-- print line
-+ print(line)
-
-
- def flows_script(args):
-@@ -1249,7 +1251,7 @@ def flows_script(args):
- render = Render(console_width, Render.FIELD_SELECT_SCRIPT)
-
- for line in render.format(flow_db):
-- print line
-+ print(line)
-
-
- def main():
--- a/utilities/ovs-l3ping.in
+++ b/utilities/ovs-l3ping.in
@@ -18,8 +18,10 @@ opening holes in the firewall for the XM
diff --git a/debian/rules b/debian/rules
index 5f6122b0ca75b41ad2995c729c7cd287e3ef6651..38fabf5cfc693a64ec612f9cf28877cf5ca58979 100755
--- a/debian/rules
+++ b/debian/rules
@@ -32,7 +32,7 @@ ifneq (,$(filter i386 amd64 ppc64el arm64, $(DEB_HOST_ARCH)))
test -e Makefile || \
../configure --prefix=/usr --localstatedir=/var --enable-ssl --enable-shared \
--libdir=/usr/lib/$(DEB_HOST_MULTIARCH) \
- --with-dpdk --sysconfdir=/etc \
+ --with-dpdk=shared --sysconfdir=/etc \
$(DATAPATH_CONFIGURE_OPTS))
endif
@@ -53,7 +53,7 @@ TEST_LIST_DPDK = $(TEST_LIST)
# 1021: ofproto-dpif - select group with weights FAILED (ofproto-dpif.at:535)
# 1057: ofproto-dpif - controller action without megaflows FAILED (ofproto-dpif.at:1893)
ifneq (,$(filter armhf, $(DEB_HOST_ARCH)))
-TEST_LIST = 1-19 21-23 27-917 919-1020 1022-1056 1058-
+TEST_LIST = 1-19 21-23 28-917 919-1020 1022-1056 1058-
TEST_LIST_DPDK = $(TEST_LIST)
endif # armhf
@@ -197,8 +197,8 @@ override_dh_auto_install-arch:
mkdir -p $(CURDIR)/debian/openvswitch-common/usr/lib/openvswitch-common
mv $(CURDIR)/debian/tmp/usr/sbin/ovs-vswitchd \
$(CURDIR)/debian/openvswitch-common/usr/lib/openvswitch-common/ovs-vswitchd
- mv $(CURDIR)/debian/tmp/usr/lib/*/libopenvswitch-2.13.so.0.0.0 \
- $(CURDIR)/debian/openvswitch-common/usr/lib/openvswitch-common/libopenvswitch-2.13.so.0.0.0
+ mv $(CURDIR)/debian/tmp/usr/lib/*/libopenvswitch-2.14.so.0.0.90 \
+ $(CURDIR)/debian/openvswitch-common/usr/lib/openvswitch-common/libopenvswitch-2.14.so.0.0.90
override_dh_auto_install-indep:
$(MAKE) -C _debian DESTDIR=$(CURDIR)/debian/tmp install
@@ -210,7 +210,7 @@ override_dh_auto_install-indep:
override_dh_install:
install -D -m 0644 utilities/ovs-vsctl-bashcomp.bash $(CURDIR)/debian/openvswitch-switch/usr/share/bash-completion/completions/ovs-vsctl
- dh_install --exclude=usr/sbin/ovs-vswitchd --exclude=usr/lib/`dpkg-architecture -qDEB_HOST_MULTIARCH`/libopenvswitch-2.13.so.0.0.0
+ dh_install --exclude=usr/sbin/ovs-vswitchd --exclude=usr/lib/`dpkg-architecture -qDEB_HOST_MULTIARCH`/libopenvswitch-2.14.so.0.0.90
rm -f $(CURDIR)/debian/tmp/usr/lib/*/*.la
dh_installman --language=C
@@ -218,7 +218,7 @@ override_dh_install:
# remove the files managed via update-alternatives
rm -f $(CURDIR)/debian/tmp/usr/sbin/ovs-vswitchd
- rm -f $(CURDIR)/debian/tmp/usr/lib/*/libopenvswitch-2.13.so.0.0.0
+ rm -f $(CURDIR)/debian/tmp/usr/lib/*/libopenvswitch-2.14.so.0.0.90
dh_missing --fail-missing
# openvswitch-switch
@@ -228,8 +228,8 @@ override_dh_install:
ifneq (,$(filter i386 amd64 ppc64el arm64, $(DEB_HOST_ARCH)))
install -v -D _dpdk/vswitchd/.libs/ovs-vswitchd \
$(CURDIR)/debian/openvswitch-switch-dpdk/usr/lib/openvswitch-switch-dpdk/ovs-vswitchd-dpdk
- install -v -D _dpdk/lib/.libs/libopenvswitch-2.13.so.0.0.0 \
- $(CURDIR)/debian/openvswitch-switch-dpdk/usr/lib/openvswitch-switch-dpdk/libopenvswitch-2.13.so.0.0.0
+ install -v -D _dpdk/lib/.libs/libopenvswitch-2.14.so.0.0.90 \
+ $(CURDIR)/debian/openvswitch-switch-dpdk/usr/lib/openvswitch-switch-dpdk/libopenvswitch-2.14.so.0.0.90
endif
override_dh_installinit:
diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h
index dc12101f208021febb5a8632eef4fc2f3bc97b2a..b68804991aa755bad241a406ae39f5492c2e8b9b 100644
--- a/include/openflow/nicira-ext.h
+++ b/include/openflow/nicira-ext.h
@@ -296,16 +296,16 @@ enum nx_packet_in2_prop_type {
*
* The other possible roles are a related pair:
*
- * - Master (NX_ROLE_MASTER) is equivalent to Other, except that there may
- * be at most one Master controller at a time: when a controller
- * configures itself as Master, any existing Master is demoted to the
- * Slave role.
+ * - Primary (NX_ROLE_PRIMARY) is equivalent to Other, except that there may
+ * be at most one Primary controller at a time: when a controller
+ * configures itself as Primary, any existing Primary is demoted to the
+ * Secondary role.
*
- * - Slave (NX_ROLE_SLAVE) allows the controller read-only access to
+ * - Secondary (NX_ROLE_SECONDARY) allows the controller read-only access to
* OpenFlow features. In particular attempts to modify the flow table
* will be rejected with an OFPBRC_EPERM error.
*
- * Slave controllers do not receive OFPT_PACKET_IN or OFPT_FLOW_REMOVED
+ * Secondary controllers do not receive OFPT_PACKET_IN or OFPT_FLOW_REMOVED
* messages, but they do receive OFPT_PORT_STATUS messages.
*/
struct nx_role_request {
@@ -315,23 +315,23 @@ OFP_ASSERT(sizeof(struct nx_role_request) == 4);
enum nx_role {
NX_ROLE_OTHER, /* Default role, full access. */
- NX_ROLE_MASTER, /* Full access, at most one. */
- NX_ROLE_SLAVE /* Read-only access. */
+ NX_ROLE_PRIMARY, /* Full access, at most one. */
+ NX_ROLE_SECONDARY /* Read-only access. */
};
/* NXT_SET_ASYNC_CONFIG.
*
* Sent by a controller, this message configures the asynchronous messages that
* the controller wants to receive. Element 0 in each array specifies messages
- * of interest when the controller has an "other" or "master" role; element 1,
- * when the controller has a "slave" role.
+ * of interest when the controller has an "other" or "primary" role; element 1,
+ * when the controller has a "secondary" role.
*
* Each array element is a bitmask in which a 0-bit disables receiving a
* particular message and a 1-bit enables receiving it. Each bit controls the
* message whose 'reason' corresponds to the bit index. For example, the bit
* with value 1<<2 == 4 in port_status_mask[1] determines whether the
* controller will receive OFPT_PORT_STATUS messages with reason OFPPR_MODIFY
- * (value 2) when the controller has a "slave" role.
+ * (value 2) when the controller has a "secondary" role.
*
* As a side effect, for service controllers, this message changes the
* miss_send_len from default of zero to OFP_DEFAULT_MISS_SEND_LEN (128).
diff --git a/include/openflow/openflow-1.2.h b/include/openflow/openflow-1.2.h
index 30e220cfba5fe4ddd931f10632af472dbec18879..2952aec14ebe7204f56495fe245ca2eb26b164b4 100644
--- a/include/openflow/openflow-1.2.h
+++ b/include/openflow/openflow-1.2.h
@@ -176,7 +176,7 @@ enum ofp12_group_capabilities {
struct ofp12_role_request {
ovs_be32 role; /* One of OFPCR12_ROLE_*. */
uint8_t pad[4]; /* Align to 64 bits. */
- ovs_be64 generation_id; /* Master Election Generation Id */
+ ovs_be64 generation_id; /* Primary Election Generation Id */
};
OFP_ASSERT(sizeof(struct ofp12_role_request) == 16);
@@ -184,8 +184,8 @@ OFP_ASSERT(sizeof(struct ofp12_role_request) == 16);
enum ofp12_controller_role {
OFPCR12_ROLE_NOCHANGE, /* Don't change current role. */
OFPCR12_ROLE_EQUAL, /* Default role, full access. */
- OFPCR12_ROLE_MASTER, /* Full access, at most one master. */
- OFPCR12_ROLE_SLAVE, /* Read-only access. */
+ OFPCR12_ROLE_PRIMARY, /* Full access, at most one primary. */
+ OFPCR12_ROLE_SECONDARY, /* Read-only access. */
};
/* Packet received on port (datapath -> controller). */
diff --git a/include/openflow/openflow-1.4.h b/include/openflow/openflow-1.4.h
index 2bfa16b632d9502b24864df0ae4562c7c358ec25..be191180b63f85084e08a79a06801a9a448afc2e 100644
--- a/include/openflow/openflow-1.4.h
+++ b/include/openflow/openflow-1.4.h
@@ -274,7 +274,7 @@ struct ofp14_role_status {
ovs_be32 role; /* One of OFPCR_ROLE_*. */
uint8_t reason; /* One of OFPCRR_*. */
uint8_t pad[3]; /* Align to 64 bits. */
- ovs_be64 generation_id; /* Master Election Generation Id */
+ ovs_be64 generation_id; /* Primary Election Generation Id */
/* Followed by a list of struct ofp14_role_prop_header */
};
@@ -282,9 +282,9 @@ OFP_ASSERT(sizeof(struct ofp14_role_status) == 16);
/* What changed about the controller role */
enum ofp14_controller_role_reason {
- OFPCRR_MASTER_REQUEST = 0, /* Another controller asked to be master. */
- OFPCRR_CONFIG = 1, /* Configuration changed on the switch. */
- OFPCRR_EXPERIMENTER = 2, /* Experimenter data changed. */
+ OFPCRR_PRIMARY_REQUEST = 0, /* Another controller asked to be primary. */
+ OFPCRR_CONFIG = 1, /* Configuration changed on the switch. */
+ OFPCRR_EXPERIMENTER = 2, /* Experimenter data changed. */
OFPCRR_N_REASONS /* Denotes number of reasons. */
};
diff --git a/include/openvswitch/automake.mk b/include/openvswitch/automake.mk
index 73c346175733725356a41e9c448fae3c7bdaabf3..1fa6d88fabbf678fb5da5f37e79ff3edd4d7f598 100644
--- a/include/openvswitch/automake.mk
+++ b/include/openvswitch/automake.mk
@@ -72,7 +72,7 @@ endif
# header file has the proper extern declaration for use with C++.
#
# Some header files don't declare any external functions, so they
-# don't really need extern "C". We only white list a couple of these
+# don't really need extern "C". We only permit a couple of these
# below, which are the ones that seem unlikely to ever declare
# external functions. For the rest, we add extern "C" anyway; it
# doesn't hurt.
diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h
index 5289a70f6eadc6b5535afc71b175b4c20df5696a..cf009f826443e892889ad450cb1044624afb7f4d 100644
--- a/include/openvswitch/compiler.h
+++ b/include/openvswitch/compiler.h
@@ -113,6 +113,8 @@
* OVS_REQUIRES OVS_REQ_RDLOCK OVS_REQ_WRLOCK
* OVS_EXCLUDED OVS_EXCLUDED OVS_EXCLUDED
*/
+
+/* Please keep OVS_CTAGS_IDENTIFIERS up-to-date in acinclude.m4. */
#define OVS_LOCKABLE __attribute__((lockable))
#define OVS_REQ_RDLOCK(...) __attribute__((shared_locks_required(__VA_ARGS__)))
#define OVS_ACQ_RDLOCK(...) __attribute__((shared_lock_function(__VA_ARGS__)))
diff --git a/include/openvswitch/flow.h b/include/openvswitch/flow.h
index 57b6c925c7bc7c00af98601e67481371f85c95cf..3054015d93c7f88f8aace192c1e9542ca29e7f96 100644
--- a/include/openvswitch/flow.h
+++ b/include/openvswitch/flow.h
@@ -27,7 +27,7 @@ extern "C" {
/* This sequence number should be incremented whenever anything involving flows
* or the wildcarding of flows changes. This will cause build assertion
* failures in places which likely need to be updated. */
-#define FLOW_WC_SEQ 41
+#define FLOW_WC_SEQ 42
/* Number of Open vSwitch extension 32-bit registers. */
#define FLOW_N_REGS 16
@@ -168,7 +168,7 @@ BUILD_ASSERT_DECL(sizeof(struct ovs_key_nsh) % sizeof(uint64_t) == 0);
/* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */
BUILD_ASSERT_DECL(offsetof(struct flow, igmp_group_ip4) + sizeof(uint32_t)
== sizeof(struct flow_tnl) + sizeof(struct ovs_key_nsh) + 300
- && FLOW_WC_SEQ == 41);
+ && FLOW_WC_SEQ == 42);
/* Incremental points at which flow classification may be performed in
* segments.
diff --git a/include/openvswitch/hmap.h b/include/openvswitch/hmap.h
index 8aea9c22db68639b513d74ec1625da2d188f7fb2..4e001cc692b53427441d1bb0183902af1255effd 100644
--- a/include/openvswitch/hmap.h
+++ b/include/openvswitch/hmap.h
@@ -136,12 +136,14 @@ struct hmap_node *hmap_random_node(const struct hmap *);
*/
#define HMAP_FOR_EACH_WITH_HASH(NODE, MEMBER, HASH, HMAP) \
for (INIT_CONTAINER(NODE, hmap_first_with_hash(HMAP, HASH), MEMBER); \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL); \
+ (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
+ || ((NODE = NULL), false); \
ASSIGN_CONTAINER(NODE, hmap_next_with_hash(&(NODE)->MEMBER), \
MEMBER))
#define HMAP_FOR_EACH_IN_BUCKET(NODE, MEMBER, HASH, HMAP) \
for (INIT_CONTAINER(NODE, hmap_first_in_bucket(HMAP, HASH), MEMBER); \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL); \
+ (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
+ || ((NODE = NULL), false); \
ASSIGN_CONTAINER(NODE, hmap_next_in_bucket(&(NODE)->MEMBER), MEMBER))
static inline struct hmap_node *hmap_first_with_hash(const struct hmap *,
@@ -170,7 +172,8 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *);
HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, (void) 0)
#define HMAP_FOR_EACH_INIT(NODE, MEMBER, HMAP, ...) \
for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL); \
+ (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
+ || ((NODE = NULL), false); \
ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER))
/* Safe when NODE may be freed (not needed when NODE may be removed from the
@@ -179,7 +182,8 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *);
HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, (void) 0)
#define HMAP_FOR_EACH_SAFE_INIT(NODE, NEXT, MEMBER, HMAP, ...) \
for (INIT_CONTAINER(NODE, hmap_first(HMAP), MEMBER), __VA_ARGS__; \
- ((NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL) \
+ ((NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
+ || ((NODE = NULL), false) \
? INIT_CONTAINER(NEXT, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), 1 \
: 0); \
(NODE) = (NEXT))
@@ -190,7 +194,8 @@ bool hmap_contains(const struct hmap *, const struct hmap_node *);
#define HMAP_FOR_EACH_CONTINUE_INIT(NODE, MEMBER, HMAP, ...) \
for (ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER), \
__VA_ARGS__; \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL); \
+ (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
+ || ((NODE = NULL), false); \
ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER))
static inline struct hmap_node *
@@ -211,7 +216,8 @@ hmap_pop_helper__(struct hmap *hmap, size_t *bucket) {
#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP) \
for (size_t bucket__ = 0; \
INIT_CONTAINER(NODE, hmap_pop_helper__(HMAP, &bucket__), MEMBER), \
- (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL);)
+ (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) \
+ || ((NODE = NULL), false);)
static inline struct hmap_node *hmap_first(const struct hmap *);
static inline struct hmap_node *hmap_next(const struct hmap *,
diff --git a/include/openvswitch/match.h b/include/openvswitch/match.h
index eeabd5f470a75df91d3066b6dcad1204dd1645f4..2e8812048e86b83fda805e03207a7bb38cdb98ab 100644
--- a/include/openvswitch/match.h
+++ b/include/openvswitch/match.h
@@ -121,6 +121,12 @@ void match_set_tun_erspan_dir_masked(struct match *match, uint8_t dir,
void match_set_tun_erspan_hwid(struct match *match, uint8_t hwid);
void match_set_tun_erspan_hwid_masked(struct match *match, uint8_t hwid,
uint8_t mask);
+void match_set_tun_gtpu_flags(struct match *match, uint8_t flags);
+void match_set_tun_gtpu_flags_masked(struct match *match, uint8_t flags,
+ uint8_t mask);
+void match_set_tun_gtpu_msgtype(struct match *match, uint8_t msgtype);
+void match_set_tun_gtpu_msgtype_masked(struct match *match, uint8_t msgtype,
+ uint8_t mask);
void match_set_in_port(struct match *, ofp_port_t ofp_port);
void match_set_pkt_mark(struct match *, uint32_t pkt_mark);
void match_set_pkt_mark_masked(struct match *, uint32_t pkt_mark, uint32_t mask);
@@ -188,6 +194,8 @@ void match_set_tp_dst_masked(struct match *, ovs_be16 port, ovs_be16 mask);
void match_set_tcp_flags(struct match *, ovs_be16);
void match_set_tcp_flags_masked(struct match *, ovs_be16 flags, ovs_be16 mask);
void match_set_nw_proto(struct match *, uint8_t);
+void match_set_nw_proto_masked(struct match *match,
+ const uint8_t nw_proto, const uint8_t mask);
void match_set_nw_src(struct match *, ovs_be32);
void match_set_nw_src_masked(struct match *, ovs_be32 ip, ovs_be32 mask);
void match_set_nw_dst(struct match *, ovs_be32);
@@ -201,6 +209,9 @@ void match_set_nw_frag(struct match *, uint8_t nw_frag);
void match_set_nw_frag_masked(struct match *, uint8_t nw_frag, uint8_t mask);
void match_set_icmp_type(struct match *, uint8_t);
void match_set_icmp_code(struct match *, uint8_t);
+void match_set_arp_opcode_masked(struct match *match,
+ const uint8_t opcode,
+ const uint8_t mask);
void match_set_arp_sha(struct match *, const struct eth_addr);
void match_set_arp_sha_masked(struct match *,
const struct eth_addr arp_sha,
@@ -209,6 +220,12 @@ void match_set_arp_tha(struct match *, const struct eth_addr);
void match_set_arp_tha_masked(struct match *,
const struct eth_addr arp_tha,
const struct eth_addr mask);
+void match_set_arp_spa_masked(struct match *match,
+ const ovs_be32 arp_spa,
+ const ovs_be32 mask);
+void match_set_arp_tpa_masked(struct match *match,
+ const ovs_be32 arp_tpa,
+ const ovs_be32 mask);
void match_set_ipv6_src(struct match *, const struct in6_addr *);
void match_set_ipv6_src_masked(struct match *, const struct in6_addr *,
const struct in6_addr *);
diff --git a/include/openvswitch/meta-flow.h b/include/openvswitch/meta-flow.h
index 1f81d830e70f36fa1e60a4fa3291225f19743ef2..95e52e3587eec7ab7d007922feabc9505f5479c3 100644
--- a/include/openvswitch/meta-flow.h
+++ b/include/openvswitch/meta-flow.h
@@ -506,6 +506,34 @@ enum OVS_PACKED_ENUM mf_field_id {
*/
MFF_TUN_ERSPAN_HWID,
+ /* "tun_gtpu_flags".
+ *
+ * GTP-U tunnel flags.
+ *
+ * Type: u8.
+ * Maskable: bitwise.
+ * Formatting: hexadecimal.
+ * Prerequisites: none.
+ * Access: read-only.
+ * NXM: none.
+ * OXM: NXOXM_ET_GTPU_FLAGS(15) since v2.13.
+ */
+ MFF_TUN_GTPU_FLAGS,
+
+ /* "tun_gtpu_msgtype".
+ *
+ * GTP-U tunnel message type.
+ *
+ * Type: u8.
+ * Maskable: bitwise.
+ * Formatting: decimal.
+ * Prerequisites: none.
+ * Access: read-only.
+ * NXM: none.
+ * OXM: NXOXM_ET_GTPU_MSGTYPE(16) since v2.13.
+ */
+ MFF_TUN_GTPU_MSGTYPE,
+
#if TUN_METADATA_NUM_OPTS == 64
/* "tun_metadata".
*
@@ -824,7 +852,7 @@ enum OVS_PACKED_ENUM mf_field_id {
/* "ct_nw_proto".
*
* The "protocol" byte in the IPv4 or IPv6 header for the original
- * direction conntrack tuple, or of the master conntrack entry, if the
+ * direction conntrack tuple, or of the parent conntrack entry, if the
* current connection is a related connection.
*
* The value is initially zero and populated by the CT action. The value
@@ -845,7 +873,7 @@ enum OVS_PACKED_ENUM mf_field_id {
/* "ct_nw_src".
*
* IPv4 source address of the original direction tuple of the conntrack
- * entry, or of the master conntrack entry, if the current connection is a
+ * entry, or of the parent conntrack entry, if the current connection is a
* related connection.
*
* The value is populated by the CT action.
@@ -864,7 +892,7 @@ enum OVS_PACKED_ENUM mf_field_id {
/* "ct_nw_dst".
*
* IPv4 destination address of the original direction tuple of the
- * conntrack entry, or of the master conntrack entry, if the current
+ * conntrack entry, or of the parent conntrack entry, if the current
* connection is a related connection.
*
* The value is populated by the CT action.
@@ -883,7 +911,7 @@ enum OVS_PACKED_ENUM mf_field_id {
/* "ct_ipv6_src".
*
* IPv6 source address of the original direction tuple of the conntrack
- * entry, or of the master conntrack entry, if the current connection is a
+ * entry, or of the parent conntrack entry, if the current connection is a
* related connection.
*
* The value is populated by the CT action.
@@ -902,7 +930,7 @@ enum OVS_PACKED_ENUM mf_field_id {
/* "ct_ipv6_dst".
*
* IPv6 destination address of the original direction tuple of the
- * conntrack entry, or of the master conntrack entry, if the current
+ * conntrack entry, or of the parent conntrack entry, if the current
* connection is a related connection.
*
* The value is populated by the CT action.
@@ -921,7 +949,7 @@ enum OVS_PACKED_ENUM mf_field_id {
/* "ct_tp_src".
*
* Transport layer source port of the original direction tuple of the
- * conntrack entry, or of the master conntrack entry, if the current
+ * conntrack entry, or of the parent conntrack entry, if the current
* connection is a related connection.
*
* The value is populated by the CT action.
@@ -939,7 +967,7 @@ enum OVS_PACKED_ENUM mf_field_id {
/* "ct_tp_dst".
*
* Transport layer destination port of the original direction tuple of the
- * conntrack entry, or of the master conntrack entry, if the current
+ * conntrack entry, or of the parent conntrack entry, if the current
* connection is a related connection.
*
* The value is populated by the CT action.
diff --git a/include/openvswitch/ofp-actions.h b/include/openvswitch/ofp-actions.h
index c8948e0d694f6702bbe26444d894cd3637b26f14..41bcb55d205655b3ae79dc9172e40bcd1eb0eaf7 100644
--- a/include/openvswitch/ofp-actions.h
+++ b/include/openvswitch/ofp-actions.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2013, 2014, 2015, 2016, 2017, 2019 Nicira, Inc.
+ * Copyright (c) 2012-2017, 2019-2020 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -66,7 +66,7 @@ struct vl_mff_map;
OFPACT(CONTROLLER, ofpact_controller, userdata, "controller") \
OFPACT(ENQUEUE, ofpact_enqueue, ofpact, "enqueue") \
OFPACT(OUTPUT_REG, ofpact_output_reg, ofpact, "output_reg") \
- OFPACT(BUNDLE, ofpact_bundle, slaves, "bundle") \
+ OFPACT(BUNDLE, ofpact_bundle, members, "bundle") \
\
/* Header changes. */ \
OFPACT(SET_FIELD, ofpact_set_field, ofpact, "set_field") \
@@ -94,6 +94,7 @@ struct vl_mff_map;
OFPACT(PUSH_MPLS, ofpact_push_mpls, ofpact, "push_mpls") \
OFPACT(POP_MPLS, ofpact_pop_mpls, ofpact, "pop_mpls") \
OFPACT(DEC_NSH_TTL, ofpact_null, ofpact, "dec_nsh_ttl") \
+ OFPACT(DELETE_FIELD, ofpact_delete_field, ofpact, "delete_field") \
\
/* Generic encap & decap */ \
OFPACT(ENCAP, ofpact_encap, props, "encap") \
@@ -363,24 +364,24 @@ struct ofpact_output_trunc {
);
};
-/* Bundle slave choice algorithm to apply.
+/* Bundle member choice algorithm to apply.
*
- * In the descriptions below, 'slaves' is the list of possible slaves in the
+ * In the descriptions below, 'members' is the list of possible members in the
* order they appear in the OpenFlow action. */
enum nx_bd_algorithm {
- /* Chooses the first live slave listed in the bundle.
+ /* Chooses the first live member listed in the bundle.
*
- * O(n_slaves) performance. */
+ * O(n_members) performance. */
NX_BD_ALG_ACTIVE_BACKUP = 0,
/* Highest Random Weight.
*
- * for i in [0,n_slaves):
+ * for i in [0,n_members):
* weights[i] = hash(flow, i)
- * slave = { slaves[i] such that weights[i] >= weights[j] for all j != i }
+ * member = { members[i] such that weights[i] >= weights[j] for all j != i }
*
- * Redistributes 1/n_slaves of traffic when a slave's liveness changes.
- * O(n_slaves) performance.
+ * Redistributes 1/n_members of traffic when a member's liveness changes.
+ * O(n_members) performance.
*
* Uses the 'fields' and 'basis' parameters. */
NX_BD_ALG_HRW = 1
@@ -393,7 +394,7 @@ struct ofpact_bundle {
OFPACT_PADDED_MEMBERS(
struct ofpact ofpact;
- /* Slave choice algorithm to apply to hash value. */
+ /* Member choice algorithm to apply to hash value. */
enum nx_bd_algorithm algorithm;
/* What fields to hash and how. */
@@ -402,10 +403,12 @@ struct ofpact_bundle {
struct mf_subfield dst;
- /* Slaves for output. */
- unsigned int n_slaves;
+ bool compat_syntax;
+
+ /* Members for output. */
+ unsigned int n_members;
);
- ofp_port_t slaves[];
+ ofp_port_t members[];
};
/* OFPACT_SET_VLAN_VID.
@@ -576,6 +579,16 @@ struct ofpact_pop_mpls {
);
};
+/* OFPACT_DELETE_FIELD.
+ *
+ * Used for NXAST_DELETE_FIELD. */
+struct ofpact_delete_field {
+ OFPACT_PADDED_MEMBERS(
+ struct ofpact ofpact;
+ const struct mf_field *field;
+ );
+};
+
/* OFPACT_SET_TUNNEL.
*
* Used for NXAST_SET_TUNNEL, NXAST_SET_TUNNEL64. */
diff --git a/include/openvswitch/ofp-connection.h b/include/openvswitch/ofp-connection.h
index 5fb143157d2341996ec6f18470467487f28762c4..1e844e07f39d81614df78575169bf083e6f31b39 100644
--- a/include/openvswitch/ofp-connection.h
+++ b/include/openvswitch/ofp-connection.h
@@ -69,10 +69,10 @@ enum ofputil_async_msg_type {
const char *ofputil_async_msg_type_to_string(enum ofputil_async_msg_type);
struct ofputil_async_cfg {
- uint32_t master[OAM_N_TYPES];
- uint32_t slave[OAM_N_TYPES];
+ uint32_t primary[OAM_N_TYPES];
+ uint32_t secondary[OAM_N_TYPES];
};
-#define OFPUTIL_ASYNC_CFG_INIT (struct ofputil_async_cfg) { .master[0] = 0 }
+#define OFPUTIL_ASYNC_CFG_INIT (struct ofputil_async_cfg) { .primary[0] = 0 }
enum ofperr ofputil_decode_set_async_config(const struct ofp_header *,
bool loose,
diff --git a/include/openvswitch/ofp-errors.h b/include/openvswitch/ofp-errors.h
index a3f8142dfb1af6e555e9dd01303903bb67759814..8c8511d65e4a7f7b1eae1e4092bfbc85f9123621 100644
--- a/include/openvswitch/ofp-errors.h
+++ b/include/openvswitch/ofp-errors.h
@@ -115,10 +115,10 @@ enum ofperr {
* OFPBIC_BAD_EXP_TYPE. */
/* Expected: 0x0,1,5 in OF1.0 means both OFPBRC_EPERM and
- * OFPBRC_IS_SLAVE. */
+ * OFPBRC_IS_SECONDARY. */
/* Expected: 0x0,1,5 in OF1.1 means both OFPBRC_EPERM and
- * OFPBRC_IS_SLAVE. */
+ * OFPBRC_IS_SECONDARY. */
/* ## ------------------ ## */
/* ## OFPET_HELLO_FAILED ## */
@@ -168,8 +168,9 @@ enum ofperr {
* code defined the specification. ] */
OFPERR_OFPBRC_BAD_TABLE_ID,
- /* OF1.0-1.1(1,5), OF1.2+(1,10). Denied because controller is slave. */
- OFPERR_OFPBRC_IS_SLAVE,
+ /* OF1.0-1.1(1,5), OF1.2+(1,10). Denied because controller has secondary
+ * role. (Secondary controllers have only read-only access.) */
+ OFPERR_OFPBRC_IS_SECONDARY,
/* NX1.0-1.1(1,514), OF1.2+(1,11). Invalid or missing port. [ A
* non-standard error (1,514), formerly OFPERR_NXBRC_BAD_IN_PORT is used
diff --git a/include/openvswitch/packets.h b/include/openvswitch/packets.h
index 925844edae6a6d328d565aea9b46cae6869090ea..a65cb0d04e770a024a57324b98e77c944083d780 100644
--- a/include/openvswitch/packets.h
+++ b/include/openvswitch/packets.h
@@ -43,7 +43,9 @@ struct flow_tnl {
uint32_t erspan_idx;
uint8_t erspan_dir;
uint8_t erspan_hwid;
- uint8_t pad1[6]; /* Pad to 64 bits. */
+ uint8_t gtpu_flags;
+ uint8_t gtpu_msgtype;
+ uint8_t pad1[4]; /* Pad to 64 bits. */
struct tun_metadata metadata;
};
diff --git a/include/openvswitch/util.h b/include/openvswitch/util.h
index 9189e6480b1c625f2659f061e2e3ec36abeb98c3..228b185c3a5f5bd595ae516f94ac239db3be725a 100644
--- a/include/openvswitch/util.h
+++ b/include/openvswitch/util.h
@@ -85,6 +85,8 @@ OVS_NO_RETURN void ovs_assert_failure(const char *, const char *, const char *);
* assigned to OBJECT. */
#ifdef __GNUC__
#define OVS_TYPEOF(OBJECT) typeof(OBJECT)
+#elif defined (__cplusplus)
+#define OVS_TYPEOF(OBJECT) decltype(OBJECT)
#else
#define OVS_TYPEOF(OBJECT) void *
#endif
diff --git a/include/openvswitch/vlog.h b/include/openvswitch/vlog.h
index 19da4ab62320083c61de9f99246ccc3994a56b13..886fce5e0fd5169faf54fcfbe1f8b93d910aaba0 100644
--- a/include/openvswitch/vlog.h
+++ b/include/openvswitch/vlog.h
@@ -143,6 +143,9 @@ void vlog_set_syslog_method(const char *method);
/* Configure syslog target. */
void vlog_set_syslog_target(const char *target);
+/* Write directly to log file. */
+void vlog_direct_write_to_log_file_unsafe(const char *s);
+
/* Initialization. */
void vlog_init(void);
void vlog_enable_async(void);
diff --git a/include/sparse/automake.mk b/include/sparse/automake.mk
index 974ad3fe55f7fff498be15b60d797f1754a330fa..e966371192baa6fdf72b041b7c5178cef4438af7 100644
--- a/include/sparse/automake.mk
+++ b/include/sparse/automake.mk
@@ -11,7 +11,9 @@ noinst_HEADERS += \
include/sparse/netpacket/packet.h \
include/sparse/pthread.h \
include/sparse/rte_atomic.h \
+ include/sparse/rte_mbuf.h \
include/sparse/rte_memcpy.h \
+ include/sparse/rte_trace_point.h \
include/sparse/sys/socket.h \
include/sparse/sys/sysmacros.h \
include/sparse/sys/types.h \
diff --git a/include/sparse/rte_byteorder.h b/include/sparse/rte_byteorder.h
index d32b5e69150fcba3ca0c6aa3bfb8c0416658b632..72cacac8945aac0734384a90b29a36a92e89e2ca 100644
--- a/include/sparse/rte_byteorder.h
+++ b/include/sparse/rte_byteorder.h
@@ -49,7 +49,7 @@
#include "openvswitch/types.h"
#include <stdint.h>
-#ifdef RTE_EXEC_ENV_BSDAPP
+#ifdef RTE_EXEC_ENV_FREEBSD
#include <sys/endian.h>
#else
#include <endian.h>
@@ -127,9 +127,9 @@
#define RTE_BE16(v) (OVS_FORCE rte_be16_t)(RTE_STATIC_BSWAP16(v))
#define RTE_BE32(v) (OVS_FORCE rte_be32_t)(RTE_STATIC_BSWAP32(v))
#define RTE_BE64(v) (OVS_FORCE rte_be64_t)(RTE_STATIC_BSWAP64(v))
-#define RTE_LE16(v) (OVS_FORCE rte_be16_t)(v)
-#define RTE_LE32(v) (OVS_FORCE rte_be32_t)(v)
-#define RTE_LE64(v) (OVS_FORCE rte_be64_t)(v)
+#define RTE_LE16(v) (OVS_FORCE rte_le16_t)(v)
+#define RTE_LE32(v) (OVS_FORCE rte_le32_t)(v)
+#define RTE_LE64(v) (OVS_FORCE rte_le64_t)(v)
#else
#error Unsupported endianness.
#endif
diff --git a/include/sparse/rte_mbuf.h b/include/sparse/rte_mbuf.h
new file mode 100644
index 0000000000000000000000000000000000000000..981cdb441f085889ce6f9ba17dbdfdd2974c0764
--- /dev/null
+++ b/include/sparse/rte_mbuf.h
@@ -0,0 +1,29 @@
+/* Copyright (c) 2020 Intel, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CHECKER__
+#error "Use this header only with sparse. It is not a correct implementation."
+#endif
+
+/* sparse lacks gcc atomic builtins: define plain, non-atomic stand-ins. */
+#ifndef __ATOMIC_ACQ_REL
+#define __ATOMIC_ACQ_REL 0
+#define __ATOMIC_RELAXED 1
+#define __atomic_add_fetch(p, val, memorder) (*(p) = *(p) + (val))
+#define __atomic_store_n(p, val, memorder) (*(p) = (val))
+#endif
+
+/* Chain to the next rte_mbuf.h on the include path for the definitions. */
+#include_next <rte_mbuf.h>
diff --git a/include/sparse/rte_trace_point.h b/include/sparse/rte_trace_point.h
new file mode 100644
index 0000000000000000000000000000000000000000..80392327543f03d5cde2b9c1686108bf80f1eccd
--- /dev/null
+++ b/include/sparse/rte_trace_point.h
@@ -0,0 +1,28 @@
+/* Copyright 2020, Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __CHECKER__
+#error "Use this header only with sparse. It is not a correct implementation."
+#endif
+
+/* sparse lacks gcc atomic builtins: define a plain, non-atomic stand-in. */
+#ifndef __ATOMIC_ACQUIRE
+#define __ATOMIC_ACQUIRE 0
+#define __atomic_load_n(p, memorder) (*(p))
+#endif
+
+/* Chain to the next rte_trace_point.h on the include path for the
+ * definitions. */
+#include_next <rte_trace_point.h>
diff --git a/ipsec/ovs-monitor-ipsec.in b/ipsec/ovs-monitor-ipsec.in
index 37e370324562426f648f17c63b134983dd74a5b9..b84608a55d8a0f8b63f2335ef866182b4e83974a 100755
--- a/ipsec/ovs-monitor-ipsec.in
+++ b/ipsec/ovs-monitor-ipsec.in
@@ -101,7 +101,7 @@ class XFRM(object):
proc = subprocess.Popen([self.IP, 'xfrm', 'policy'],
stdout=subprocess.PIPE)
while True:
- line = proc.stdout.readline().strip()
+ line = proc.stdout.readline().strip().decode()
if line == '':
break
a = line.split(" ")
@@ -124,7 +124,7 @@ class XFRM(object):
proc = subprocess.Popen([self.IP, 'xfrm', 'state'],
stdout=subprocess.PIPE)
while True:
- line = proc.stdout.readline().strip()
+ line = proc.stdout.readline().strip().decode()
if line == '':
break
a = line.split(" ")
@@ -145,10 +145,18 @@ class StrongSwanHelper(object):
"""This class does StrongSwan specific configurations."""
STRONGSWAN_CONF = """%s
-charon.plugins.kernel-netlink.set_proto_port_transport_sa = yes
-charon.plugins.kernel-netlink.xfrm_ack_expires = 10
-charon.load_modular = yes
-charon.plugins.gcm.load = yes
+charon {
+ plugins {
+ kernel-netlink {
+ set_proto_port_transport_sa = yes
+ xfrm_ack_expires = 10
+ }
+ gcm {
+ load = yes
+ }
+ }
+ load_modular = yes
+}
""" % (FILE_HEADER)
CONF_HEADER = """%s
@@ -246,7 +254,7 @@ conn prevent_unencrypted_vxlan
proc = subprocess.Popen([self.IPSEC, 'status'], stdout=subprocess.PIPE)
while True:
- line = proc.stdout.readline().strip()
+ line = proc.stdout.readline().strip().decode()
if line == '':
break
tunnel_name = line.split(":")
@@ -340,7 +348,7 @@ conn prevent_unencrypted_vxlan
# about possibility of ovs-monitor-ipsec to block for each tunnel
# while strongSwan sends IKE messages over Internet.
conns_dict = self.get_active_conns()
- for ifname, conns in conns_dict.iteritems():
+ for ifname, conns in conns_dict.items():
tunnel = monitor.tunnels.get(ifname)
for conn in conns:
# IPsec "connection" names that we choose in strongswan
@@ -536,7 +544,7 @@ conn prevent_unencrypted_vxlan
# Delete old connections
conns_dict = self.get_active_conns()
- for ifname, conns in conns_dict.iteritems():
+ for ifname, conns in conns_dict.items():
tunnel = monitor.tunnels.get(ifname)
for conn in conns:
@@ -608,7 +616,7 @@ conn prevent_unencrypted_vxlan
proc = subprocess.Popen([self.IPSEC, 'status'], stdout=subprocess.PIPE)
while True:
- line = proc.stdout.readline().strip()
+ line = proc.stdout.readline().strip().decode()
if line == '':
break
@@ -989,7 +997,7 @@ class IPsecMonitor(object):
skb_mark = None
is_valid = False
- for row in data["Open_vSwitch"].rows.itervalues():
+ for row in data["Open_vSwitch"].rows.values():
pki[0] = row.other_config.get("certificate")
pki[1] = row.other_config.get("private_key")
pki[2] = row.other_config.get("ca_cert")
@@ -1016,7 +1024,7 @@ class IPsecMonitor(object):
table."""
ifaces = set()
- for row in data["Interface"].rows.itervalues():
+ for row in data["Interface"].rows.values():
if not self.is_tunneling_type_supported(row.type):
continue
if not self.is_ipsec_required(row.options):
@@ -1047,7 +1055,7 @@ class IPsecMonitor(object):
return
s = ""
conns = self.ike_helper.get_active_conns()
- for name, tunnel in self.tunnels.iteritems():
+ for name, tunnel in self.tunnels.items():
s += tunnel.show(policies, securities, conns)
unix_conn.reply(s)
@@ -1064,7 +1072,7 @@ class IPsecMonitor(object):
if self.ike_helper.config_global(self):
needs_refresh = True
- for name, tunnel in self.tunnels.iteritems():
+ for name, tunnel in self.tunnels.items():
if tunnel.last_refreshed_version != tunnel.version:
tunnel.last_refreshed_version = tunnel.version
needs_refresh = True
@@ -1094,7 +1102,7 @@ class IPsecMonitor(object):
proc.wait()
if proc.returncode:
raise Exception(proc.stderr.read())
- m = re.search(r"CN=(.+?),", proc.stdout.readline())
+ m = re.search(r"CN=(.+?),", proc.stdout.readline().decode())
if not m:
raise Exception("No CN in the certificate subject.")
except Exception as e:
diff --git a/lib/automake.mk b/lib/automake.mk
index 95925b57c35134bd063a1c71feda5b5105e0a3ca..380a672287ac026f73662afd02b41938d81500c9 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -11,6 +11,7 @@ lib_libopenvswitch_la_LIBADD = $(SSL_LIBS)
lib_libopenvswitch_la_LIBADD += $(CAPNG_LDADD)
lib_libopenvswitch_la_LIBADD += $(LIBBPF_LDADD)
+
if WIN32
lib_libopenvswitch_la_LIBADD += ${PTHREAD_LIBS}
endif
@@ -20,6 +21,29 @@ lib_libopenvswitch_la_LDFLAGS = \
-Wl,--version-script=$(top_builddir)/lib/libopenvswitch.sym \
$(AM_LDFLAGS)
+if HAVE_AVX512F
+if HAVE_LD_AVX512_GOOD
+# Build library of avx512 code with CPU ISA CFLAGS enabled. This allows the
+# compiler to use the ISA features required for the ISA optimized code-paths.
+# Use LDFLAGS to compile only static library of this code, as it should be
+# statically linked into vswitchd even if vswitchd is a shared build.
+lib_LTLIBRARIES += lib/libopenvswitchavx512.la
+lib_libopenvswitch_la_LIBADD += lib/libopenvswitchavx512.la
+lib_libopenvswitchavx512_la_CFLAGS = \
+ -mavx512f \
+ -mavx512bw \
+ -mavx512dq \
+ -mbmi2 \
+ -fPIC \
+ $(AM_CFLAGS)
+lib_libopenvswitchavx512_la_SOURCES = \
+ lib/dpif-netdev-lookup-avx512-gather.c
+lib_libopenvswitchavx512_la_LDFLAGS = \
+ -static
+endif
+endif
+
+# Sources of the core libopenvswitch library.
lib_libopenvswitch_la_SOURCES = \
lib/aes128.c \
lib/aes128.h \
@@ -53,6 +77,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/conntrack-icmp.c \
lib/conntrack-private.h \
lib/conntrack-tcp.c \
+ lib/conntrack-tp.c \
+ lib/conntrack-tp.h \
lib/conntrack-other.c \
lib/conntrack.c \
lib/conntrack.h \
@@ -79,6 +105,9 @@ lib_libopenvswitch_la_SOURCES = \
lib/dp-packet.h \
lib/dp-packet.c \
lib/dpdk.h \
+ lib/dpif-netdev-lookup.h \
+ lib/dpif-netdev-lookup.c \
+ lib/dpif-netdev-lookup-autovalidator.c \
lib/dpif-netdev-lookup-generic.c \
lib/dpif-netdev.c \
lib/dpif-netdev.h \
@@ -517,6 +546,7 @@ MAN_FRAGMENTS += \
lib/daemon-syn.man \
lib/db-ctl-base.man \
lib/dpctl.man \
+ lib/dpdk-unixctl.man \
lib/memory-unixctl.man \
lib/netdev-dpdk-unixctl.man \
lib/dpif-netdev-unixctl.man \
diff --git a/lib/bfd.c b/lib/bfd.c
index cc8c6857afa4c778a7daab7f2e7a321612ce9fe5..3c965699ace31d15a3feb3e9457e6d87bfb739ed 100644
--- a/lib/bfd.c
+++ b/lib/bfd.c
@@ -149,6 +149,9 @@ BUILD_ASSERT_DECL(BFD_PACKET_LEN == sizeof(struct msg));
#define FLAGS_MASK 0x3f
#define DEFAULT_MULT 3
+#define BFD_DEFAULT_SRC_IP 0xA9FE0101 /* 169.254.1.1 */
+#define BFD_DEFAULT_DST_IP 0xA9FE0100 /* 169.254.1.0 */
+
struct bfd {
struct hmap_node node; /* In 'all_bfds'. */
uint32_t disc; /* bfd.LocalDiscr. Key in 'all_bfds' hmap. */
@@ -457,9 +460,9 @@ bfd_configure(struct bfd *bfd, const char *name, const struct smap *cfg,
&bfd->rmt_eth_dst);
bfd_lookup_ip(smap_get_def(cfg, "bfd_src_ip", ""),
- htonl(0xA9FE0101) /* 169.254.1.1 */, &bfd->ip_src);
+ htonl(BFD_DEFAULT_SRC_IP), &bfd->ip_src);
bfd_lookup_ip(smap_get_def(cfg, "bfd_dst_ip", ""),
- htonl(0xA9FE0100) /* 169.254.1.0 */, &bfd->ip_dst);
+ htonl(BFD_DEFAULT_DST_IP), &bfd->ip_dst);
forwarding_if_rx = smap_get_bool(cfg, "forwarding_if_rx", false);
if (bfd->forwarding_if_rx != forwarding_if_rx) {
@@ -674,7 +677,14 @@ bfd_should_process_flow(const struct bfd *bfd_, const struct flow *flow,
memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
if (flow->nw_proto == IPPROTO_UDP
&& !(flow->nw_frag & FLOW_NW_FRAG_LATER)
- && tp_dst_equals(flow, BFD_DEST_PORT, wc)) {
+ && tp_dst_equals(flow, BFD_DEST_PORT, wc)
+ && (bfd->ip_src == htonl(BFD_DEFAULT_SRC_IP)
+ || bfd->ip_src == flow->nw_dst)) {
+
+ if (bfd->ip_src == flow->nw_dst) {
+ memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
+ }
+
bool check_tnl_key;
atomic_read_relaxed(&bfd->check_tnl_key, &check_tnl_key);
diff --git a/lib/bundle.c b/lib/bundle.c
index edb11f6bebfa4a7da7eb0407063f289b8467e38a..d728380ec00529281fb5d2fb816ccaefe35968cb 100644
--- a/lib/bundle.c
+++ b/lib/bundle.c
@@ -39,14 +39,14 @@ VLOG_DEFINE_THIS_MODULE(bundle);
static ofp_port_t
execute_ab(const struct ofpact_bundle *bundle,
- bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), void *aux)
+ bool (*member_enabled)(ofp_port_t ofp_port, void *aux), void *aux)
{
size_t i;
- for (i = 0; i < bundle->n_slaves; i++) {
- ofp_port_t slave = bundle->slaves[i];
- if (slave_enabled(slave, aux)) {
- return slave;
+ for (i = 0; i < bundle->n_members; i++) {
+ ofp_port_t member = bundle->members[i];
+ if (member_enabled(member, aux)) {
+ return member;
}
}
@@ -56,12 +56,12 @@ execute_ab(const struct ofpact_bundle *bundle,
static ofp_port_t
execute_hrw(const struct ofpact_bundle *bundle,
const struct flow *flow, struct flow_wildcards *wc,
- bool (*slave_enabled)(ofp_port_t ofp_port, void *aux), void *aux)
+ bool (*member_enabled)(ofp_port_t ofp_port, void *aux), void *aux)
{
uint32_t flow_hash, best_hash;
int best, i;
- if (bundle->n_slaves > 1) {
+ if (bundle->n_members > 1) {
flow_mask_hash_fields(flow, wc, bundle->fields);
}
@@ -69,8 +69,8 @@ execute_hrw(const struct ofpact_bundle *bundle,
best = -1;
best_hash = 0;
- for (i = 0; i < bundle->n_slaves; i++) {
- if (slave_enabled(bundle->slaves[i], aux)) {
+ for (i = 0; i < bundle->n_members; i++) {
+ if (member_enabled(bundle->members[i], aux)) {
uint32_t hash = hash_2words(i, flow_hash);
if (best < 0 || hash > best_hash) {
@@ -80,25 +80,25 @@ execute_hrw(const struct ofpact_bundle *bundle,
}
}
- return best >= 0 ? bundle->slaves[best] : OFPP_NONE;
+ return best >= 0 ? bundle->members[best] : OFPP_NONE;
}
/* Executes 'bundle' on 'flow'. Sets fields in 'wc' that were used to
- * calculate the result. Uses 'slave_enabled' to determine if the slave
- * designated by 'ofp_port' is up. Returns the chosen slave, or
- * OFPP_NONE if none of the slaves are acceptable. */
+ * calculate the result. Uses 'member_enabled' to determine if the member
+ * designated by 'ofp_port' is up. Returns the chosen member, or
+ * OFPP_NONE if none of the members are acceptable. */
ofp_port_t
bundle_execute(const struct ofpact_bundle *bundle,
const struct flow *flow, struct flow_wildcards *wc,
- bool (*slave_enabled)(ofp_port_t ofp_port, void *aux),
+ bool (*member_enabled)(ofp_port_t ofp_port, void *aux),
void *aux)
{
switch (bundle->algorithm) {
case NX_BD_ALG_HRW:
- return execute_hrw(bundle, flow, wc, slave_enabled, aux);
+ return execute_hrw(bundle, flow, wc, member_enabled, aux);
case NX_BD_ALG_ACTIVE_BACKUP:
- return execute_ab(bundle, slave_enabled, aux);
+ return execute_ab(bundle, member_enabled, aux);
default:
OVS_NOT_REACHED();
@@ -119,21 +119,21 @@ bundle_check(const struct ofpact_bundle *bundle, ofp_port_t max_ports,
}
}
- for (i = 0; i < bundle->n_slaves; i++) {
- ofp_port_t ofp_port = bundle->slaves[i];
+ for (i = 0; i < bundle->n_members; i++) {
+ ofp_port_t ofp_port = bundle->members[i];
if (ofp_port != OFPP_NONE) {
enum ofperr error = ofpact_check_output_port(ofp_port, max_ports);
if (error) {
- VLOG_WARN_RL(&rl, "invalid slave %"PRIu32, ofp_port);
+ VLOG_WARN_RL(&rl, "invalid member %"PRIu32, ofp_port);
return error;
}
}
- /* Controller slaves are unsupported due to the lack of a max_len
+ /* Controller members are unsupported due to the lack of a max_len
* argument. This may or may not change in the future. There doesn't
* seem to be a real-world use-case for supporting it. */
if (ofp_port == OFPP_CONTROLLER) {
- VLOG_WARN_RL(&rl, "unsupported controller slave");
+ VLOG_WARN_RL(&rl, "unsupported controller member");
return OFPERR_OFPBAC_BAD_OUT_PORT;
}
}
@@ -150,38 +150,39 @@ static char * OVS_WARN_UNUSED_RESULT
bundle_parse__(const char *s, const struct ofputil_port_map *port_map,
char **save_ptr,
const char *fields, const char *basis, const char *algorithm,
- const char *slave_type, const char *dst,
- const char *slave_delim, struct ofpbuf *ofpacts)
+ const char *member_type, const char *dst,
+ const char *member_delim, struct ofpbuf *ofpacts)
{
struct ofpact_bundle *bundle;
- if (!slave_delim) {
+ if (!member_delim) {
return xasprintf("%s: not enough arguments to bundle action", s);
}
- if (strcasecmp(slave_delim, "slaves")) {
- return xasprintf("%s: missing slave delimiter, expected `slaves' "
- "got `%s'", s, slave_delim);
+ if (strcasecmp(member_delim, "members")
+ && strcasecmp(member_delim, "slaves")) {
+ return xasprintf("%s: missing member delimiter, expected `members', "
+ "got `%s'", s, member_delim);
}
bundle = ofpact_put_BUNDLE(ofpacts);
for (;;) {
- ofp_port_t slave_port;
- char *slave;
+ ofp_port_t member_port;
+ char *member;
- slave = strtok_r(NULL, ", []", save_ptr);
- if (!slave || bundle->n_slaves >= BUNDLE_MAX_SLAVES) {
+ member = strtok_r(NULL, ", []", save_ptr);
+ if (!member || bundle->n_members >= BUNDLE_MAX_MEMBERS) {
break;
}
- if (!ofputil_port_from_string(slave, port_map, &slave_port)) {
- return xasprintf("%s: bad port number", slave);
+ if (!ofputil_port_from_string(member, port_map, &member_port)) {
+ return xasprintf("%s: bad port number", member);
}
- ofpbuf_put(ofpacts, &slave_port, sizeof slave_port);
+ ofpbuf_put(ofpacts, &member_port, sizeof member_port);
bundle = ofpacts->header;
- bundle->n_slaves++;
+ bundle->n_members++;
}
if (ofpbuf_oversized(ofpacts)) {
@@ -217,8 +218,8 @@ bundle_parse__(const char *s, const struct ofputil_port_map *port_map,
return xasprintf("%s: unknown algorithm `%s'", s, algorithm);
}
- if (strcasecmp(slave_type, "ofport")) {
- return xasprintf("%s: unknown slave_type `%s'", s, slave_type);
+ if (strcasecmp(member_type, "ofport")) {
+ return xasprintf("%s: unknown member_type `%s'", s, member_type);
}
if (dst) {
@@ -245,7 +246,7 @@ char * OVS_WARN_UNUSED_RESULT
bundle_parse(const char *s, const struct ofputil_port_map *port_map,
struct ofpbuf *ofpacts)
{
- char *fields, *basis, *algorithm, *slave_type, *slave_delim;
+ char *fields, *basis, *algorithm, *member_type, *member_delim;
char *tokstr, *save_ptr;
char *error;
@@ -254,12 +255,12 @@ bundle_parse(const char *s, const struct ofputil_port_map *port_map,
fields = strtok_r(tokstr, ", ", &save_ptr);
basis = strtok_r(NULL, ", ", &save_ptr);
algorithm = strtok_r(NULL, ", ", &save_ptr);
- slave_type = strtok_r(NULL, ", ", &save_ptr);
- slave_delim = strtok_r(NULL, ": ", &save_ptr);
+ member_type = strtok_r(NULL, ", ", &save_ptr);
+ member_delim = strtok_r(NULL, ": ", &save_ptr);
error = bundle_parse__(s, port_map,
- &save_ptr, fields, basis, algorithm, slave_type,
- NULL, slave_delim, ofpacts);
+ &save_ptr, fields, basis, algorithm, member_type,
+ NULL, member_delim, ofpacts);
free(tokstr);
return error;
@@ -274,7 +275,7 @@ char * OVS_WARN_UNUSED_RESULT
bundle_parse_load(const char *s, const struct ofputil_port_map *port_map,
struct ofpbuf *ofpacts)
{
- char *fields, *basis, *algorithm, *slave_type, *dst, *slave_delim;
+ char *fields, *basis, *algorithm, *member_type, *dst, *member_delim;
char *tokstr, *save_ptr;
char *error;
@@ -283,13 +284,13 @@ bundle_parse_load(const char *s, const struct ofputil_port_map *port_map,
fields = strtok_r(tokstr, ", ", &save_ptr);
basis = strtok_r(NULL, ", ", &save_ptr);
algorithm = strtok_r(NULL, ", ", &save_ptr);
- slave_type = strtok_r(NULL, ", ", &save_ptr);
+ member_type = strtok_r(NULL, ", ", &save_ptr);
dst = strtok_r(NULL, ", ", &save_ptr);
- slave_delim = strtok_r(NULL, ": ", &save_ptr);
+ member_delim = strtok_r(NULL, ": ", &save_ptr);
error = bundle_parse__(s, port_map,
- &save_ptr, fields, basis, algorithm, slave_type,
- dst, slave_delim, ofpacts);
+ &save_ptr, fields, basis, algorithm, member_type,
+ dst, member_delim, ofpacts);
free(tokstr);
@@ -328,13 +329,13 @@ bundle_format(const struct ofpact_bundle *bundle,
ds_put_char(s, ',');
}
- ds_put_format(s, "%sslaves:%s", colors.param, colors.end);
- for (i = 0; i < bundle->n_slaves; i++) {
+ ds_put_format(s, "%smembers:%s", colors.param, colors.end);
+ for (i = 0; i < bundle->n_members; i++) {
if (i) {
ds_put_char(s, ',');
}
- ofputil_format_port(bundle->slaves[i], port_map, s);
+ ofputil_format_port(bundle->members[i], port_map, s);
}
ds_put_format(s, "%s)%s", colors.paren, colors.end);
diff --git a/lib/bundle.h b/lib/bundle.h
index 85a2e861d3674096bc1126b76bd70317c1d03fa2..b3b9cdcee26bab40992f181ee46e18c9c7569b62 100644
--- a/lib/bundle.h
+++ b/lib/bundle.h
@@ -40,11 +40,11 @@ struct ofputil_port_map;
*
* See lib/ofp-actions.c for NXAST_BUNDLE specification. */
-#define BUNDLE_MAX_SLAVES 2048
+#define BUNDLE_MAX_MEMBERS 2048
ofp_port_t bundle_execute(const struct ofpact_bundle *, const struct flow *,
struct flow_wildcards *wc,
- bool (*slave_enabled)(ofp_port_t ofp_port, void *aux),
+ bool (*member_enabled)(ofp_port_t ofp_port, void *aux),
void *aux);
enum ofperr bundle_check(const struct ofpact_bundle *, ofp_port_t max_ports,
const struct match *);
diff --git a/lib/cfm.c b/lib/cfm.c
index 71d2c02067e08d6e34366438118eefbf24868bd4..cc43e70e316acbb57df0790d714d88e0ca7064d7 100644
--- a/lib/cfm.c
+++ b/lib/cfm.c
@@ -780,8 +780,8 @@ cfm_process_heartbeat(struct cfm *cfm, const struct dp_packet *p)
*
* Faults can cause a controller or Open vSwitch to make potentially
* expensive changes to the network topology. It seems prudent to trigger
- * them judiciously, especially when CFM is used to check slave status of
- * bonds. Furthermore, faults can be maliciously triggered by crafting
+ * them judiciously, especially when CFM is used to check status of bond
+ * members. Furthermore, faults can be maliciously triggered by crafting
* unexpected CCMs. */
if (memcmp(ccm->maid, cfm->maid, sizeof ccm->maid)) {
cfm->recv_fault |= CFM_FAULT_MAID;
diff --git a/lib/classifier.c b/lib/classifier.c
index 0fad953213e430a9396b1941635b2ffe41d5bce5..2a1d155dad9f016c7efdea1fb148145a755c17a9 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -393,7 +393,9 @@ classifier_set_prefix_fields(struct classifier *cls,
bitmap_set1(fields.bm, trie_fields[i]);
new_fields[n_tries] = NULL;
- if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) {
+ const struct mf_field *cls_field
+ = ovsrcu_get(struct mf_field *, &cls->tries[n_tries].field);
+ if (n_tries >= cls->n_tries || field != cls_field) {
new_fields[n_tries] = field;
changed = true;
}
@@ -454,7 +456,7 @@ trie_init(struct classifier *cls, int trie_idx, const struct mf_field *field)
} else {
ovsrcu_set_hidden(&trie->root, NULL);
}
- trie->field = field;
+ ovsrcu_set_hidden(&trie->field, CONST_CAST(struct mf_field *, field));
/* Add existing rules to the new trie. */
CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) {
@@ -839,7 +841,6 @@ classifier_remove_assert(struct classifier *cls,
struct trie_ctx {
const struct cls_trie *trie;
bool lookup_done; /* Status of the lookup. */
- uint8_t be32ofs; /* U32 offset of the field in question. */
unsigned int maskbits; /* Prefix length needed to avoid false matches. */
union trie_prefix match_plens; /* Bitmask of prefix lengths with possible
* matches. */
@@ -849,7 +850,6 @@ static void
trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie)
{
ctx->trie = trie;
- ctx->be32ofs = trie->field->flow_be32ofs;
ctx->lookup_done = false;
}
@@ -1370,6 +1370,7 @@ cls_cursor_start(const struct classifier *cls, const struct cls_rule *target,
struct cls_cursor cursor;
struct cls_subtable *subtable;
+ memset(&cursor, 0x0, sizeof cursor);
cursor.cls = cls;
cursor.target = target && !cls_rule_is_catchall(target) ? target : NULL;
cursor.version = version;
@@ -1531,8 +1532,10 @@ insert_subtable(struct classifier *cls, const struct minimask *mask)
*CONST_CAST(uint8_t *, &subtable->n_indices) = index;
for (i = 0; i < cls->n_tries; i++) {
- subtable->trie_plen[i] = minimask_get_prefix_len(mask,
- cls->tries[i].field);
+ const struct mf_field *field
+ = ovsrcu_get(struct mf_field *, &cls->tries[i].field);
+ subtable->trie_plen[i]
+ = field ? minimask_get_prefix_len(mask, field) : 0;
}
/* Ports trie. */
@@ -1575,11 +1578,17 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
* fields using the prefix tries. The trie checks are done only as
* needed to avoid folding in additional bits to the wildcards mask. */
for (j = 0; j < n_tries; j++) {
- /* Is the trie field relevant for this subtable, and
- is the trie field within the current range of fields? */
- if (field_plen[j] &&
- flowmap_is_set(&range_map, trie_ctx[j].be32ofs / 2)) {
+ /* Is the trie field relevant for this subtable? */
+ if (field_plen[j]) {
struct trie_ctx *ctx = &trie_ctx[j];
+ const struct mf_field *ctx_field
+ = ovsrcu_get(struct mf_field *, &ctx->trie->field);
+
+ /* Is the trie field within the current range of fields? */
+ if (!ctx_field
+ || !flowmap_is_set(&range_map, ctx_field->flow_be32ofs / 2)) {
+ continue;
+ }
/* On-demand trie lookup. */
if (!ctx->lookup_done) {
@@ -1601,14 +1610,16 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], unsigned int n_tries,
* than this subtable would otherwise. */
if (ctx->maskbits <= field_plen[j]) {
/* Unwildcard the bits and skip the rest. */
- mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits);
+ mask_set_prefix_bits(wc, ctx_field->flow_be32ofs,
+ ctx->maskbits);
/* Note: Prerequisite already unwildcarded, as the only
* prerequisite of the supported trie lookup fields is
* the ethertype, which is always unwildcarded. */
return true;
}
/* Can skip if the field is already unwildcarded. */
- if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) {
+ if (mask_prefix_bits_set(wc, ctx_field->flow_be32ofs,
+ ctx->maskbits)) {
return true;
}
}
@@ -2001,12 +2012,12 @@ static unsigned int
trie_lookup(const struct cls_trie *trie, const struct flow *flow,
union trie_prefix *plens)
{
- const struct mf_field *mf = trie->field;
+ const struct mf_field *mf = ovsrcu_get(struct mf_field *, &trie->field);
/* Check that current flow matches the prerequisites for the trie
* field. Some match fields are used for multiple purposes, so we
* must check that the trie is relevant for this flow. */
- if (mf_are_prereqs_ok(mf, flow, NULL)) {
+ if (mf && mf_are_prereqs_ok(mf, flow, NULL)) {
return trie_lookup_value(&trie->root,
&((ovs_be32 *)flow)[mf->flow_be32ofs],
&plens->be32, mf->n_bits);
@@ -2053,8 +2064,9 @@ minimask_get_prefix_len(const struct minimask *minimask,
* happened to be zeros.
*/
static const ovs_be32 *
-minimatch_get_prefix(const struct minimatch *match, const struct mf_field *mf)
+minimatch_get_prefix(const struct minimatch *match, rcu_field_ptr *field)
{
+ struct mf_field *mf = ovsrcu_get_protected(struct mf_field *, field);
size_t u64_ofs = mf->flow_be32ofs / 2;
return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs)
@@ -2068,7 +2080,7 @@ static void
trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
trie_insert_prefix(&trie->root,
- minimatch_get_prefix(&rule->match, trie->field), mlen);
+ minimatch_get_prefix(&rule->match, &trie->field), mlen);
}
static void
@@ -2123,7 +2135,7 @@ static void
trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen)
{
trie_remove_prefix(&trie->root,
- minimatch_get_prefix(&rule->match, trie->field), mlen);
+ minimatch_get_prefix(&rule->match, &trie->field), mlen);
}
/* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask
diff --git a/lib/classifier.h b/lib/classifier.h
index d1bd4aa12a7832d8087073137e8943489e5b6200..f646a8f7429b31fc1fc4353832bf40075c76b8b4 100644
--- a/lib/classifier.h
+++ b/lib/classifier.h
@@ -314,13 +314,15 @@ extern "C" {
struct cls_subtable;
struct cls_match;
+struct mf_field;
+typedef OVSRCU_TYPE(struct mf_field *) rcu_field_ptr;
struct trie_node;
typedef OVSRCU_TYPE(struct trie_node *) rcu_trie_ptr;
/* Prefix trie for a 'field' */
struct cls_trie {
- const struct mf_field *field; /* Trie field, or NULL. */
- rcu_trie_ptr root; /* NULL if none. */
+ rcu_field_ptr field; /* Trie field, or NULL. */
+ rcu_trie_ptr root; /* NULL if none. */
};
enum {
diff --git a/lib/conntrack-icmp.c b/lib/conntrack-icmp.c
index 63246f0124d0a744d7106b510280992baf11b484..bf49f9a9fa93dccea18ff93088a49563bbb48afd 100644
--- a/lib/conntrack-icmp.c
+++ b/lib/conntrack-icmp.c
@@ -22,6 +22,7 @@
#include
#include "conntrack-private.h"
+#include "conntrack-tp.h"
#include "dp-packet.h"
enum OVS_PACKED_ENUM icmp_state {
@@ -50,9 +51,12 @@ icmp_conn_update(struct conntrack *ct, struct conn *conn_,
struct dp_packet *pkt OVS_UNUSED, bool reply, long long now)
{
struct conn_icmp *conn = conn_icmp_cast(conn_);
- conn->state = reply ? ICMPS_REPLY : ICMPS_FIRST;
- conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
+ if (reply && conn->state == ICMPS_FIRST) {
+ conn->state = ICMPS_REPLY;
+ }
+
+ conn_update_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
return CT_UPDATE_VALID;
}
@@ -76,12 +80,13 @@ icmp6_valid_new(struct dp_packet *pkt)
static struct conn *
icmp_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
- long long now)
+ long long now, uint32_t tp_id)
{
struct conn_icmp *conn = xzalloc(sizeof *conn);
conn->state = ICMPS_FIRST;
- conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
+ conn->up.tp_id = tp_id;
+ conn_init_expiration(ct, &conn->up, icmp_timeouts[conn->state], now);
return &conn->up;
}
diff --git a/lib/conntrack-other.c b/lib/conntrack-other.c
index de22ef87cc19663cdcfd2f24696a62e1617d1575..d3b46018586c0cc3c73777d2472aa6759fd22984 100644
--- a/lib/conntrack-other.c
+++ b/lib/conntrack-other.c
@@ -17,6 +17,7 @@
#include
#include "conntrack-private.h"
+#include "conntrack-tp.h"
#include "dp-packet.h"
enum OVS_PACKED_ENUM other_state {
@@ -69,12 +70,13 @@ other_valid_new(struct dp_packet *pkt OVS_UNUSED)
static struct conn *
other_new_conn(struct conntrack *ct, struct dp_packet *pkt OVS_UNUSED,
- long long now)
+ long long now, uint32_t tp_id)
{
struct conn_other *conn;
conn = xzalloc(sizeof *conn);
conn->state = OTHERS_FIRST;
+ conn->up.tp_id = tp_id;
conn_init_expiration(ct, &conn->up, other_timeouts[conn->state], now);
diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h
index 9a8ca39101574d33e1a206fc0aa57ebed1f12f9a..3895bc6880d7bd069f49e8e6d90a3bb11ec04eee 100644
--- a/lib/conntrack-private.h
+++ b/lib/conntrack-private.h
@@ -59,6 +59,9 @@ struct conn_key {
uint8_t nw_proto;
};
+/* Verify that nw_proto stays uint8_t as it's used to index into l4_protos[] */
+BUILD_ASSERT_DECL(MEMBER_SIZEOF(struct conn_key, nw_proto) == sizeof(uint8_t));
+
/* This is used for alg expectations; an expectation is a
* context created in preparation for establishing a data
* connection. The expectation is created by the control
@@ -71,13 +74,13 @@ struct alg_exp_node {
/* Key of data connection to be created. */
struct conn_key key;
/* Corresponding key of the control connection. */
- struct conn_key master_key;
+ struct conn_key parent_key;
/* The NAT replacement address to be used by the data connection. */
union ct_addr alg_nat_repl_addr;
- /* The data connection inherits the master control
+ /* The data connection inherits the parent control
* connection label and mark. */
- ovs_u128 master_label;
- uint32_t master_mark;
+ ovs_u128 parent_label;
+ uint32_t parent_mark;
/* True if for NAT application, the alg replaces the dest address;
* otherwise, the source address is replaced. */
bool nat_rpl_dst;
@@ -92,7 +95,7 @@ struct conn {
/* Immutable data. */
struct conn_key key;
struct conn_key rev_key;
- struct conn_key master_key; /* Only used for orig_tuple support. */
+ struct conn_key parent_key; /* Only used for orig_tuple support. */
struct ovs_list exp_node;
struct cmap_node cm_node;
struct nat_action_info_t *nat_info;
@@ -118,6 +121,8 @@ struct conn {
/* Immutable data. */
bool alg_related; /* True if alg data connection. */
enum ct_conn_type conn_type;
+
+ uint32_t tp_id; /* Timeout policy ID. */
};
enum ct_update_res {
@@ -131,28 +136,20 @@ enum ct_update_res {
* are listed here. The name will be prefix by CT_TM_ and the value is in
* milliseconds */
#define CT_TIMEOUTS \
- CT_TIMEOUT(TCP_FIRST_PACKET, 30 * 1000) \
- CT_TIMEOUT(TCP_OPENING, 30 * 1000) \
- CT_TIMEOUT(TCP_ESTABLISHED, 24 * 60 * 60 * 1000) \
- CT_TIMEOUT(TCP_CLOSING, 15 * 60 * 1000) \
- CT_TIMEOUT(TCP_FIN_WAIT, 45 * 1000) \
- CT_TIMEOUT(TCP_CLOSED, 30 * 1000) \
- CT_TIMEOUT(OTHER_FIRST, 60 * 1000) \
- CT_TIMEOUT(OTHER_MULTIPLE, 60 * 1000) \
- CT_TIMEOUT(OTHER_BIDIR, 30 * 1000) \
- CT_TIMEOUT(ICMP_FIRST, 60 * 1000) \
- CT_TIMEOUT(ICMP_REPLY, 30 * 1000)
-
-/* The smallest of the above values: it is used as an upper bound for the
- * interval between two rounds of cleanup of expired entries */
-#define CT_TM_MIN (30 * 1000)
-
-#define CT_TIMEOUT(NAME, VAL) BUILD_ASSERT_DECL(VAL >= CT_TM_MIN);
- CT_TIMEOUTS
-#undef CT_TIMEOUT
+ CT_TIMEOUT(TCP_FIRST_PACKET) \
+ CT_TIMEOUT(TCP_OPENING) \
+ CT_TIMEOUT(TCP_ESTABLISHED) \
+ CT_TIMEOUT(TCP_CLOSING) \
+ CT_TIMEOUT(TCP_FIN_WAIT) \
+ CT_TIMEOUT(TCP_CLOSED) \
+ CT_TIMEOUT(OTHER_FIRST) \
+ CT_TIMEOUT(OTHER_MULTIPLE) \
+ CT_TIMEOUT(OTHER_BIDIR) \
+ CT_TIMEOUT(ICMP_FIRST) \
+ CT_TIMEOUT(ICMP_REPLY)
enum ct_timeout {
-#define CT_TIMEOUT(NAME, VALUE) CT_TM_##NAME,
+#define CT_TIMEOUT(NAME) CT_TM_##NAME,
CT_TIMEOUTS
#undef CT_TIMEOUT
N_CT_TM
@@ -163,6 +160,7 @@ struct conntrack {
struct cmap conns OVS_GUARDED;
struct ovs_list exp_lists[N_CT_TM] OVS_GUARDED;
struct hmap zone_limits OVS_GUARDED;
+ struct hmap timeout_policies OVS_GUARDED;
uint32_t hash_basis; /* Salt for hashing a connection key. */
pthread_t clean_thread; /* Periodically cleans up connection tracker. */
struct latch clean_thread_exit; /* To destroy the 'clean_thread'. */
@@ -197,7 +195,7 @@ extern struct ct_l4_proto ct_proto_icmp6;
struct ct_l4_proto {
struct conn *(*new_conn)(struct conntrack *ct, struct dp_packet *pkt,
- long long now);
+ long long now, uint32_t tp_id);
bool (*valid_new)(struct dp_packet *pkt);
enum ct_update_res (*conn_update)(struct conntrack *ct, struct conn *conn,
struct dp_packet *pkt, bool reply,
@@ -206,39 +204,6 @@ struct ct_l4_proto {
struct ct_dpif_protoinfo *);
};
-extern long long ct_timeout_val[];
-
-
-/* ct_lock must be held. */
-static inline void
-conn_init_expiration(struct conntrack *ct, struct conn *conn,
- enum ct_timeout tm, long long now)
-{
- conn->expiration = now + ct_timeout_val[tm];
- ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
-}
-
-/* The conn entry lock must be held on entry and exit. */
-static inline void
-conn_update_expiration(struct conntrack *ct, struct conn *conn,
- enum ct_timeout tm, long long now)
- OVS_NO_THREAD_SAFETY_ANALYSIS
-{
- ovs_mutex_unlock(&conn->lock);
-
- ovs_mutex_lock(&ct->ct_lock);
- ovs_mutex_lock(&conn->lock);
- if (!conn->cleaned) {
- conn->expiration = now + ct_timeout_val[tm];
- ovs_list_remove(&conn->exp_node);
- ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
- }
- ovs_mutex_unlock(&conn->lock);
- ovs_mutex_unlock(&ct->ct_lock);
-
- ovs_mutex_lock(&conn->lock);
-}
-
static inline uint32_t
tcp_payload_length(struct dp_packet *pkt)
{
diff --git a/lib/conntrack-tcp.c b/lib/conntrack-tcp.c
index 416cb769d22f1dc884fbaaf77d031d73c2a883cd..18a2aa7c7d0250002064cf9daf957a59580c0097 100644
--- a/lib/conntrack-tcp.c
+++ b/lib/conntrack-tcp.c
@@ -39,6 +39,7 @@
#include
#include "conntrack-private.h"
+#include "conntrack-tp.h"
#include "coverage.h"
#include "ct-dpif.h"
#include "dp-packet.h"
@@ -189,7 +190,7 @@ tcp_conn_update(struct conntrack *ct, struct conn *conn_,
} else if (src->state <= CT_DPIF_TCPS_SYN_SENT) {
src->state = CT_DPIF_TCPS_SYN_SENT;
conn_update_expiration(ct, &conn->up, CT_TM_TCP_FIRST_PACKET, now);
- return CT_UPDATE_NEW;
+ return CT_UPDATE_VALID_NEW;
}
}
@@ -435,7 +436,8 @@ tcp_valid_new(struct dp_packet *pkt)
}
static struct conn *
-tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now)
+tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now,
+ uint32_t tp_id)
{
struct conn_tcp* newconn = NULL;
struct tcp_header *tcp = dp_packet_l4(pkt);
@@ -471,6 +473,7 @@ tcp_new_conn(struct conntrack *ct, struct dp_packet *pkt, long long now)
src->state = CT_DPIF_TCPS_SYN_SENT;
dst->state = CT_DPIF_TCPS_CLOSED;
+ newconn->up.tp_id = tp_id;
conn_init_expiration(ct, &newconn->up, CT_TM_TCP_FIRST_PACKET, now);
return &newconn->up;
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
new file mode 100644
index 0000000000000000000000000000000000000000..a586d3a8d3272ed142a049da8dc25e0182678b13
--- /dev/null
+++ b/lib/conntrack-tp.c
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include <errno.h>
+#include "conntrack-private.h"
+#include "conntrack-tp.h"
+#include "ct-dpif.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(conntrack_tp);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+
+/* Printable names of the 'enum ct_timeout' values, indexed by value. */
+static const char *ct_timeout_str[] = {
+#define CT_TIMEOUT(NAME) #NAME,
+    CT_TIMEOUTS
+#undef CT_TIMEOUT
+};
+
+/* Default timeout policy in seconds, indexed by 'enum ct_dpif_tp_attr'. */
+static unsigned int ct_dpif_netdev_tp_def[] = {
+    [CT_DPIF_TP_ATTR_TCP_SYN_SENT] = 30,
+    [CT_DPIF_TP_ATTR_TCP_SYN_RECV] = 30,
+    [CT_DPIF_TP_ATTR_TCP_ESTABLISHED] = 24 * 60 * 60,
+    [CT_DPIF_TP_ATTR_TCP_FIN_WAIT] = 15 * 60,
+    [CT_DPIF_TP_ATTR_TCP_TIME_WAIT] = 45,
+    [CT_DPIF_TP_ATTR_TCP_CLOSE] = 30,
+    [CT_DPIF_TP_ATTR_UDP_FIRST] = 60,
+    [CT_DPIF_TP_ATTR_UDP_SINGLE] = 60,
+    [CT_DPIF_TP_ATTR_UDP_MULTIPLE] = 30,
+    [CT_DPIF_TP_ATTR_ICMP_FIRST] = 60,
+    [CT_DPIF_TP_ATTR_ICMP_REPLY] = 30,
+};
+
+/* Returns the timeout policy with ID 'tp_id' in 'ct', or NULL if 'ct' has no
+ * such policy. */
+static struct timeout_policy *
+timeout_policy_lookup(struct conntrack *ct, uint32_t tp_id)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy *tp;
+    uint32_t hash;
+
+    hash = hash_int(tp_id, ct->hash_basis);
+    HMAP_FOR_EACH_IN_BUCKET (tp, node, hash, &ct->timeout_policies) {
+        if (tp->policy.id == tp_id) {
+            return tp;
+        }
+    }
+    return NULL;
+}
+
+/* Looks up the timeout policy with ID 'tp_id' while holding 'ct->ct_lock' and
+ * returns it, or NULL if there is none.
+ *
+ * NOTE(review): the lock is released before returning, so nothing here keeps
+ * the returned policy alive if it is deleted or replaced concurrently;
+ * confirm how callers use the pointer. */
+struct timeout_policy *
+timeout_policy_get(struct conntrack *ct, int32_t tp_id)
+{
+    struct timeout_policy *tp;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    tp = timeout_policy_lookup(ct, tp_id);
+    ovs_mutex_unlock(&ct->ct_lock);
+    return tp;
+}
+
+/* Copies into 'tp_dst' every timeout attribute whose bit is set in
+ * 'tp_src->policy.present' and marks it present in 'tp_dst'.  Attributes
+ * that are not present in 'tp_src' keep their current value in 'tp_dst'. */
+static void
+update_existing_tp(struct timeout_policy *tp_dst,
+                   const struct timeout_policy *tp_src)
+{
+    struct ct_dpif_timeout_policy *dst = &tp_dst->policy;
+    const struct ct_dpif_timeout_policy *src = &tp_src->policy;
+
+    for (size_t i = 0; i < ARRAY_SIZE(dst->attrs); i++) {
+        /* Unsigned constant: shifting a signed 1 into the top bit would be
+         * undefined behavior. */
+        uint32_t bit = UINT32_C(1) << i;
+
+        if (src->present & bit) {
+            dst->attrs[i] = src->attrs[i];
+            dst->present |= bit;
+        }
+    }
+}
+
+/* Initializes 'tp' as a policy with ID 'tp_id' whose timeouts are all set to
+ * the defaults but with no attribute marked as present. */
+static void
+init_default_tp(struct timeout_policy *tp, uint32_t tp_id)
+{
+    tp->policy.id = tp_id;
+    tp->policy.present = 0;
+    /* NOTE(review): copies 'sizeof tp->policy.attrs' bytes, which assumes
+     * that 'ct_dpif_netdev_tp_def' is at least that large and has the same
+     * element type; confirm against the 'attrs' declaration. */
+    memcpy(tp->policy.attrs, ct_dpif_netdev_tp_def,
+           sizeof tp->policy.attrs);
+}
+
+/* Adds to 'ct' a new timeout policy that has the ID of 'new_tp', the default
+ * timeouts, and on top of those every attribute present in 'new_tp'.  Only
+ * the values are copied; 'new_tp' itself is not stored.  This function does
+ * not check whether 'ct' already has a policy with the same ID. */
+static void
+timeout_policy_create(struct conntrack *ct,
+                      struct timeout_policy *new_tp)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    uint32_t tp_id = new_tp->policy.id;
+    struct timeout_policy *tp;
+    uint32_t hash;
+
+    tp = xzalloc(sizeof *tp);
+    init_default_tp(tp, tp_id);
+    update_existing_tp(tp, new_tp);
+    hash = hash_int(tp_id, ct->hash_basis);
+    hmap_insert(&ct->timeout_policies, &tp->node, hash);
+}
+
+/* Removes 'tp' from 'ct' and frees it. */
+static void
+timeout_policy_clean(struct conntrack *ct, struct timeout_policy *tp)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    hmap_remove(&ct->timeout_policies, &tp->node);
+    free(tp);
+}
+
+/* Deletes the timeout policy with ID 'tp_id' from 'ct'.  Returns 0 on
+ * success, or ENOENT (after logging a warning) if there is no such policy. */
+static int
+timeout_policy_delete__(struct conntrack *ct, uint32_t tp_id)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
+
+    if (!tp) {
+        /* 'tp_id' is unsigned, so print it with %u rather than %d. */
+        VLOG_WARN_RL(&rl, "Failed to delete a non-existent timeout "
+                     "policy: id=%u", tp_id);
+        return ENOENT;
+    }
+
+    timeout_policy_clean(ct, tp);
+    return 0;
+}
+
+/* Same as timeout_policy_delete__(), but takes 'ct->ct_lock' itself. */
+int
+timeout_policy_delete(struct conntrack *ct, uint32_t tp_id)
+{
+    int err;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    err = timeout_policy_delete__(ct, tp_id);
+    ovs_mutex_unlock(&ct->ct_lock);
+    return err;
+}
+
+/* Initializes the timeout policy table of 'ct' and adds the default policy
+ * (ID DEFAULT_TP_ID) to it. */
+void
+timeout_policy_init(struct conntrack *ct)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy tp;
+
+    hmap_init(&ct->timeout_policies);
+
+    /* Create default timeout policy. */
+    memset(&tp, 0, sizeof tp);
+    tp.policy.id = DEFAULT_TP_ID;
+    timeout_policy_create(ct, &tp);
+}
+
+/* Installs 'new_tp' in 'ct', replacing any existing policy that has the same
+ * ID.  The installed policy is built from the defaults plus the attributes
+ * present in 'new_tp'; nothing is carried over from the replaced policy.
+ * Always returns 0. */
+int
+timeout_policy_update(struct conntrack *ct,
+                      struct timeout_policy *new_tp)
+{
+    struct timeout_policy *tp;
+
+    ovs_mutex_lock(&ct->ct_lock);
+    tp = timeout_policy_lookup(ct, new_tp->policy.id);
+    if (tp) {
+        timeout_policy_clean(ct, tp);
+    }
+    timeout_policy_create(ct, new_tp);
+    ovs_mutex_unlock(&ct->ct_lock);
+    return 0;
+}
+
+/* Maps conntrack timeout 'tm' to the timeout policy attribute that holds its
+ * value. */
+static enum ct_dpif_tp_attr
+tm_to_ct_dpif_tp(enum ct_timeout tm)
+{
+    switch (tm) {
+    case CT_TM_TCP_FIRST_PACKET:
+        return CT_DPIF_TP_ATTR_TCP_SYN_SENT;
+    case CT_TM_TCP_OPENING:
+        return CT_DPIF_TP_ATTR_TCP_SYN_RECV;
+    case CT_TM_TCP_ESTABLISHED:
+        return CT_DPIF_TP_ATTR_TCP_ESTABLISHED;
+    case CT_TM_TCP_CLOSING:
+        return CT_DPIF_TP_ATTR_TCP_FIN_WAIT;
+    case CT_TM_TCP_FIN_WAIT:
+        return CT_DPIF_TP_ATTR_TCP_TIME_WAIT;
+    case CT_TM_TCP_CLOSED:
+        return CT_DPIF_TP_ATTR_TCP_CLOSE;
+    case CT_TM_OTHER_FIRST:
+        return CT_DPIF_TP_ATTR_UDP_FIRST;
+    case CT_TM_OTHER_BIDIR:
+        return CT_DPIF_TP_ATTR_UDP_MULTIPLE;
+    case CT_TM_OTHER_MULTIPLE:
+        return CT_DPIF_TP_ATTR_UDP_SINGLE;
+    case CT_TM_ICMP_FIRST:
+        return CT_DPIF_TP_ATTR_ICMP_FIRST;
+    case CT_TM_ICMP_REPLY:
+        return CT_DPIF_TP_ATTR_ICMP_REPLY;
+    case N_CT_TM:
+    default:
+        OVS_NOT_REACHED();
+        break;
+    }
+    OVS_NOT_REACHED();
+    return CT_DPIF_TP_ATTR_MAX;
+}
+
+/* Returns the timeout, in seconds, that applies to timer 'tm' under the
+ * timeout policy with ID 'tp_id', falling back to the built-in default if
+ * 'ct' has no such policy. */
+static uint32_t
+conn_timeout_sec(struct conntrack *ct, uint32_t tp_id, enum ct_timeout tm)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    struct timeout_policy *tp = timeout_policy_lookup(ct, tp_id);
+    enum ct_dpif_tp_attr attr = tm_to_ct_dpif_tp(tm);
+
+    return tp ? tp->policy.attrs[attr] : ct_dpif_netdev_tp_def[attr];
+}
+
+/* Sets the expiration of 'conn' to 'tp_value' seconds after 'now' (which is
+ * in milliseconds) and moves it to the tail of expiration list 'tm', unless
+ * 'conn' has already been cleaned.
+ *
+ * 'conn->lock' must be held on entry and is held again on exit, but it is
+ * released in between and re-taken after 'ct->ct_lock'. */
+static void
+conn_update_expiration__(struct conntrack *ct, struct conn *conn,
+                         enum ct_timeout tm, long long now,
+                         uint32_t tp_value)
+    OVS_REQUIRES(conn->lock)
+{
+    ovs_mutex_unlock(&conn->lock);
+
+    ovs_mutex_lock(&ct->ct_lock);
+    ovs_mutex_lock(&conn->lock);
+    if (!conn->cleaned) {
+        /* Convert to milliseconds in 64 bits: 'tp_value * 1000' would wrap
+         * around in 32-bit arithmetic for timeouts above ~49 days. */
+        conn->expiration = now + tp_value * 1000LL;
+        ovs_list_remove(&conn->exp_node);
+        ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
+    }
+    ovs_mutex_unlock(&conn->lock);
+    ovs_mutex_unlock(&ct->ct_lock);
+
+    ovs_mutex_lock(&conn->lock);
+}
+
+/* Re-arms timer 'tm' of 'conn' using the timeout that the policy with ID
+ * 'conn->tp_id' assigns to 'tm', or the default if there is no such policy.
+ *
+ * The conn entry lock must be held on entry and exit. */
+void
+conn_update_expiration(struct conntrack *ct, struct conn *conn,
+                       enum ct_timeout tm, long long now)
+    OVS_REQUIRES(conn->lock)
+{
+    uint32_t val;
+
+    ovs_mutex_unlock(&conn->lock);
+
+    /* Re-acquire in the order 'ct->ct_lock', then 'conn->lock'. */
+    ovs_mutex_lock(&ct->ct_lock);
+    ovs_mutex_lock(&conn->lock);
+    val = conn_timeout_sec(ct, conn->tp_id, tm);
+    ovs_mutex_unlock(&conn->lock);
+    ovs_mutex_unlock(&ct->ct_lock);
+
+    ovs_mutex_lock(&conn->lock);
+    VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%u "
+                "val=%u sec.",
+                ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
+
+    conn_update_expiration__(ct, conn, tm, now, val);
+}
+
+/* Sets the expiration of 'conn' to 'tp_value' seconds after 'now' (which is
+ * in milliseconds) and appends it to expiration list 'tm'. */
+static void
+conn_init_expiration__(struct conntrack *ct, struct conn *conn,
+                       enum ct_timeout tm, long long now,
+                       uint32_t tp_value)
+{
+    /* Multiply in 64 bits so that large timeouts do not wrap around. */
+    conn->expiration = now + tp_value * 1000LL;
+    ovs_list_push_back(&ct->exp_lists[tm], &conn->exp_node);
+}
+
+/* Sets the initial expiration of 'conn' for timer 'tm' using the timeout
+ * that the policy with ID 'conn->tp_id' assigns to 'tm', or the default if
+ * there is no such policy.
+ *
+ * ct_lock must be held. */
+void
+conn_init_expiration(struct conntrack *ct, struct conn *conn,
+                     enum ct_timeout tm, long long now)
+    OVS_REQUIRES(ct->ct_lock)
+{
+    uint32_t val = conn_timeout_sec(ct, conn->tp_id, tm);
+
+    VLOG_DBG_RL(&rl, "Init timeout %s zone=%u with policy id=%u val=%u sec.",
+                ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
+
+    conn_init_expiration__(ct, conn, tm, now, val);
+}
diff --git a/lib/conntrack-tp.h b/lib/conntrack-tp.h
new file mode 100644
index 0000000000000000000000000000000000000000..4d411d19fd537c2b79e0c83fd00e98cc40cd498e
--- /dev/null
+++ b/lib/conntrack-tp.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2020 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CONNTRACK_TP_H
+#define CONNTRACK_TP_H 1
+
+#include <stdint.h>
+
+struct conn;
+struct conntrack;
+struct timeout_policy;
+enum ct_timeout;
+
+/* Smallest of the default timeouts in conntrack-tp.c, in seconds. */
+#define CT_DPIF_NETDEV_TP_MIN 30
+
+void timeout_policy_init(struct conntrack *ct);
+int timeout_policy_update(struct conntrack *ct, struct timeout_policy *tp);
+int timeout_policy_delete(struct conntrack *ct, uint32_t tp_id);
+struct timeout_policy *timeout_policy_get(struct conntrack *ct, int32_t tp_id);
+void conn_init_expiration(struct conntrack *ct, struct conn *conn,
+                          enum ct_timeout tm, long long now);
+void conn_update_expiration(struct conntrack *ct, struct conn *conn,
+                            enum ct_timeout tm, long long now);
+
+#endif /* conntrack-tp.h */
diff --git a/lib/conntrack.c b/lib/conntrack.c
index ff5a89457c0ad507c5a64d3a4c66589182f07daa..bba38f9f576d854e4111dc700592649ad991adaa 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -25,6 +25,7 @@
#include "bitmap.h"
#include "conntrack.h"
#include "conntrack-private.h"
+#include "conntrack-tp.h"
#include "coverage.h"
#include "csum.h"
#include "ct-dpif.h"
@@ -44,6 +45,7 @@ VLOG_DEFINE_THIS_MODULE(conntrack);
COVERAGE_DEFINE(conntrack_full);
COVERAGE_DEFINE(conntrack_long_cleanup);
+COVERAGE_DEFINE(conntrack_l4csum_err);
struct conn_lookup_ctx {
struct conn_key key;
@@ -88,7 +90,8 @@ static uint32_t conn_key_hash(const struct conn_key *, uint32_t basis);
static void conn_key_reverse(struct conn_key *);
static bool valid_new(struct dp_packet *pkt, struct conn_key *);
static struct conn *new_conn(struct conntrack *ct, struct dp_packet *pkt,
- struct conn_key *, long long now);
+ struct conn_key *, long long now,
+ uint32_t tp_id);
static void delete_conn_cmn(struct conn *);
static void delete_conn(struct conn *);
static void delete_conn_one(struct conn *conn);
@@ -141,14 +144,9 @@ detect_ftp_ctl_type(const struct conn_lookup_ctx *ctx,
struct dp_packet *pkt);
static void
-expectation_clean(struct conntrack *ct, const struct conn_key *master_key);
+expectation_clean(struct conntrack *ct, const struct conn_key *parent_key);
-static struct ct_l4_proto *l4_protos[] = {
- [IPPROTO_TCP] = &ct_proto_tcp,
- [IPPROTO_UDP] = &ct_proto_other,
- [IPPROTO_ICMP] = &ct_proto_icmp4,
- [IPPROTO_ICMPV6] = &ct_proto_icmp6,
-};
+static struct ct_l4_proto *l4_protos[UINT8_MAX + 1];
static void
handle_ftp_ctl(struct conntrack *ct, const struct conn_lookup_ctx *ctx,
@@ -175,12 +173,6 @@ static alg_helper alg_helpers[] = {
[CT_ALG_CTL_TFTP] = handle_tftp_ctl,
};
-long long ct_timeout_val[] = {
-#define CT_TIMEOUT(NAME, VAL) [CT_TM_##NAME] = VAL,
- CT_TIMEOUTS
-#undef CT_TIMEOUT
-};
-
/* The maximum TCP or UDP port number. */
#define CT_MAX_L4_PORT 65535
/* String buffer used for parsing FTP string messages.
@@ -296,6 +288,7 @@ ct_print_conn_info(const struct conn *c, const char *log_msg,
struct conntrack *
conntrack_init(void)
{
+ static struct ovsthread_once setup_l4_once = OVSTHREAD_ONCE_INITIALIZER;
struct conntrack *ct = xzalloc(sizeof *ct);
ovs_rwlock_init(&ct->resources_lock);
@@ -312,6 +305,7 @@ conntrack_init(void)
}
hmap_init(&ct->zone_limits);
ct->zone_limit_seq = 0;
+ timeout_policy_init(ct);
ovs_mutex_unlock(&ct->ct_lock);
ct->hash_basis = random_uint32();
@@ -322,6 +316,18 @@ conntrack_init(void)
ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct);
ct->ipf = ipf_init();
+ /* Initialize the l4 protocols. */
+ if (ovsthread_once_start(&setup_l4_once)) {
+ for (int i = 0; i < ARRAY_SIZE(l4_protos); i++) {
+ l4_protos[i] = &ct_proto_other;
+ }
+ /* IPPROTO_UDP uses ct_proto_other, so no need to initialize it. */
+ l4_protos[IPPROTO_TCP] = &ct_proto_tcp;
+ l4_protos[IPPROTO_ICMP] = &ct_proto_icmp4;
+ l4_protos[IPPROTO_ICMPV6] = &ct_proto_icmp6;
+
+ ovsthread_once_done(&setup_l4_once);
+ }
return ct;
}
@@ -502,6 +508,12 @@ conntrack_destroy(struct conntrack *ct)
}
hmap_destroy(&ct->zone_limits);
+ struct timeout_policy *tp;
+ HMAP_FOR_EACH_POP (tp, node, &ct->timeout_policies) {
+ free(tp);
+ }
+ hmap_destroy(&ct->timeout_policies);
+
ovs_mutex_unlock(&ct->ct_lock);
ovs_mutex_destroy(&ct->ct_lock);
@@ -581,14 +593,14 @@ write_ct_md(struct dp_packet *pkt, uint16_t zone, const struct conn *conn,
/* Use the original direction tuple if we have it. */
if (conn) {
if (conn->alg_related) {
- key = &conn->master_key;
+ key = &conn->parent_key;
} else {
key = &conn->key;
}
} else if (alg_exp) {
- pkt->md.ct_mark = alg_exp->master_mark;
- pkt->md.ct_label = alg_exp->master_label;
- key = &alg_exp->master_key;
+ pkt->md.ct_mark = alg_exp->parent_mark;
+ pkt->md.ct_label = alg_exp->parent_label;
+ key = &alg_exp->parent_key;
}
pkt->md.ct_orig_tuple_ipv6 = false;
@@ -956,7 +968,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
struct conn_lookup_ctx *ctx, bool commit, long long now,
const struct nat_action_info_t *nat_action_info,
const char *helper, const struct alg_exp_node *alg_exp,
- enum ct_alg_ctl_type ct_alg_ctl)
+ enum ct_alg_ctl_type ct_alg_ctl, uint32_t tp_id)
OVS_REQUIRES(ct->ct_lock)
{
struct conn *nc = NULL;
@@ -987,7 +999,7 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
return nc;
}
- nc = new_conn(ct, pkt, &ctx->key, now);
+ nc = new_conn(ct, pkt, &ctx->key, now, tp_id);
memcpy(&nc->key, &ctx->key, sizeof nc->key);
memcpy(&nc->rev_key, &nc->key, sizeof nc->rev_key);
conn_key_reverse(&nc->rev_key);
@@ -998,9 +1010,9 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
if (alg_exp) {
nc->alg_related = true;
- nc->mark = alg_exp->master_mark;
- nc->label = alg_exp->master_label;
- nc->master_key = alg_exp->master_key;
+ nc->mark = alg_exp->parent_mark;
+ nc->label = alg_exp->parent_label;
+ nc->parent_key = alg_exp->parent_key;
}
if (nat_action_info) {
@@ -1275,8 +1287,14 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
bool force, bool commit, long long now, const uint32_t *setmark,
const struct ovs_key_ct_labels *setlabel,
const struct nat_action_info_t *nat_action_info,
- ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper)
+ ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
+ uint32_t tp_id)
{
+ /* Reset ct_state whenever entering a new zone. */
+ if (pkt->md.ct_state && pkt->md.ct_zone != zone) {
+ pkt->md.ct_state = 0;
+ }
+
bool create_new_conn = false;
conn_key_lookup(ct, &ctx->key, ctx->hash, now, &ctx->conn, &ctx->reply);
struct conn *conn = ctx->conn;
@@ -1300,9 +1318,10 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
conn_key_lookup(ct, &ctx->key, hash, now, &conn, &ctx->reply);
if (!conn) {
- pkt->md.ct_state |= CS_TRACKED | CS_INVALID;
- char *log_msg = xasprintf("Missing master conn %p", rev_conn);
- ct_print_conn_info(conn, log_msg, VLL_INFO, true, true);
+ pkt->md.ct_state |= CS_INVALID;
+ write_ct_md(pkt, zone, NULL, NULL, NULL);
+ char *log_msg = xasprintf("Missing parent conn %p", rev_conn);
+ ct_print_conn_info(rev_conn, log_msg, VLL_INFO, true, true);
free(log_msg);
return;
}
@@ -1353,7 +1372,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
ovs_mutex_lock(&ct->ct_lock);
if (!conn_lookup(ct, &ctx->key, now, NULL, NULL)) {
conn = conn_not_found(ct, pkt, ctx, commit, now, nat_action_info,
- helper, alg_exp, ct_alg_ctl);
+ helper, alg_exp, ct_alg_ctl, tp_id);
}
ovs_mutex_unlock(&ct->ct_lock);
}
@@ -1389,7 +1408,7 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
const struct ovs_key_ct_labels *setlabel,
ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
const struct nat_action_info_t *nat_action_info,
- long long now)
+ long long now, uint32_t tp_id)
{
ipf_preprocess_conntrack(ct->ipf, pkt_batch, now, dl_type, zone,
ct->hash_basis);
@@ -1411,7 +1430,8 @@ conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
write_ct_md(packet, zone, NULL, NULL, NULL);
} else {
process_one(ct, packet, &ctx, zone, force, commit, now, setmark,
- setlabel, nat_action_info, tp_src, tp_dst, helper);
+ setlabel, nat_action_info, tp_src, tp_dst, helper,
+ tp_id);
}
}
@@ -1517,7 +1537,7 @@ conntrack_clean(struct conntrack *ct, long long now)
atomic_read_relaxed(&ct->n_conn_limit, &n_conn_limit);
size_t clean_max = n_conn_limit > 10 ? n_conn_limit / 10 : 1;
long long min_exp = ct_sweep(ct, now, clean_max);
- long long next_wakeup = MIN(min_exp, now + CT_TM_MIN);
+ long long next_wakeup = MIN(min_exp, now + CT_DPIF_NETDEV_TP_MIN);
return next_wakeup;
}
@@ -1655,6 +1675,7 @@ checksum_valid(const struct conn_key *key, const void *data, size_t size,
} else if (key->dl_type == htons(ETH_TYPE_IPV6)) {
return packet_csum_upperlayer6(l3, data, key->nw_proto, size) == 0;
} else {
+ COVERAGE_INC(conntrack_l4csum_err);
return false;
}
}
@@ -1698,7 +1719,12 @@ check_l4_udp(const struct conn_key *key, const void *data, size_t size,
static inline bool
check_l4_icmp(const void *data, size_t size, bool validate_checksum)
{
- return validate_checksum ? csum(data, size) == 0 : true;
+    /* Nothing to verify unless checksum validation was requested; when it
+     * was, a zero csum() result over the ICMP data is accepted. */
+    if (!validate_checksum || csum(data, size) == 0) {
+        return true;
+    }
+
+    /* Checksum mismatch: count it before rejecting. */
+    COVERAGE_INC(conntrack_l4csum_err);
+    return false;
}
static inline bool
@@ -1964,9 +1990,10 @@ extract_l4(struct conn_key *key, const void *data, size_t size, bool *related,
return (!related || check_l4_icmp6(key, data, size, l3,
validate_checksum))
&& extract_l4_icmp6(key, data, size, related);
- } else {
- return false;
}
+
+ /* For all other protocols we do not have L4 keys, so keep them zero. */
+ return true;
}
static bool
@@ -2249,8 +2276,8 @@ nat_select_range_tuple(struct conntrack *ct, const struct conn *conn,
conn->nat_info->nat_action & NAT_ACTION_SRC_PORT
? true : false;
union ct_addr first_addr = ct_addr;
- bool pat_enabled = conn->key.nw_proto != IPPROTO_ICMP &&
- conn->key.nw_proto != IPPROTO_ICMPV6;
+ bool pat_enabled = conn->key.nw_proto == IPPROTO_TCP ||
+ conn->key.nw_proto == IPPROTO_UDP;
while (true) {
if (conn->nat_info->nat_action & NAT_ACTION_SRC) {
@@ -2341,9 +2368,9 @@ valid_new(struct dp_packet *pkt, struct conn_key *key)
+/* Dispatches to the new_conn() handler that 'l4_protos' holds for
+ * 'key->nw_proto' and returns its result.  'ct', 'pkt', 'now' and 'tp_id'
+ * are forwarded to the handler unchanged. */
static struct conn *
new_conn(struct conntrack *ct, struct dp_packet *pkt, struct conn_key *key,
- long long now)
+ long long now, uint32_t tp_id)
{
- return l4_protos[key->nw_proto]->new_conn(ct, pkt, now);
+ return l4_protos[key->nw_proto]->new_conn(ct, pkt, now, tp_id);
}
static void
@@ -2659,16 +2686,16 @@ expectation_remove(struct hmap *alg_expectations,
/* This function must be called with the ct->resources read lock taken. */
static struct alg_exp_node *
expectation_ref_lookup_unique(const struct hindex *alg_expectation_refs,
- const struct conn_key *master_key,
+ const struct conn_key *parent_key,
const struct conn_key *alg_exp_key,
uint32_t basis)
{
struct alg_exp_node *alg_exp_node;
HINDEX_FOR_EACH_WITH_HASH (alg_exp_node, node_ref,
- conn_key_hash(master_key, basis),
+ conn_key_hash(parent_key, basis),
alg_expectation_refs) {
- if (!conn_key_cmp(&alg_exp_node->master_key, master_key) &&
+ if (!conn_key_cmp(&alg_exp_node->parent_key, parent_key) &&
!conn_key_cmp(&alg_exp_node->key, alg_exp_key)) {
return alg_exp_node;
}
@@ -2683,23 +2710,23 @@ expectation_ref_create(struct hindex *alg_expectation_refs,
uint32_t basis)
{
if (!expectation_ref_lookup_unique(alg_expectation_refs,
- &alg_exp_node->master_key,
+ &alg_exp_node->parent_key,
&alg_exp_node->key, basis)) {
hindex_insert(alg_expectation_refs, &alg_exp_node->node_ref,
- conn_key_hash(&alg_exp_node->master_key, basis));
+ conn_key_hash(&alg_exp_node->parent_key, basis));
}
}
static void
-expectation_clean(struct conntrack *ct, const struct conn_key *master_key)
+expectation_clean(struct conntrack *ct, const struct conn_key *parent_key)
{
ovs_rwlock_wrlock(&ct->resources_lock);
struct alg_exp_node *node, *next;
HINDEX_FOR_EACH_WITH_HASH_SAFE (node, next, node_ref,
- conn_key_hash(master_key, ct->hash_basis),
+ conn_key_hash(parent_key, ct->hash_basis),
&ct->alg_expectation_refs) {
- if (!conn_key_cmp(&node->master_key, master_key)) {
+ if (!conn_key_cmp(&node->parent_key, parent_key)) {
expectation_remove(&ct->alg_expectations, &node->key,
ct->hash_basis);
hindex_remove(&ct->alg_expectation_refs, &node->node_ref);
@@ -2712,7 +2739,7 @@ expectation_clean(struct conntrack *ct, const struct conn_key *master_key)
static void
expectation_create(struct conntrack *ct, ovs_be16 dst_port,
- const struct conn *master_conn, bool reply, bool src_ip_wc,
+ const struct conn *parent_conn, bool reply, bool src_ip_wc,
bool skip_nat)
{
union ct_addr src_addr;
@@ -2721,47 +2748,47 @@ expectation_create(struct conntrack *ct, ovs_be16 dst_port,
struct alg_exp_node *alg_exp_node = xzalloc(sizeof *alg_exp_node);
if (reply) {
- src_addr = master_conn->key.src.addr;
- dst_addr = master_conn->key.dst.addr;
+ src_addr = parent_conn->key.src.addr;
+ dst_addr = parent_conn->key.dst.addr;
alg_exp_node->nat_rpl_dst = true;
if (skip_nat) {
alg_nat_repl_addr = dst_addr;
- } else if (master_conn->nat_info &&
- master_conn->nat_info->nat_action & NAT_ACTION_DST) {
- alg_nat_repl_addr = master_conn->rev_key.src.addr;
+ } else if (parent_conn->nat_info &&
+ parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
+ alg_nat_repl_addr = parent_conn->rev_key.src.addr;
alg_exp_node->nat_rpl_dst = false;
} else {
- alg_nat_repl_addr = master_conn->rev_key.dst.addr;
+ alg_nat_repl_addr = parent_conn->rev_key.dst.addr;
}
} else {
- src_addr = master_conn->rev_key.src.addr;
- dst_addr = master_conn->rev_key.dst.addr;
+ src_addr = parent_conn->rev_key.src.addr;
+ dst_addr = parent_conn->rev_key.dst.addr;
alg_exp_node->nat_rpl_dst = false;
if (skip_nat) {
alg_nat_repl_addr = src_addr;
- } else if (master_conn->nat_info &&
- master_conn->nat_info->nat_action & NAT_ACTION_DST) {
- alg_nat_repl_addr = master_conn->key.dst.addr;
+ } else if (parent_conn->nat_info &&
+ parent_conn->nat_info->nat_action & NAT_ACTION_DST) {
+ alg_nat_repl_addr = parent_conn->key.dst.addr;
alg_exp_node->nat_rpl_dst = true;
} else {
- alg_nat_repl_addr = master_conn->key.src.addr;
+ alg_nat_repl_addr = parent_conn->key.src.addr;
}
}
if (src_ip_wc) {
memset(&src_addr, 0, sizeof src_addr);
}
- alg_exp_node->key.dl_type = master_conn->key.dl_type;
- alg_exp_node->key.nw_proto = master_conn->key.nw_proto;
- alg_exp_node->key.zone = master_conn->key.zone;
+ alg_exp_node->key.dl_type = parent_conn->key.dl_type;
+ alg_exp_node->key.nw_proto = parent_conn->key.nw_proto;
+ alg_exp_node->key.zone = parent_conn->key.zone;
alg_exp_node->key.src.addr = src_addr;
alg_exp_node->key.dst.addr = dst_addr;
alg_exp_node->key.src.port = ALG_WC_SRC_PORT;
alg_exp_node->key.dst.port = dst_port;
- alg_exp_node->master_mark = master_conn->mark;
- alg_exp_node->master_label = master_conn->label;
- memcpy(&alg_exp_node->master_key, &master_conn->key,
- sizeof alg_exp_node->master_key);
+ alg_exp_node->parent_mark = parent_conn->mark;
+ alg_exp_node->parent_label = parent_conn->label;
+ memcpy(&alg_exp_node->parent_key, &parent_conn->key,
+ sizeof alg_exp_node->parent_key);
/* Take the write lock here because it is almost 100%
* likely that the lookup will fail and
* expectation_create() will be called below. */
diff --git a/lib/conntrack.h b/lib/conntrack.h
index b0d0fc8d9597a45248424f07d864e93ba5c5d702..9553b188a41046642c0a56b2a44f93f932ca0444 100644
--- a/lib/conntrack.h
+++ b/lib/conntrack.h
@@ -20,6 +20,7 @@
#include
#include "cmap.h"
+#include "ct-dpif.h"
#include "latch.h"
#include "odp-netlink.h"
#include "openvswitch/hmap.h"
@@ -93,7 +94,7 @@ int conntrack_execute(struct conntrack *ct, struct dp_packet_batch *pkt_batch,
const struct ovs_key_ct_labels *setlabel,
ovs_be16 tp_src, ovs_be16 tp_dst, const char *helper,
const struct nat_action_info_t *nat_action_info,
- long long now);
+ long long now, uint32_t tp_id);
void conntrack_clear(struct dp_packet *packet);
struct conntrack_dump {
@@ -111,6 +112,11 @@ struct conntrack_zone_limit {
uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */
};
+/* A 'struct ct_dpif_timeout_policy' paired with an hmap node, so that a
+ * policy can be linked into an hmap.
+ * NOTE(review): which hmap stores these nodes is not visible here. */
+struct timeout_policy {
+ struct hmap_node node;
+ struct ct_dpif_timeout_policy policy;
+};
+
enum {
INVALID_ZONE = -2,
DEFAULT_ZONE = -1, /* Default zone for zone limit management. */
diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
index 8c2480e7ac39dc72fbc718d3901465bccab9942b..6a5ba052dd2054a5af3ab05ac4a6bfa3900812a9 100644
--- a/lib/ct-dpif.c
+++ b/lib/ct-dpif.c
@@ -323,9 +323,9 @@ ct_dpif_format_entry(const struct ct_dpif_entry *entry, struct ds *ds,
}
ct_dpif_format_protoinfo(ds, ",protoinfo=", &entry->protoinfo, verbose);
ct_dpif_format_helper(ds, ",helper=", &entry->helper);
- if (verbose && entry->tuple_master.l3_type != 0) {
- ds_put_cstr(ds, ",master=(");
- ct_dpif_format_tuple(ds, &entry->tuple_master);
+ if (verbose && entry->tuple_parent.l3_type != 0) {
+ ds_put_cstr(ds, ",parent=(");
+ ct_dpif_format_tuple(ds, &entry->tuple_parent);
ds_put_cstr(ds, ")");
}
}
diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
index 3e227d9e3b6e2aa14eab9cf2cc4bd0610f09a64e..88f4c7e28cbffa57509ed98727f0c10276f922ad 100644
--- a/lib/ct-dpif.h
+++ b/lib/ct-dpif.h
@@ -59,6 +59,8 @@ struct ct_dpif_timestamp {
uint64_t stop;
};
+/* Default 'tp_id' value.
+ * NOTE(review): "tp" presumably abbreviates "timeout policy"; confirm
+ * against the users of this macro. */
+#define DEFAULT_TP_ID 0
+
#define CT_DPIF_TCP_STATES \
CT_DPIF_TCP_STATE(CLOSED) \
CT_DPIF_TCP_STATE(LISTEN) \
@@ -175,7 +177,7 @@ struct ct_dpif_entry {
/* Const members. */
struct ct_dpif_tuple tuple_orig;
struct ct_dpif_tuple tuple_reply;
- struct ct_dpif_tuple tuple_master;
+ struct ct_dpif_tuple tuple_parent;
struct ct_dpif_helper helper;
uint32_t id;
uint16_t zone;
diff --git a/lib/daemon-private.h b/lib/daemon-private.h
index 4e0667601001239e3381dc602206a79a2e22232b..2b90e004235c1bb9ad73bdd99b91abb16e7392d6 100644
--- a/lib/daemon-private.h
+++ b/lib/daemon-private.h
@@ -20,6 +20,7 @@
extern bool detach;
extern char *pidfile;
extern int daemonize_fd;
+extern bool monitor;
char *make_pidfile_name(const char *name);
diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c
index 7e48630f0e93abdc26d9205314a14e5ee65c880f..ae59ecf2c2b515531219b670cc18a86e1dda26ab 100644
--- a/lib/daemon-unix.c
+++ b/lib/daemon-unix.c
@@ -80,7 +80,7 @@ int daemonize_fd = -1;
/* --monitor: Should a supervisory process monitor the daemon and restart it if
* it dies due to an error signal? */
-static bool monitor;
+bool monitor;
/* --user: Only root can use this option. Switch to new uid:gid after
* initially running as root. */
@@ -434,8 +434,8 @@ monitor_daemon(pid_t daemon_pid)
/* If daemonization is configured, then starts daemonization, by forking and
* returning in the child process. The parent process hangs around until the
* child lets it know either that it completed startup successfully (by calling
- * daemon_complete()) or that it failed to start up (by exiting with a nonzero
- * exit code). */
+ * daemonize_complete()) or that it failed to start up (by exiting with a
+ * nonzero exit code). */
void
daemonize_start(bool access_datapath)
{
diff --git a/lib/daemon.man b/lib/daemon.man
index 68c0a312db6a49af3d65d9d67bf042911d4f820e..a92f8c4d5a1de91880749a827d4e000479ab90ec 100644
--- a/lib/daemon.man
+++ b/lib/daemon.man
@@ -58,7 +58,7 @@ This option has no effect when \fB\-\-detach\fR is not specified.
.TP
\fB\-\-no\-self\-confinement\fR
By default daemon will try to self-confine itself to work with
-files under well-know, at build-time whitelisted directories. It
+files under well-known directories determined during build. It
is better to stick with this default behavior and not to use this
flag unless some other Access Control is used to confine daemon.
Note that in contrast to other access control implementations that
diff --git a/lib/daemon.xml b/lib/daemon.xml
index 1b5e8acae21a11acb7ac9851ba808652f740f750..5a421ccab385c30626d0ac89db5b16942dd79b19 100644
--- a/lib/daemon.xml
+++ b/lib/daemon.xml
@@ -82,7 +82,7 @@
--no-self-confinement
By default this daemon will try to self-confine itself to work with files
- under well-known directories whitelisted at build time. It is better to
+ under well-known directories determined at build time. It is better to
stick with this default behavior and not to use this flag unless some other
Access Control is used to confine daemon. Note that in contrast to other
access control implementations that are typically enforced from
diff --git a/lib/dns-resolve.c b/lib/dns-resolve.c
index 1ff58960fe01fa499e55cea99f4398ea36a090b2..d34451434386309d6991b1dd4d055873937d9744 100644
--- a/lib/dns-resolve.c
+++ b/lib/dns-resolve.c
@@ -82,6 +82,18 @@ dns_resolve_init(bool is_daemon)
return;
}
+ const char *ub_conf_filename = getenv("OVS_UNBOUND_CONF");
+ if (ub_conf_filename != NULL) {
+ int retval = ub_ctx_config(ub_ctx__, ub_conf_filename);
+ if (retval != 0) {
+ VLOG_WARN_RL(&rl, "Failed to set libunbound context config: %s",
+ ub_strerror(retval));
+ ub_ctx_delete(ub_ctx__);
+ ub_ctx__ = NULL;
+ return;
+ }
+ }
+
const char *filename = getenv("OVS_RESOLV_CONF");
if (!filename) {
#ifdef _WIN32
diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index cd2623500e3d868f92a00c9e466b6c95dba56c37..72f6d09ac7f3cc9a0830f6d952b587d36df74226 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -192,10 +192,8 @@ dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
sizeof(struct dp_packet) -
offsetof(struct dp_packet, l2_pad_size));
-#ifdef DPDK_NETDEV
- new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags;
- new_buffer->mbuf.ol_flags &= ~DPDK_MBUF_NON_OFFLOADING_FLAGS;
-#endif
+ *dp_packet_ol_flags_ptr(new_buffer) = *dp_packet_ol_flags_ptr(buffer);
+ *dp_packet_ol_flags_ptr(new_buffer) &= DP_PACKET_OL_SUPPORTED_MASK;
if (dp_packet_rss_valid(buffer)) {
dp_packet_set_rss_hash(new_buffer, dp_packet_get_rss_hash(buffer));
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 9f8991faad52049622cae7deca1fb0e27b237c6a..0430cca8ebbd6453ecde3344073f787f6939440a 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -48,18 +48,62 @@ enum OVS_PACKED_ENUM dp_packet_source {
#define DP_PACKET_CONTEXT_SIZE 64
-#ifndef DPDK_NETDEV
+/* DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) expands to the enumerator
+ * definition 'NAME = value'.  The value is DPDK_DEF when DPDK_NETDEV is
+ * defined and GENERIC_DEF otherwise. */
+#ifdef DPDK_NETDEV
+#define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = DPDK_DEF
+#else
+#define DEF_OL_FLAG(NAME, DPDK_DEF, GENERIC_DEF) NAME = GENERIC_DEF
+#endif
+
/* Bit masks for the 'ol_flags' member of the 'dp_packet' structure. */
enum dp_packet_offload_mask {
- DP_PACKET_OL_RSS_HASH_MASK = 0x1, /* Is the 'rss_hash' valid? */
- DP_PACKET_OL_FLOW_MARK_MASK = 0x2, /* Is the 'flow_mark' valid? */
+ /* Value 0 is not used. */
+ /* Is the 'rss_hash' valid? */
+ DEF_OL_FLAG(DP_PACKET_OL_RSS_HASH, PKT_RX_RSS_HASH, 0x1),
+ /* Is the 'flow_mark' valid? */
+ DEF_OL_FLAG(DP_PACKET_OL_FLOW_MARK, PKT_RX_FDIR_ID, 0x2),
+ /* Bad L4 checksum in the packet. */
+ DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, 0x4),
+ /* Bad IP checksum in the packet. */
+ DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD, 0x8),
+ /* Valid L4 checksum in the packet. */
+ DEF_OL_FLAG(DP_PACKET_OL_RX_L4_CKSUM_GOOD, PKT_RX_L4_CKSUM_GOOD, 0x10),
+ /* Valid IP checksum in the packet. */
+ DEF_OL_FLAG(DP_PACKET_OL_RX_IP_CKSUM_GOOD, PKT_RX_IP_CKSUM_GOOD, 0x20),
+ /* TCP Segmentation Offload. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_SEG, PKT_TX_TCP_SEG, 0x40),
+ /* Offloaded packet is IPv4. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_IPV4, PKT_TX_IPV4, 0x80),
+ /* Offloaded packet is IPv6. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_IPV6, PKT_TX_IPV6, 0x100),
+ /* Offload TCP checksum. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_TCP_CKSUM, PKT_TX_TCP_CKSUM, 0x200),
+ /* Offload UDP checksum. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_UDP_CKSUM, PKT_TX_UDP_CKSUM, 0x400),
+ /* Offload SCTP checksum. */
+ DEF_OL_FLAG(DP_PACKET_OL_TX_SCTP_CKSUM, PKT_TX_SCTP_CKSUM, 0x800),
+ /* Adding a new flag requires adding it to DP_PACKET_OL_SUPPORTED_MASK. */
};
-#else
-/* DPDK mbuf ol_flags that are not really an offload flags. These are mostly
- * related to mbuf memory layout and OVS should not touch/clear them. */
-#define DPDK_MBUF_NON_OFFLOADING_FLAGS (EXT_ATTACHED_MBUF | \
- IND_ATTACHED_MBUF)
-#endif
+
+/* Bitwise OR of every flag declared in 'enum dp_packet_offload_mask'.  A new
+ * enumerator must also be added here. */
+#define DP_PACKET_OL_SUPPORTED_MASK (DP_PACKET_OL_RSS_HASH | \
+ DP_PACKET_OL_FLOW_MARK | \
+ DP_PACKET_OL_RX_L4_CKSUM_BAD | \
+ DP_PACKET_OL_RX_IP_CKSUM_BAD | \
+ DP_PACKET_OL_RX_L4_CKSUM_GOOD | \
+ DP_PACKET_OL_RX_IP_CKSUM_GOOD | \
+ DP_PACKET_OL_TX_TCP_SEG | \
+ DP_PACKET_OL_TX_IPV4 | \
+ DP_PACKET_OL_TX_IPV6 | \
+ DP_PACKET_OL_TX_TCP_CKSUM | \
+ DP_PACKET_OL_TX_UDP_CKSUM | \
+ DP_PACKET_OL_TX_SCTP_CKSUM)
+
+/* Bitwise OR of the three TX L4 checksum offload flags (TCP, UDP, SCTP). */
+#define DP_PACKET_OL_TX_L4_MASK (DP_PACKET_OL_TX_TCP_CKSUM | \
+ DP_PACKET_OL_TX_UDP_CKSUM | \
+ DP_PACKET_OL_TX_SCTP_CKSUM)
+/* Bitwise OR of the "good" and "bad" RX IP checksum flags. */
+#define DP_PACKET_OL_RX_IP_CKSUM_MASK (DP_PACKET_OL_RX_IP_CKSUM_GOOD | \
+ DP_PACKET_OL_RX_IP_CKSUM_BAD)
+/* Bitwise OR of the "good" and "bad" RX L4 checksum flags. */
+#define DP_PACKET_OL_RX_L4_CKSUM_MASK (DP_PACKET_OL_RX_L4_CKSUM_GOOD | \
+ DP_PACKET_OL_RX_L4_CKSUM_BAD)
/* Buffer for holding packet data. A dp_packet is automatically reallocated
* as necessary if it grows too large for the available memory.
@@ -450,6 +494,45 @@ dp_packet_get_nd_payload(const struct dp_packet *b)
? (const char *)dp_packet_l4(b) + ND_MSG_LEN : NULL;
}
+/* Accessors for the offload-related fields of a packet.  Each one takes a
+ * const packet and returns a non-const pointer (the qualifier is removed with
+ * CONST_CAST), so the result can be used for both reads and writes.
+ *
+ * Note that the flags word is 64 bits wide in the DPDK_NETDEV branch and
+ * 32 bits wide otherwise. */
+#ifdef DPDK_NETDEV
+/* Returns a pointer to 'b->mbuf.ol_flags'. */
+static inline uint64_t *
+dp_packet_ol_flags_ptr(const struct dp_packet *b)
+{
+ return CONST_CAST(uint64_t *, &b->mbuf.ol_flags);
+}
+
+/* Returns a pointer to 'b->mbuf.hash.rss'. */
+static inline uint32_t *
+dp_packet_rss_ptr(const struct dp_packet *b)
+{
+ return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);
+}
+
+/* Returns a pointer to 'b->mbuf.hash.fdir.hi', which holds the flow mark. */
+static inline uint32_t *
+dp_packet_flow_mark_ptr(const struct dp_packet *b)
+{
+ return CONST_CAST(uint32_t *, &b->mbuf.hash.fdir.hi);
+}
+
+#else
+/* Returns a pointer to 'b->ol_flags'. */
+static inline uint32_t *
+dp_packet_ol_flags_ptr(const struct dp_packet *b)
+{
+ return CONST_CAST(uint32_t *, &b->ol_flags);
+}
+
+/* Returns a pointer to 'b->rss_hash'. */
+static inline uint32_t *
+dp_packet_rss_ptr(const struct dp_packet *b)
+{
+ return CONST_CAST(uint32_t *, &b->rss_hash);
+}
+
+/* Returns a pointer to 'b->flow_mark'. */
+static inline uint32_t *
+dp_packet_flow_mark_ptr(const struct dp_packet *b)
+{
+ return CONST_CAST(uint32_t *, &b->flow_mark);
+}
+#endif
+
#ifdef DPDK_NETDEV
BUILD_ASSERT_DECL(offsetof(struct dp_packet, mbuf) == 0);
@@ -521,168 +604,6 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
b->mbuf.buf_len = s;
}
-/* Returns 'true' if packet 'b' is marked for TCP segmentation offloading. */
-static inline bool
-dp_packet_hwol_is_tso(const struct dp_packet *b)
-{
- return !!(b->mbuf.ol_flags & PKT_TX_TCP_SEG);
-}
-
-/* Returns 'true' if packet 'b' is marked for IPv4 checksum offloading. */
-static inline bool
-dp_packet_hwol_is_ipv4(const struct dp_packet *b)
-{
- return !!(b->mbuf.ol_flags & PKT_TX_IPV4);
-}
-
-/* Returns the L4 cksum offload bitmask. */
-static inline uint64_t
-dp_packet_hwol_l4_mask(const struct dp_packet *b)
-{
- return b->mbuf.ol_flags & PKT_TX_L4_MASK;
-}
-
-/* Returns 'true' if packet 'b' is marked for TCP checksum offloading. */
-static inline bool
-dp_packet_hwol_l4_is_tcp(const struct dp_packet *b)
-{
- return (b->mbuf.ol_flags & PKT_TX_L4_MASK) == PKT_TX_TCP_CKSUM;
-}
-
-/* Returns 'true' if packet 'b' is marked for UDP checksum offloading. */
-static inline bool
-dp_packet_hwol_l4_is_udp(struct dp_packet *b)
-{
- return (b->mbuf.ol_flags & PKT_TX_L4_MASK) == PKT_TX_UDP_CKSUM;
-}
-
-/* Returns 'true' if packet 'b' is marked for SCTP checksum offloading. */
-static inline bool
-dp_packet_hwol_l4_is_sctp(struct dp_packet *b)
-{
- return (b->mbuf.ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM;
-}
-
-/* Mark packet 'b' for IPv4 checksum offloading. */
-static inline void
-dp_packet_hwol_set_tx_ipv4(struct dp_packet *b)
-{
- b->mbuf.ol_flags |= PKT_TX_IPV4;
-}
-
-/* Mark packet 'b' for IPv6 checksum offloading. */
-static inline void
-dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
-{
- b->mbuf.ol_flags |= PKT_TX_IPV6;
-}
-
-/* Mark packet 'b' for TCP checksum offloading. It implies that either
- * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */
-static inline void
-dp_packet_hwol_set_csum_tcp(struct dp_packet *b)
-{
- b->mbuf.ol_flags |= PKT_TX_TCP_CKSUM;
-}
-
-/* Mark packet 'b' for UDP checksum offloading. It implies that either
- * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */
-static inline void
-dp_packet_hwol_set_csum_udp(struct dp_packet *b)
-{
- b->mbuf.ol_flags |= PKT_TX_UDP_CKSUM;
-}
-
-/* Mark packet 'b' for SCTP checksum offloading. It implies that either
- * the packet 'b' is marked for IPv4 or IPv6 checksum offloading. */
-static inline void
-dp_packet_hwol_set_csum_sctp(struct dp_packet *b)
-{
- b->mbuf.ol_flags |= PKT_TX_SCTP_CKSUM;
-}
-
-/* Mark packet 'b' for TCP segmentation offloading. It implies that
- * either the packet 'b' is marked for IPv4 or IPv6 checksum offloading
- * and also for TCP checksum offloading. */
-static inline void
-dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
-{
- b->mbuf.ol_flags |= PKT_TX_TCP_SEG;
-}
-
-/* Returns the RSS hash of the packet 'p'. Note that the returned value is
- * correct only if 'dp_packet_rss_valid(p)' returns true */
-static inline uint32_t
-dp_packet_get_rss_hash(const struct dp_packet *p)
-{
- return p->mbuf.hash.rss;
-}
-
-static inline void
-dp_packet_set_rss_hash(struct dp_packet *p, uint32_t hash)
-{
- p->mbuf.hash.rss = hash;
- p->mbuf.ol_flags |= PKT_RX_RSS_HASH;
-}
-
-static inline bool
-dp_packet_rss_valid(const struct dp_packet *p)
-{
- return p->mbuf.ol_flags & PKT_RX_RSS_HASH;
-}
-
-static inline void
-dp_packet_reset_offload(struct dp_packet *p)
-{
- p->mbuf.ol_flags &= DPDK_MBUF_NON_OFFLOADING_FLAGS;
-}
-
-static inline bool
-dp_packet_ip_checksum_valid(const struct dp_packet *p)
-{
- return (p->mbuf.ol_flags & PKT_RX_IP_CKSUM_MASK) ==
- PKT_RX_IP_CKSUM_GOOD;
-}
-
-static inline bool
-dp_packet_ip_checksum_bad(const struct dp_packet *p)
-{
- return (p->mbuf.ol_flags & PKT_RX_IP_CKSUM_MASK) ==
- PKT_RX_IP_CKSUM_BAD;
-}
-
-static inline bool
-dp_packet_l4_checksum_valid(const struct dp_packet *p)
-{
- return (p->mbuf.ol_flags & PKT_RX_L4_CKSUM_MASK) ==
- PKT_RX_L4_CKSUM_GOOD;
-}
-
-static inline bool
-dp_packet_l4_checksum_bad(const struct dp_packet *p)
-{
- return (p->mbuf.ol_flags & PKT_RX_L4_CKSUM_MASK) ==
- PKT_RX_L4_CKSUM_BAD;
-}
-
-static inline bool
-dp_packet_has_flow_mark(const struct dp_packet *p, uint32_t *mark)
-{
- if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
- *mark = p->mbuf.hash.fdir.hi;
- return true;
- }
-
- return false;
-}
-
-static inline void
-dp_packet_set_flow_mark(struct dp_packet *p, uint32_t mark)
-{
- p->mbuf.hash.fdir.hi = mark;
- p->mbuf.ol_flags |= PKT_RX_FDIR_ID;
-}
-
#else /* DPDK_NETDEV */
static inline void
@@ -739,151 +660,6 @@ dp_packet_set_allocated(struct dp_packet *b, uint16_t s)
b->allocated_ = s;
}
-/* There are no implementation when not DPDK enabled datapath. */
-static inline bool
-dp_packet_hwol_is_tso(const struct dp_packet *b OVS_UNUSED)
-{
- return false;
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline bool
-dp_packet_hwol_is_ipv4(const struct dp_packet *b OVS_UNUSED)
-{
- return false;
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline uint64_t
-dp_packet_hwol_l4_mask(const struct dp_packet *b OVS_UNUSED)
-{
- return 0;
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline bool
-dp_packet_hwol_l4_is_tcp(const struct dp_packet *b OVS_UNUSED)
-{
- return false;
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline bool
-dp_packet_hwol_l4_is_udp(const struct dp_packet *b OVS_UNUSED)
-{
- return false;
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline bool
-dp_packet_hwol_l4_is_sctp(const struct dp_packet *b OVS_UNUSED)
-{
- return false;
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline void
-dp_packet_hwol_set_tx_ipv4(struct dp_packet *b OVS_UNUSED)
-{
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline void
-dp_packet_hwol_set_tx_ipv6(struct dp_packet *b OVS_UNUSED)
-{
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline void
-dp_packet_hwol_set_csum_tcp(struct dp_packet *b OVS_UNUSED)
-{
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline void
-dp_packet_hwol_set_csum_udp(struct dp_packet *b OVS_UNUSED)
-{
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline void
-dp_packet_hwol_set_csum_sctp(struct dp_packet *b OVS_UNUSED)
-{
-}
-
-/* There are no implementation when not DPDK enabled datapath. */
-static inline void
-dp_packet_hwol_set_tcp_seg(struct dp_packet *b OVS_UNUSED)
-{
-}
-
-/* Returns the RSS hash of the packet 'p'. Note that the returned value is
- * correct only if 'dp_packet_rss_valid(p)' returns true */
-static inline uint32_t
-dp_packet_get_rss_hash(const struct dp_packet *p)
-{
- return p->rss_hash;
-}
-
-static inline void
-dp_packet_set_rss_hash(struct dp_packet *p, uint32_t hash)
-{
- p->rss_hash = hash;
- p->ol_flags |= DP_PACKET_OL_RSS_HASH_MASK;
-}
-
-static inline bool
-dp_packet_rss_valid(const struct dp_packet *p)
-{
- return p->ol_flags & DP_PACKET_OL_RSS_HASH_MASK;
-}
-
-static inline void
-dp_packet_reset_offload(struct dp_packet *p)
-{
- p->ol_flags = 0;
-}
-
-static inline bool
-dp_packet_ip_checksum_valid(const struct dp_packet *p OVS_UNUSED)
-{
- return false;
-}
-
-static inline bool
-dp_packet_ip_checksum_bad(const struct dp_packet *p OVS_UNUSED)
-{
- return false;
-}
-
-static inline bool
-dp_packet_l4_checksum_valid(const struct dp_packet *p OVS_UNUSED)
-{
- return false;
-}
-
-static inline bool
-dp_packet_l4_checksum_bad(const struct dp_packet *p OVS_UNUSED)
-{
- return false;
-}
-
-static inline bool
-dp_packet_has_flow_mark(const struct dp_packet *p, uint32_t *mark)
-{
- if (p->ol_flags & DP_PACKET_OL_FLOW_MARK_MASK) {
- *mark = p->flow_mark;
- return true;
- }
- return false;
-}
-
-static inline void
-dp_packet_set_flow_mark(struct dp_packet *p, uint32_t mark)
-{
- p->flow_mark = mark;
- p->ol_flags |= DP_PACKET_OL_FLOW_MARK_MASK;
-}
#endif /* DPDK_NETDEV */
static inline void
@@ -1112,6 +888,58 @@ dp_packet_batch_reset_cutlen(struct dp_packet_batch *batch)
}
}
+/* Reads the RSS hash stored in packet 'p'.  The value is meaningful only
+ * when 'dp_packet_rss_valid(p)' returns 'true'. */
+static inline uint32_t
+dp_packet_get_rss_hash(const struct dp_packet *p)
+{
+    const uint32_t *rss = dp_packet_rss_ptr(p);
+
+    return *rss;
+}
+
+/* Stores 'hash' as the RSS hash of packet 'p' and sets
+ * DP_PACKET_OL_RSS_HASH in its offload flags, which is the flag that
+ * dp_packet_rss_valid() tests. */
+static inline void
+dp_packet_set_rss_hash(struct dp_packet *p, uint32_t hash)
+{
+ *dp_packet_rss_ptr(p) = hash;
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_RSS_HASH;
+}
+
+/* Returns 'true' if DP_PACKET_OL_RSS_HASH is set in the offload flags of
+ * packet 'p'. */
+static inline bool
+dp_packet_rss_valid(const struct dp_packet *p)
+{
+    uint64_t flags = *dp_packet_ol_flags_ptr(p);
+
+    return (flags & DP_PACKET_OL_RSS_HASH) != 0;
+}
+
+/* Clears every flag that belongs to DP_PACKET_OL_SUPPORTED_MASK from the
+ * offload flags of packet 'p'.  Bits outside that mask are preserved. */
+static inline void
+dp_packet_reset_offload(struct dp_packet *p)
+{
+ *dp_packet_ol_flags_ptr(p) &= ~DP_PACKET_OL_SUPPORTED_MASK;
+}
+
+/* If DP_PACKET_OL_FLOW_MARK is set on packet 'p', copies the flow mark into
+ * '*mark' and returns 'true'.  Otherwise returns 'false' and does not touch
+ * '*mark'. */
+static inline bool
+dp_packet_has_flow_mark(const struct dp_packet *p, uint32_t *mark)
+{
+    uint64_t flags = *dp_packet_ol_flags_ptr(p);
+
+    if (!(flags & DP_PACKET_OL_FLOW_MARK)) {
+        return false;
+    }
+
+    *mark = *dp_packet_flow_mark_ptr(p);
+    return true;
+}
+
+/* Stores 'mark' as the flow mark of packet 'p' and sets
+ * DP_PACKET_OL_FLOW_MARK in its offload flags, which is the flag that
+ * dp_packet_has_flow_mark() tests. */
+static inline void
+dp_packet_set_flow_mark(struct dp_packet *p, uint32_t mark)
+{
+ *dp_packet_flow_mark_ptr(p) = mark;
+ *dp_packet_ol_flags_ptr(p) |= DP_PACKET_OL_FLOW_MARK;
+}
+
+/* Extracts the L4 checksum offload bits (DP_PACKET_OL_TX_L4_MASK) from the
+ * offload flags of packet 'b'. */
+static inline uint64_t
+dp_packet_hwol_l4_mask(const struct dp_packet *b)
+{
+    uint64_t flags = *dp_packet_ol_flags_ptr(b);
+
+    return flags & DP_PACKET_OL_TX_L4_MASK;
+}
+
/* Return true if the packet 'b' requested L4 checksum offload. */
static inline bool
dp_packet_hwol_tx_l4_checksum(const struct dp_packet *b)
@@ -1119,6 +947,119 @@ dp_packet_hwol_tx_l4_checksum(const struct dp_packet *b)
return !!dp_packet_hwol_l4_mask(b);
}
+/* Tells whether packet 'b' is marked for TCP segmentation offloading, i.e.
+ * whether DP_PACKET_OL_TX_TCP_SEG is set in its offload flags. */
+static inline bool
+dp_packet_hwol_is_tso(const struct dp_packet *b)
+{
+    uint64_t flags = *dp_packet_ol_flags_ptr(b);
+
+    return (flags & DP_PACKET_OL_TX_TCP_SEG) != 0;
+}
+
+/* Tells whether packet 'b' is marked for IPv4 checksum offloading, i.e.
+ * whether DP_PACKET_OL_TX_IPV4 is set in its offload flags. */
+static inline bool
+dp_packet_hwol_is_ipv4(const struct dp_packet *b)
+{
+    uint64_t flags = *dp_packet_ol_flags_ptr(b);
+
+    return (flags & DP_PACKET_OL_TX_IPV4) != 0;
+}
+
+/* Tells whether packet 'b' is marked for TCP checksum offloading, i.e.
+ * whether the L4 checksum bits of its offload flags equal
+ * DP_PACKET_OL_TX_TCP_CKSUM. */
+static inline bool
+dp_packet_hwol_l4_is_tcp(const struct dp_packet *b)
+{
+    uint64_t l4_bits = *dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_L4_MASK;
+
+    return l4_bits == DP_PACKET_OL_TX_TCP_CKSUM;
+}
+
+/* Returns 'true' if packet 'b' is marked for UDP checksum offloading, i.e.
+ * if the L4 checksum bits of its offload flags equal
+ * DP_PACKET_OL_TX_UDP_CKSUM.
+ *
+ * 'b' is only read, so it is taken as a const pointer, matching
+ * dp_packet_hwol_l4_is_tcp(). */
+static inline bool
+dp_packet_hwol_l4_is_udp(const struct dp_packet *b)
+{
+    return (*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_L4_MASK) ==
+            DP_PACKET_OL_TX_UDP_CKSUM;
+}
+
+/* Returns 'true' if packet 'b' is marked for SCTP checksum offloading, i.e.
+ * if the L4 checksum bits of its offload flags equal
+ * DP_PACKET_OL_TX_SCTP_CKSUM.
+ *
+ * 'b' is only read, so it is taken as a const pointer, matching
+ * dp_packet_hwol_l4_is_tcp(). */
+static inline bool
+dp_packet_hwol_l4_is_sctp(const struct dp_packet *b)
+{
+    return (*dp_packet_ol_flags_ptr(b) & DP_PACKET_OL_TX_L4_MASK) ==
+            DP_PACKET_OL_TX_SCTP_CKSUM;
+}
+
+/* Marks packet 'b' for IPv4 checksum offloading by setting
+ * DP_PACKET_OL_TX_IPV4 in its offload flags.  Other flags are unchanged. */
+static inline void
+dp_packet_hwol_set_tx_ipv4(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV4;
+}
+
+/* Marks packet 'b' for IPv6 checksum offloading by setting
+ * DP_PACKET_OL_TX_IPV6 in its offload flags.  Other flags are unchanged. */
+static inline void
+dp_packet_hwol_set_tx_ipv6(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_IPV6;
+}
+
+/* Marks packet 'b' for TCP checksum offloading by setting
+ * DP_PACKET_OL_TX_TCP_CKSUM.  The packet is expected to also be marked for
+ * either IPv4 or IPv6 checksum offloading; this function sets neither of
+ * those flags itself. */
+static inline void
+dp_packet_hwol_set_csum_tcp(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_CKSUM;
+}
+
+/* Marks packet 'b' for UDP checksum offloading by setting
+ * DP_PACKET_OL_TX_UDP_CKSUM.  The packet is expected to also be marked for
+ * either IPv4 or IPv6 checksum offloading; this function sets neither of
+ * those flags itself. */
+static inline void
+dp_packet_hwol_set_csum_udp(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_UDP_CKSUM;
+}
+
+/* Marks packet 'b' for SCTP checksum offloading by setting
+ * DP_PACKET_OL_TX_SCTP_CKSUM.  The packet is expected to also be marked for
+ * either IPv4 or IPv6 checksum offloading; this function sets neither of
+ * those flags itself. */
+static inline void
+dp_packet_hwol_set_csum_sctp(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_SCTP_CKSUM;
+}
+
+/* Marks packet 'b' for TCP segmentation offloading by setting
+ * DP_PACKET_OL_TX_TCP_SEG.  The packet is expected to also be marked for
+ * either IPv4 or IPv6 checksum offloading and for TCP checksum offloading;
+ * this function sets none of those flags itself. */
+static inline void
+dp_packet_hwol_set_tcp_seg(struct dp_packet *b)
+{
+ *dp_packet_ol_flags_ptr(b) |= DP_PACKET_OL_TX_TCP_SEG;
+}
+
+/* Returns 'true' if the RX IP checksum bits of packet 'p' equal
+ * DP_PACKET_OL_RX_IP_CKSUM_GOOD. */
+static inline bool
+dp_packet_ip_checksum_valid(const struct dp_packet *p)
+{
+    uint64_t flags = *dp_packet_ol_flags_ptr(p);
+
+    return (flags & DP_PACKET_OL_RX_IP_CKSUM_MASK)
+           == DP_PACKET_OL_RX_IP_CKSUM_GOOD;
+}
+
+/* Returns 'true' if the RX IP checksum bits of packet 'p' equal
+ * DP_PACKET_OL_RX_IP_CKSUM_BAD. */
+static inline bool
+dp_packet_ip_checksum_bad(const struct dp_packet *p)
+{
+    uint64_t flags = *dp_packet_ol_flags_ptr(p);
+
+    return (flags & DP_PACKET_OL_RX_IP_CKSUM_MASK)
+           == DP_PACKET_OL_RX_IP_CKSUM_BAD;
+}
+
+/* Returns 'true' if the RX L4 checksum bits of packet 'p' equal
+ * DP_PACKET_OL_RX_L4_CKSUM_GOOD. */
+static inline bool
+dp_packet_l4_checksum_valid(const struct dp_packet *p)
+{
+    uint64_t flags = *dp_packet_ol_flags_ptr(p);
+
+    return (flags & DP_PACKET_OL_RX_L4_CKSUM_MASK)
+           == DP_PACKET_OL_RX_L4_CKSUM_GOOD;
+}
+
+/* Returns 'true' if the RX L4 checksum bits of packet 'p' equal
+ * DP_PACKET_OL_RX_L4_CKSUM_BAD. */
+static inline bool
+dp_packet_l4_checksum_bad(const struct dp_packet *p)
+{
+    uint64_t flags = *dp_packet_ol_flags_ptr(p);
+
+    return (flags & DP_PACKET_OL_RX_L4_CKSUM_MASK)
+           == DP_PACKET_OL_RX_L4_CKSUM_BAD;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/dpctl.c b/lib/dpctl.c
index db2b1f8961aa31df81a091a356bc3d9f09345f45..33202813b5441ab423d4c6f5c108d19a7f339b5a 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -980,6 +980,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p)
struct dpif_flow_dump *flow_dump;
struct dpif_flow f;
int pmd_id = PMD_ID_NULL;
+ bool pmd_id_filter = false;
int lastargc = 0;
int error;
@@ -996,6 +997,16 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p)
goto out_free;
}
types_list = xstrdup(argv[--argc] + 5);
+ } else if (!strncmp(argv[argc - 1], "pmd=", 4)) {
+ if (!ovs_scan(argv[--argc], "pmd=%d", &pmd_id)) {
+ error = EINVAL;
+ goto out_free;
+ }
+
+ if (pmd_id == -1) {
+ pmd_id = NON_PMD_CORE_ID;
+ }
+ pmd_id_filter = true;
}
}
@@ -1031,7 +1042,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p)
memset(&dump_types, 0, sizeof dump_types);
error = populate_dump_types(types_list, &dump_types, dpctl_p);
if (error) {
- goto out_free;
+ goto out_dpifclose;
}
determine_dpif_flow_dump_types(&dump_types, &dpif_dump_types);
@@ -1070,7 +1081,7 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p)
/* If 'pmd_id' is specified, overlapping flows could be dumped from
* different pmd threads. So, separates dumps from different pmds
* by printing a title line. */
- if (pmd_id != f.pmd_id) {
+ if (!pmd_id_filter && pmd_id != f.pmd_id) {
if (f.pmd_id == NON_PMD_CORE_ID) {
ds_put_format(&ds, "flow-dump from the main thread:\n");
} else {
@@ -1079,7 +1090,8 @@ dpctl_dump_flows(int argc, const char *argv[], struct dpctl_params *dpctl_p)
}
pmd_id = f.pmd_id;
}
- if (flow_passes_type_filter(&f, &dump_types)) {
+ if (pmd_id == f.pmd_id &&
+ flow_passes_type_filter(&f, &dump_types)) {
format_dpif_flow(&ds, &f, portno_names, dpctl_p);
dpctl_print(dpctl_p, "%s\n", ds_cstr(&ds));
}
@@ -1157,6 +1169,16 @@ dpctl_put_flow(int argc, const char *argv[], enum dpif_flow_put_flags flags,
goto out_freeactions;
}
+ if (!ufid_present && dpctl_p->is_appctl) {
+ /* Generating UFID for this flow so it could be offloaded to HW. We're
+ * not doing that if invoked from ovs-dpctl utility because
+ * odp_flow_key_hash() uses randomly generated base for flow hashes
+ * that will be different for each invocation. And, anyway, offloading
+ * is only available via appctl. */
+ odp_flow_key_hash(key.data, key.size, &ufid);
+ ufid_present = true;
+ }
+
/* The flow will be added on all pmds currently in the datapath. */
error = dpif_flow_put(dpif, flags,
key.data, key.size,
@@ -1268,6 +1290,7 @@ dpctl_del_flow(int argc, const char *argv[], struct dpctl_params *dpctl_p)
struct ofpbuf mask; /* To be ignored. */
struct dpif *dpif;
ovs_u128 ufid;
+ bool ufid_generated;
bool ufid_present;
struct simap port_names;
int n, error;
@@ -1303,6 +1326,14 @@ dpctl_del_flow(int argc, const char *argv[], struct dpctl_params *dpctl_p)
goto out;
}
+ if (!ufid_present && dpctl_p->is_appctl) {
+ /* While adding flow via appctl we're generating UFID to make HW
+ * offloading possible. Generating UFID here to be sure that such
+ * flows could be removed the same way they were added. */
+ odp_flow_key_hash(key.data, key.size, &ufid);
+ ufid_present = ufid_generated = true;
+ }
+
/* The flow will be deleted from all pmds currently in the datapath. */
error = dpif_flow_del(dpif, key.data, key.size,
ufid_present ? &ufid : NULL, PMD_ID_NULL,
@@ -1310,7 +1341,7 @@ dpctl_del_flow(int argc, const char *argv[], struct dpctl_params *dpctl_p)
if (error) {
dpctl_error(dpctl_p, error, "deleting flow");
- if (error == ENOENT && !ufid_present) {
+ if (error == ENOENT && (!ufid_present || ufid_generated)) {
struct ds s;
ds_init(&s);
@@ -2503,8 +2534,8 @@ static const struct dpctl_command all_commands[] = {
{ "set-if", "dp iface...", 2, INT_MAX, dpctl_set_if, DP_RW },
{ "dump-dps", "", 0, 0, dpctl_dump_dps, DP_RO },
{ "show", "[dp...]", 0, INT_MAX, dpctl_show, DP_RO },
- { "dump-flows", "[dp] [filter=..] [type=..]",
- 0, 3, dpctl_dump_flows, DP_RO },
+ { "dump-flows", "[dp] [filter=..] [type=..] [pmd=..]",
+ 0, 4, dpctl_dump_flows, DP_RO },
{ "add-flow", "[dp] flow actions", 2, 3, dpctl_add_flow, DP_RW },
{ "mod-flow", "[dp] flow actions", 2, 3, dpctl_mod_flow, DP_RW },
{ "get-flow", "[dp] ufid", 1, 2, dpctl_get_flow, DP_RO },
diff --git a/lib/dpctl.man b/lib/dpctl.man
index 727d1f7be8d4bc006945e012b5eabeb64bad3424..0f63277861e520914bcabf8a34350f8e6565e72e 100644
--- a/lib/dpctl.man
+++ b/lib/dpctl.man
@@ -104,7 +104,7 @@ default. When multiple datapaths exist, then a datapath name is
required.
.
.TP
-.DO "[\fB\-m \fR| \fB\-\-more\fR] [\fB\-\-names \fR| \fB\-\-no\-names\fR]" \*(DX\fBdump\-flows\fR "[\fIdp\fR] [\fBfilter=\fIfilter\fR] [\fBtype=\fItype\fR]"
+.DO "[\fB\-m \fR| \fB\-\-more\fR] [\fB\-\-names \fR| \fB\-\-no\-names\fR]" \*(DX\fBdump\-flows\fR "[\fIdp\fR] [\fBfilter=\fIfilter\fR] [\fBtype=\fItype\fR] [\fBpmd=\fIpmd\fR]"
Prints to the console all flow entries in datapath \fIdp\fR's flow
table. Without \fB\-m\fR or \fB\-\-more\fR, output omits match fields
that a flow wildcards entirely; with \fB\-m\fR or \fB\-\-more\fR,
@@ -118,6 +118,10 @@ The \fIfilter\fR is also useful to match wildcarded fields in the datapath
flow. As an example, \fBfilter='tcp,tp_src=100'\fR will match the
datapath flow containing '\fBtcp(src=80/0xff00,dst=8080/0xff)\fR'.
.IP
+If \fBpmd=\fIpmd\fR is specified, only displays flows of the specified pmd.
+Using \fBpmd=\-1\fR will restrict the dump to flows from the main thread.
+This option is only supported by the \fBuserspace datapath\fR.
+.IP
If \fBtype=\fItype\fR is specified, only displays flows of the specified types.
This option supported only for \fBovs\-appctl dpctl/dump\-flows\fR.
\fItype\fR is a comma separated list, which can contain any of the following:
diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index c332c217cbfd80aac93e91309eb23e7bfb818261..b7d577870d879b0a9b60a020a82f2efe40ba8908 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -79,6 +79,15 @@ print_dpdk_version(void)
{
}
+/* Stub variant of the CPU feature query: it ignores both 'arch' and
+ * 'feature', logs an error (once) saying DPDK is not supported in this
+ * build, and always reports the feature as unavailable. */
+bool
+dpdk_get_cpu_has_isa(const char *arch OVS_UNUSED,
+ const char *feature OVS_UNUSED)
+{
+ VLOG_ERR_ONCE("DPDK not supported in this version of Open vSwitch, "
+ "cannot use CPU flag based optimizations");
+ return false;
+}
+
void
dpdk_status(const struct ovsrec_open_vswitch *cfg)
{
diff --git a/lib/dpdk-unixctl.man b/lib/dpdk-unixctl.man
new file mode 100644
index 0000000000000000000000000000000000000000..2d6d576f24fbde20bb6dfaef8e39765872d19fc9
--- /dev/null
+++ b/lib/dpdk-unixctl.man
@@ -0,0 +1,14 @@
+.SS "DPDK COMMANDS"
+These commands manage DPDK components.
+.IP "\fBdpdk/log-list\fR"
+Lists all DPDK components that emit logs and their logging levels.
+.IP "\fBdpdk/log-set\fR [\fIspec\fR]"
+Sets DPDK components logging level. Without any \fIspec\fR, sets the logging
+\fBlevel\fR for all DPDK components to \fBdebug\fR. Otherwise, \fIspec\fR is a
+list of words separated by spaces: a word can be either a logging \fBlevel\fR
+(\fBemergency\fR, \fBalert\fR, \fBcritical\fR, \fBerror\fR, \fBwarning\fR,
+\fBnotice\fR, \fBinfo\fR or \fBdebug\fR) or a \fBpattern\fR matching DPDK
+components (see \fBdpdk/log-list\fR command on \fBovs\-appctl\fR(8)) separated
+by a colon from the logging \fBlevel\fR to apply.
+.RE
+.
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 37ea2973ce865e5466f897d3edadae1cf2cd801f..319540394ba251fb3586a608f118e019b3ca2a56 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -22,13 +22,11 @@
#include
#include
+#include
#include
#include
#include
#include
-#ifdef DPDK_PDUMP
-#include
-#endif
#include "dirs.h"
#include "fatal-signal.h"
@@ -39,6 +37,7 @@
#include "ovs-numa.h"
#include "smap.h"
#include "svec.h"
+#include "unixctl.h"
#include "util.h"
#include "vswitch-idl.h"
@@ -264,6 +263,99 @@ static cookie_io_functions_t dpdk_log_func = {
.write = dpdk_log_write,
};
+/* Generic unixctl handler: 'aux' is a 'void (*)(FILE *)' callback that writes
+ * its output to the stream it is given.  That output is collected in an
+ * in-memory stream and sent back on 'conn' as the command reply.  If the
+ * memory stream cannot be opened, an error reply is sent instead. */
+static void
+dpdk_unixctl_mem_stream(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                        const char *argv[] OVS_UNUSED, void *aux)
+{
+    void (*dump_to)(FILE *) = aux;
+    char *reply = NULL;
+    size_t reply_len;
+    FILE *mem = open_memstream(&reply, &reply_len);
+
+    if (mem) {
+        dump_to(mem);
+        /* The stream is closed before 'reply' is handed to unixctl. */
+        fclose(mem);
+        unixctl_command_reply(conn, reply);
+    } else {
+        reply = xasprintf("Unable to open memstream: %s.",
+                          ovs_strerror(errno));
+        unixctl_command_reply_error(conn, reply);
+    }
+
+    /* 'reply' is either the memstream buffer or the xasprintf() string. */
+    free(reply);
+}
+
+/* Translates the textual log level 's' into the index it occupies in the
+ * RTE_LOG_*-indexed name table.  Returns -1 if 's' matches none of the known
+ * names. */
+static int
+dpdk_parse_log_level(const char *s)
+{
+    static const char * const level_names[] = {
+        [RTE_LOG_EMERG]   = "emergency",
+        [RTE_LOG_ALERT]   = "alert",
+        [RTE_LOG_CRIT]    = "critical",
+        [RTE_LOG_ERR]     = "error",
+        [RTE_LOG_WARNING] = "warning",
+        [RTE_LOG_NOTICE]  = "notice",
+        [RTE_LOG_INFO]    = "info",
+        [RTE_LOG_DEBUG]   = "debug",
+    };
+    size_t lvl;
+
+    /* The scan deliberately starts at slot 1.  NOTE(review): this presumably
+     * relies on RTE_LOG_EMERG being 1 and the levels being contiguous, so
+     * that slot 0 is the only unset entry -- confirm against rte_log.h. */
+    for (lvl = 1; lvl < ARRAY_SIZE(level_names); lvl++) {
+        if (strcmp(s, level_names[lvl]) == 0) {
+            return (int) lvl;
+        }
+    }
+
+    return -1;
+}
+
+/* unixctl handler for "dpdk/log-set".
+ *
+ * With no argument, every DPDK component ("*") is set to the debug level.
+ * Otherwise each argument is either "level" (applied to "*") or
+ * "pattern:level".  Arguments are applied in order; the first one that cannot
+ * be parsed or applied produces an error reply and stops processing, leaving
+ * earlier arguments applied. */
+static void
+dpdk_unixctl_log_set(struct unixctl_conn *conn, int argc, const char *argv[],
+                     void *aux OVS_UNUSED)
+{
+    int arg;
+
+    if (argc == 1) {
+        rte_log_set_level_pattern("*", RTE_LOG_DEBUG);
+    }
+
+    for (arg = 1; arg < argc; arg++) {
+        /* Private, writable copy so the ':' can be overwritten in place. */
+        char *spec = xstrdup(argv[arg]);
+        char *colon = strchr(spec, ':');
+        const char *level_name;
+        const char *pattern;
+        char *error = NULL;
+        int level;
+
+        if (colon) {
+            /* "pattern:level": split at the first colon. */
+            *colon = '\0';
+            pattern = spec;
+            level_name = colon + 1;
+        } else {
+            /* Bare "level": applies to all components. */
+            pattern = "*";
+            level_name = spec;
+        }
+
+        level = dpdk_parse_log_level(level_name);
+        if (level == -1) {
+            error = xasprintf("invalid log level: '%s'", level_name);
+        } else if (rte_log_set_level_pattern(pattern, level) < 0) {
+            error = xasprintf("cannot set log level for '%s'", argv[arg]);
+        }
+
+        /* 'error', if any, owns its own copy of the text by now. */
+        free(spec);
+
+        if (error) {
+            unixctl_command_reply_error(conn, error);
+            free(error);
+            return;
+        }
+    }
+
+    unixctl_command_reply(conn, NULL);
+}
+
static bool
dpdk_init__(const struct smap *ovs_other_config)
{
@@ -351,7 +443,7 @@ dpdk_init__(const struct smap *ovs_other_config)
/**
* NOTE: This is an unsophisticated mechanism for determining the DPDK
- * lcore for the DPDK Master.
+ * main core.
*/
if (auto_determine) {
const struct ovs_numa_info_core *core;
@@ -416,30 +508,27 @@ dpdk_init__(const struct smap *ovs_other_config)
FILE *stream = open_memstream(&response, &size);
if (stream) {
+ fprintf(stream, "rte_memzone_dump:\n");
rte_memzone_dump(stream);
+ fprintf(stream, "rte_log_dump:\n");
+ rte_log_dump(stream);
fclose(stream);
- if (size) {
- VLOG_DBG("rte_memzone_dump:\n%s", response);
- }
+ VLOG_DBG("%s", response);
free(response);
} else {
- VLOG_DBG("Could not dump memzone. Unable to open memstream: %s.",
- ovs_strerror(errno));
+ VLOG_DBG("Could not dump memzone and log levels. "
+ "Unable to open memstream: %s.", ovs_strerror(errno));
}
}
+ unixctl_command_register("dpdk/log-list", "", 0, 0,
+ dpdk_unixctl_mem_stream, rte_log_dump);
+ unixctl_command_register("dpdk/log-set", "{level | pattern:level}", 0,
+ INT_MAX, dpdk_unixctl_log_set, NULL);
+
/* We are called from the main thread here */
RTE_PER_LCORE(_lcore_id) = NON_PMD_CORE_ID;
-#ifdef DPDK_PDUMP
- VLOG_WARN("DPDK pdump support is deprecated and "
- "will be removed in next OVS releases.");
- err = rte_pdump_init();
- if (err) {
- VLOG_INFO("Error initialising DPDK pdump");
- }
-#endif
-
/* Finally, register the dpdk classes */
netdev_dpdk_register();
netdev_register_flow_api_provider(&netdev_offload_dpdk);
@@ -525,6 +614,35 @@ print_dpdk_version(void)
puts(rte_version());
}
+/* Helper for dpdk_get_cpu_has_isa().  If 'feature' starts with 'name_str',
+ * queries DPDK for RTE_CPUFLAG and makes the *enclosing function* return
+ * whether that flag is enabled on the running CPU.  If the name does not
+ * match, execution falls through to the next check.
+ *
+ * rte_cpu_get_flag_enabled() can also return a negative value on error, so
+ * only a strictly positive result is treated as "available". */
+#define CHECK_CPU_FEATURE(feature, name_str, RTE_CPUFLAG)               \
+    do {                                                                \
+        if (strncmp(feature, name_str, strlen(name_str)) == 0) {        \
+            int has_isa = rte_cpu_get_flag_enabled(RTE_CPUFLAG);        \
+            VLOG_DBG("CPU flag %s, available %s\n", name_str,           \
+                     has_isa > 0 ? "yes" : "no");                       \
+            return has_isa > 0;                                         \
+        }                                                               \
+    } while (0)
+
+/* Returns true if the running CPU supports instruction-set extension
+ * 'feature' on architecture 'arch'.
+ *
+ * Only 'arch' values beginning with "x86_64" are recognized, and only the
+ * features "avx512f" and "bmi2" (matched as prefixes of 'feature').  Any
+ * other combination logs a warning and returns false.  On builds where
+ * __x86_64__ is not set, no feature is recognized at all. */
+bool
+dpdk_get_cpu_has_isa(const char *arch, const char *feature)
+{
+    /* Ensure Arch is x86_64. */
+    if (strncmp(arch, "x86_64", 6) != 0) {
+        return false;
+    }
+
+#if __x86_64__
+    /* CPU flags only defined for the architecture that support it. */
+    CHECK_CPU_FEATURE(feature, "avx512f", RTE_CPUFLAG_AVX512F);
+    CHECK_CPU_FEATURE(feature, "bmi2", RTE_CPUFLAG_BMI2);
+#endif
+
+    VLOG_WARN("Unknown CPU arch,feature: %s,%s. Returning not supported.\n",
+              arch, feature);
+    return false;
+}
+
void
dpdk_status(const struct ovsrec_open_vswitch *cfg)
{
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 736a64279e3119bb285be522d99fd09fab66e385..445a51d065fe5b917190f66013f7334b597f66f1 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -44,4 +44,6 @@ bool dpdk_per_port_memory(void);
bool dpdk_available(void);
void print_dpdk_version(void);
void dpdk_status(const struct ovsrec_open_vswitch *);
+bool dpdk_get_cpu_has_isa(const char *arch, const char *feature);
+
#endif /* dpdk.h */
diff --git a/lib/dpif-netdev-lookup-autovalidator.c b/lib/dpif-netdev-lookup-autovalidator.c
new file mode 100644
index 0000000000000000000000000000000000000000..97b59fdd006f69f7e3015abe39d8d930428cdf00
--- /dev/null
+++ b/lib/dpif-netdev-lookup-autovalidator.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2020 Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include "dpif-netdev.h"
+#include "dpif-netdev-lookup.h"
+#include "dpif-netdev-private.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_lookup_autovalidator);
+
+/* This file implements an automated validator for subtable search
+ * implementations. It compares the results of the generic scalar search with
+ * the results of the ISA optimized implementations.
+ *
+ * Note the goal is *NOT* to test the *specialized* versions of subtables, as
+ * the compiler performs the specialization - and we rely on the correctness of
+ * the compiler to not break those specialized variants.
+ *
+ * The goal is to ensure identical results of the different implementations,
+ * despite that the implementations may have different methods to get those
+ * results.
+ *
+ * Example: AVX-512 ISA uses different instructions and algorithm to the scalar
+ * implementation, however the results (rules[] output) must be the same.
+ */
+
+dpcls_subtable_lookup_func
+dpcls_subtable_autovalidator_probe(uint32_t u0 OVS_UNUSED,
+ uint32_t u1 OVS_UNUSED);
+
+/* Lookup function that cross-checks every other registered implementation
+ * against the generic scalar one.
+ *
+ * The scalar lookup is run first and its output ('rules_good' and the
+ * returned match bitmap) is what the caller receives.  Each other
+ * implementation whose probe accepts this subtable is then run on the same
+ * keys: a differing match bitmap is logged as an error, and a differing rule
+ * pointer for any packet the tested implementation matched triggers an
+ * assertion failure.
+ *
+ * Returns the scalar match bitmap, or 0 if the list of implementations
+ * cannot be obtained. */
+static uint32_t
+dpcls_subtable_autovalidator(struct dpcls_subtable *subtable,
+                             uint32_t keys_map,
+                             const struct netdev_flow_key *keys[],
+                             struct dpcls_rule **rules_good)
+{
+    const uint32_t u0_bits = subtable->mf_bits_set_unit0;
+    const uint32_t u1_bits = subtable->mf_bits_set_unit1;
+    struct dpcls_subtable_lookup_info_t *lookup_funcs;
+    dpcls_subtable_lookup_func reference;
+    uint32_t matches_good;
+    int32_t n_funcs;
+
+    /* The scalar generic lookup provides the "known correct" results. */
+    reference = dpcls_subtable_generic_probe(u0_bits, u1_bits);
+    matches_good = reference(subtable, keys_map, keys, rules_good);
+
+    n_funcs = dpcls_subtable_lookup_info_get(&lookup_funcs);
+    if (n_funcs < 0) {
+        VLOG_ERR("failed to get lookup subtable function implementations\n");
+        return 0;
+    }
+
+    /* The loop below skips entry 0 on the assumption that it is this
+     * validator, so verify that assumption. */
+    ovs_assert(lookup_funcs[0].probe(0, 0) == dpcls_subtable_autovalidator);
+
+    for (int i = 1; i < n_funcs; i++) {
+        struct dpcls_rule *rules_test[NETDEV_MAX_BURST];
+        dpcls_subtable_lookup_func candidate;
+        uint32_t matches_test;
+        int j;
+
+        /* Implementations whose probe declines this subtable are skipped. */
+        candidate = lookup_funcs[i].probe(u0_bits, u1_bits);
+        if (!candidate) {
+            continue;
+        }
+
+        memset(rules_test, 0, sizeof rules_test);
+        matches_test = candidate(subtable, keys_map, keys, rules_test);
+
+        /* The same set of packets must have matched... */
+        if (matches_test != matches_good) {
+            VLOG_ERR("matches_good 0x%x != matches_test 0x%x in func %s\n",
+                     matches_good, matches_test, lookup_funcs[i].name);
+        }
+
+        /* ...and each matched packet must have hit the same rule. */
+        ULLONG_FOR_EACH_1 (j, matches_test) {
+            ovs_assert(rules_good[j] == rules_test[j]);
+        }
+    }
+
+    return matches_good;
+}
+
+/* Probe for the autovalidator.  Both bit counts are ignored: the validator
+ * lookup function is returned for every subtable shape, never NULL. */
+dpcls_subtable_lookup_func
+dpcls_subtable_autovalidator_probe(uint32_t u0 OVS_UNUSED,
+ uint32_t u1 OVS_UNUSED)
+{
+ /* Always return the same validator tester, it works for all subtables. */
+ return dpcls_subtable_autovalidator;
+}
diff --git a/lib/dpif-netdev-lookup-avx512-gather.c b/lib/dpif-netdev-lookup-avx512-gather.c
new file mode 100644
index 0000000000000000000000000000000000000000..5e3634249d818877642925bcb808351c0f9a0957
--- /dev/null
+++ b/lib/dpif-netdev-lookup-avx512-gather.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2020, Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __x86_64__
+#if !defined(__CHECKER__)
+
+#include
+
+#include "dpif-netdev.h"
+#include "dpif-netdev-lookup.h"
+#include "dpif-netdev-private.h"
+#include "cmap.h"
+#include "flow.h"
+#include "pvector.h"
+#include "openvswitch/vlog.h"
+
+#include "immintrin.h"
+
+/* Each AVX512 register (zmm register in assembly notation) can contain up to
+ * 512 bits, which is equivalent to 8 uint64_t variables. This is the maximum
+ * number of miniflow blocks that can be processed in a single pass of the
+ * AVX512 code at a time.
+ */
+#define NUM_U64_IN_ZMM_REG (8)
+#define BLOCKS_CACHE_SIZE (NETDEV_MAX_BURST * NUM_U64_IN_ZMM_REG)
+
+
+VLOG_DEFINE_THIS_MODULE(dpif_lookup_avx512_gather);
+
+/* Computes a population count for 'v_in' using a nibble lookup table rather
+ * than a dedicated popcount instruction.  The steps are: split every byte
+ * into its low and high nibble, look both up in 'pop_lut', add the two
+ * per-byte counts, then reduce with _mm512_sad_epu8() against zero.
+ *
+ * NOTE(review): per the Intel Intrinsics Guide, _mm512_sad_epu8() sums the
+ * bytes of each 64-bit lane, which would make the result one popcount per
+ * u64 lane -- this matches how the caller uses it, but confirm.
+ *
+ * NOTE(review): _mm512_shuffle_epi8(), _mm512_add_epi8() and
+ * _mm512_sad_epu8() appear to be AVX512BW intrinsics, while the probe in
+ * this file only checks "avx512f" and "bmi2" -- confirm that is sufficient
+ * for all supported CPUs. */
+static inline __m512i
+_mm512_popcnt_epi64_manual(__m512i v_in)
+{
+ /* Bit count of every 4-bit value 0..15, repeated four times to fill all
+ * 64 bytes of the register. */
+ static const uint8_t pop_lut[64] = {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+ };
+ __m512i v_pop_lut = _mm512_loadu_si512(pop_lut);
+
+ /* Despite its name, 'v_in_srl8' holds the input shifted right by 4 bits,
+ * which moves each byte's high nibble into the low-nibble position. */
+ __m512i v_in_srl8 = _mm512_srli_epi64(v_in, 4);
+ __m512i v_nibble_mask = _mm512_set1_epi8(0xF);
+ __m512i v_in_lo = _mm512_and_si512(v_in, v_nibble_mask);
+ __m512i v_in_hi = _mm512_and_si512(v_in_srl8, v_nibble_mask);
+
+ /* Table lookup for each nibble, then per-byte sum of the two counts. */
+ __m512i v_lo_pop = _mm512_shuffle_epi8(v_pop_lut, v_in_lo);
+ __m512i v_hi_pop = _mm512_shuffle_epi8(v_pop_lut, v_in_hi);
+ __m512i v_u8_pop = _mm512_add_epi8(v_lo_pop, v_hi_pop);
+
+ return _mm512_sad_epu8(v_u8_pop, _mm512_setzero_si512());
+}
+
+/* Returns 1 if the packet blocks in 'block_cache', ANDed with the mask
+ * values of 'rule', equal the flow values of 'rule' in each of the first
+ * 'mf_bits_total' u64 lanes; returns 0 otherwise.
+ *
+ * NOTE(review): 'lane_mask' is built with '1 << mf_bits_total' and used as an
+ * 8-lane mask, so this presumably requires mf_bits_total <= 8 -- confirm
+ * against the probe function's limits. */
+static inline uint64_t
+netdev_rule_matches_key(const struct dpcls_rule *rule,
+ const uint32_t mf_bits_total,
+ const uint64_t * block_cache)
+{
+ const uint64_t *keyp = miniflow_get_values(&rule->flow.mf);
+ const uint64_t *maskp = miniflow_get_values(&rule->mask->mf);
+ /* One bit per u64 lane that takes part in the comparison. */
+ const uint32_t lane_mask = (1 << mf_bits_total) - 1;
+
+ /* Always load a full cache line from blocks_cache. Other loads must be
+ * trimmed to the amount of data required for mf_bits_total blocks.
+ */
+ __m512i v_blocks = _mm512_loadu_si512(&block_cache[0]);
+ __m512i v_mask = _mm512_maskz_loadu_epi64(lane_mask, &maskp[0]);
+ __m512i v_key = _mm512_maskz_loadu_epi64(lane_mask, &keyp[0]);
+
+ __m512i v_data = _mm512_and_si512(v_blocks, v_mask);
+ uint32_t res_mask = _mm512_mask_cmpeq_epi64_mask(lane_mask, v_data, v_key);
+
+ /* Returns 1 only if every lane selected by 'lane_mask' compared equal. */
+ return res_mask == lane_mask;
+}
+
+/* AVX-512 subtable lookup shared by all the specialized entry points.
+ *
+ * For every packet whose bit is set in 'keys_map' it gathers the miniflow
+ * blocks that 'subtable' cares about into a per-packet slot of a local block
+ * cache, hashes them, looks the hashes up in 'subtable->rules' and then
+ * verifies each candidate rule against the cached blocks.
+ *
+ * 'bit_count_u0' and 'bit_count_u1' are the number of blocks the subtable
+ * uses from miniflow unit 0 and unit 1; the specialized wrappers pass them as
+ * constants and the "any" wrapper passes the subtable's mf_bits_set_unit0/1.
+ *
+ * On a hit, rules[i] is set and subtable->hit_cnt is incremented.  Returns
+ * the bitmap of packets for which a matching rule was found.
+ *
+ * NOTE(review): the 8-bit lane masks are built with '1 << bit_count_total'
+ * and each packet gets 8 u64 slots in the cache, so this presumably requires
+ * bit_count_u0 + bit_count_u1 <= 8 -- confirm against the probe function. */
+static inline uint32_t ALWAYS_INLINE
+avx512_lookup_impl(struct dpcls_subtable *subtable,
+ uint32_t keys_map,
+ const struct netdev_flow_key *keys[],
+ struct dpcls_rule **rules,
+ const uint32_t bit_count_u0,
+ const uint32_t bit_count_u1)
+{
+ OVS_ALIGNED_VAR(CACHE_LINE_SIZE)uint64_t block_cache[BLOCKS_CACHE_SIZE];
+
+ const uint32_t bit_count_total = bit_count_u0 + bit_count_u1;
+ int i;
+ uint32_t hashes[NETDEV_MAX_BURST];
+ const uint32_t n_pkts = __builtin_popcountll(keys_map);
+ ovs_assert(NETDEV_MAX_BURST >= n_pkts);
+
+ const uint64_t tbl_u0 = subtable->mask.mf.map.bits[0];
+ const uint64_t tbl_u1 = subtable->mask.mf.map.bits[1];
+
+ /* Load subtable blocks for masking later. */
+ const uint64_t *tbl_blocks = miniflow_get_values(&subtable->mask.mf);
+ const __m512i v_tbl_blocks = _mm512_loadu_si512(&tbl_blocks[0]);
+
+ /* Load pre-created subtable masks for each block in subtable. */
+ const __mmask8 bit_count_total_mask = (1 << bit_count_total) - 1;
+ const __m512i v_mf_masks = _mm512_maskz_loadu_epi64(bit_count_total_mask,
+ subtable->mf_masks);
+
+ /* Pass 1: build the masked block vector for each packet and store it in
+ * that packet's 8-u64 slot of 'block_cache'. */
+ ULLONG_FOR_EACH_1 (i, keys_map) {
+ const uint64_t pkt_mf_u0_bits = keys[i]->mf.map.bits[0];
+ const uint64_t pkt_mf_u0_pop = __builtin_popcountll(pkt_mf_u0_bits);
+
+ /* Pre-create register with *PER PACKET* u0 offset. */
+ const __mmask8 u1_bcast_mask = (UINT8_MAX << bit_count_u0);
+ const __m512i v_idx_u0_offset = _mm512_maskz_set1_epi64(u1_bcast_mask,
+ pkt_mf_u0_pop);
+
+ /* Broadcast u0, u1 bitmasks to 8x u64 lanes. */
+ __m512i v_u0 = _mm512_set1_epi64(pkt_mf_u0_bits);
+ __m512i v_pkt_bits = _mm512_mask_set1_epi64(v_u0, u1_bcast_mask,
+ keys[i]->mf.map.bits[1]);
+
+ /* Bitmask by pre-created masks. */
+ __m512i v_masks = _mm512_and_si512(v_pkt_bits, v_mf_masks);
+
+ /* Manual AVX512 popcount for u64 lanes. */
+ __m512i v_popcnts = _mm512_popcnt_epi64_manual(v_masks);
+
+ /* Offset popcounts for u1 with pre-created offset register. */
+ __m512i v_indexes = _mm512_add_epi64(v_popcnts, v_idx_u0_offset);
+
+ /* Gather u64 blocks from packet miniflow. */
+ const __m512i v_zeros = _mm512_setzero_si512();
+ const void *pkt_data = miniflow_get_values(&keys[i]->mf);
+ __m512i v_all_blocks = _mm512_mask_i64gather_epi64(v_zeros,
+ bit_count_total_mask, v_indexes,
+ pkt_data, 8);
+
+ /* Zero out bits that pkt doesn't have:
+ * - 2x pext() to extract bits from packet miniflow as needed by TBL
+ * - Shift u1 over by bit_count of u0, OR to create zero bitmask
+ */
+ uint64_t u0_to_zero = _pext_u64(keys[i]->mf.map.bits[0], tbl_u0);
+ uint64_t u1_to_zero = _pext_u64(keys[i]->mf.map.bits[1], tbl_u1);
+ uint64_t zero_mask = (u1_to_zero << bit_count_u0) | u0_to_zero;
+
+ /* Mask blocks using AND with subtable blocks, use k-mask to zero
+ * where lanes as required for this packet.
+ */
+ __m512i v_masked_blocks = _mm512_maskz_and_epi64(zero_mask,
+ v_all_blocks, v_tbl_blocks);
+
+ /* Store to blocks cache, full cache line aligned. */
+ _mm512_storeu_si512(&block_cache[i * 8], v_masked_blocks);
+ }
+
+ /* Hash the now linearized blocks of packet metadata. */
+ ULLONG_FOR_EACH_1 (i, keys_map) {
+ uint64_t *block_ptr = &block_cache[i * 8];
+ uint32_t hash = hash_add_words64(0, block_ptr, bit_count_total);
+ hashes[i] = hash_finish(hash, bit_count_total * 8);
+ }
+
+ /* Lookup: this returns a bitmask of packets where the hash table had
+ * an entry for the given hash key. Presence of a hash key does not
+ * guarantee matching the key, as there can be hash collisions.
+ */
+ uint32_t found_map;
+ const struct cmap_node *nodes[NETDEV_MAX_BURST];
+ found_map = cmap_find_batch(&subtable->rules, keys_map, hashes, nodes);
+
+ /* Verify that packet actually matched rule. If not found, a hash
+ * collision has taken place, so continue searching with the next node.
+ */
+ ULLONG_FOR_EACH_1 (i, found_map) {
+ struct dpcls_rule *rule;
+
+ CMAP_NODE_FOR_EACH (rule, cmap_node, nodes[i]) {
+ const uint32_t cidx = i * 8;
+ uint32_t match = netdev_rule_matches_key(rule, bit_count_total,
+ &block_cache[cidx]);
+ if (OVS_LIKELY(match)) {
+ rules[i] = rule;
+ subtable->hit_cnt++;
+ goto next;
+ }
+ }
+
+ /* None of the found rules was a match. Clear the i-th bit to
+ * search for this key in the next subtable. */
+ ULLONG_SET0(found_map, i);
+ next:
+ ; /* Keep Sparse happy. */
+ }
+
+ return found_map;
+}
+
+/* Expand out specialized functions with U0 and U1 bit attributes.
+ *
+ * Each expansion defines a static function dpcls_avx512_gather_mf_<U0>_<U1>()
+ * that calls avx512_lookup_impl() with U0 and U1 as literal constants. */
+#define DECLARE_OPTIMIZED_LOOKUP_FUNCTION(U0, U1) \
+ static uint32_t \
+ dpcls_avx512_gather_mf_##U0##_##U1(struct dpcls_subtable *subtable, \
+ uint32_t keys_map, \
+ const struct netdev_flow_key *keys[], \
+ struct dpcls_rule **rules) \
+ { \
+ return avx512_lookup_impl(subtable, keys_map, keys, rules, U0, U1); \
+ } \
+
+/* The (U0, U1) shapes that get a dedicated function.  Each one needs a
+ * matching CHECK_LOOKUP_FUNCTION() line in the probe to be selectable. */
+DECLARE_OPTIMIZED_LOOKUP_FUNCTION(5, 1)
+DECLARE_OPTIMIZED_LOOKUP_FUNCTION(4, 1)
+DECLARE_OPTIMIZED_LOOKUP_FUNCTION(4, 0)
+
+/* Check if a specialized function is valid for the required subtable.
+ *
+ * Expects 'f', 'u0_bits' and 'u1_bits' to be in scope at the point of use.
+ * If no function has been chosen yet and the subtable shape is exactly
+ * (U0, U1), selects dpcls_avx512_gather_mf_<U0>_<U1>.  Also asserts that the
+ * shape fits in a single ZMM register.
+ *
+ * Wrapped in do { } while (0) so that the two statements always behave as
+ * one, even under an unbraced if/else. */
+#define CHECK_LOOKUP_FUNCTION(U0, U1)                                   \
+    do {                                                                \
+        ovs_assert((U0 + U1) <= NUM_U64_IN_ZMM_REG);                    \
+        if (!f && u0_bits == U0 && u1_bits == U1) {                     \
+            f = dpcls_avx512_gather_mf_##U0##_##U1;                     \
+        }                                                               \
+    } while (0)
+
+/* Non-specialized AVX-512 lookup: same implementation as the specialized
+ * variants, but the unit 0 / unit 1 block counts are read from 'subtable' at
+ * run time instead of being passed as literal constants. */
+static uint32_t
+dpcls_avx512_gather_mf_any(struct dpcls_subtable *subtable, uint32_t keys_map,
+ const struct netdev_flow_key *keys[],
+ struct dpcls_rule **rules)
+{
+ return avx512_lookup_impl(subtable, keys_map, keys, rules,
+ subtable->mf_bits_set_unit0,
+ subtable->mf_bits_set_unit1);
+}
+
+/* Probe for the AVX-512 gather lookup.
+ *
+ * Returns NULL unless both "avx512f" and "bmi2" are reported as available.
+ * Otherwise returns the specialized function for the (u0_bits, u1_bits)
+ * shape if one exists, else the run-time generic AVX-512 function when the
+ * total block count is below NUM_U64_IN_ZMM_REG, else NULL. */
+dpcls_subtable_lookup_func
+dpcls_subtable_avx512_gather_probe(uint32_t u0_bits, uint32_t u1_bits)
+{
+    dpcls_subtable_lookup_func f = NULL;
+
+    /* Both queries are always made (no short-circuit), as each one logs. */
+    bool have_avx512f = dpdk_get_cpu_has_isa("x86_64", "avx512f");
+    bool have_bmi2 = dpdk_get_cpu_has_isa("x86_64", "bmi2");
+
+    if (!(have_avx512f && have_bmi2)) {
+        return NULL;
+    }
+
+    /* Each check sets 'f' if the shape matches and nothing is chosen yet. */
+    CHECK_LOOKUP_FUNCTION(5, 1);
+    CHECK_LOOKUP_FUNCTION(4, 1);
+    CHECK_LOOKUP_FUNCTION(4, 0);
+
+    if (f) {
+        return f;
+    }
+
+    if ((u0_bits + u1_bits) < NUM_U64_IN_ZMM_REG) {
+        VLOG_INFO("Using avx512_gather_mf_any for subtable (%d,%d)\n",
+                  u0_bits, u1_bits);
+        return dpcls_avx512_gather_mf_any;
+    }
+
+    return NULL;
+}
+
+#endif /* CHECKER */
+#endif /* __x86_64__ */
diff --git a/lib/dpif-netdev-lookup-generic.c b/lib/dpif-netdev-lookup-generic.c
index 89c8be0fa42740f2a92724dac9f1d6a9e68aa7b5..b1a0cfc3691ec7ea1ddafd15365ddbcc219b6ed6 100644
--- a/lib/dpif-netdev-lookup-generic.c
+++ b/lib/dpif-netdev-lookup-generic.c
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2016, 2017 Nicira, Inc.
- * Copyright (c) 2019 Intel Corporation.
+ * Copyright (c) 2019, 2020 Intel Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
#include
#include "dpif-netdev.h"
#include "dpif-netdev-private.h"
+#include "dpif-netdev-lookup.h"
#include "bitmap.h"
#include "cmap.h"
@@ -254,7 +255,7 @@ lookup_generic_impl(struct dpcls_subtable *subtable,
}
/* Generic lookup function that uses runtime provided mf bits for iterating. */
-uint32_t
+static uint32_t
dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable,
uint32_t keys_map,
const struct netdev_flow_key *keys[],
@@ -310,6 +311,10 @@ dpcls_subtable_generic_probe(uint32_t u0_bits, uint32_t u1_bits)
if (f) {
VLOG_DBG("Subtable using Generic Optimized for u0 %d, u1 %d\n",
u0_bits, u1_bits);
+ } else {
+ /* Always return the generic function. */
+ f = dpcls_subtable_lookup_generic;
}
+
return f;
}
diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c
new file mode 100644
index 0000000000000000000000000000000000000000..bd0a99abe7ab54fabf35d59f768c27e74247dcd2
--- /dev/null
+++ b/lib/dpif-netdev-lookup.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2020 Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include "dpif-netdev-lookup.h"
+
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_netdev_lookup);
+
+/* Table of all subtable lookup implementations known to this build.
+ *
+ * Each entry has a priority ('prio'), a probe function and a name.  The
+ * autovalidator must remain the first entry: its lookup function asserts that
+ * entry 0's probe returns the autovalidator and only validates entries 1..n.
+ */
+static struct dpcls_subtable_lookup_info_t subtable_lookups[] = {
+ /* The autovalidator implementation will not be used by default, it must
+ * be enabled at compile time to be the default lookup implementation. The
+ * user may enable it at runtime using the normal "prio-set" command if
+ * desired. The compile time default switch is here to enable all unit
+ * tests to transparently run with the autovalidator.
+ */
+#ifdef DPCLS_AUTOVALIDATOR_DEFAULT
+ { .prio = 255,
+#else
+ { .prio = 0,
+#endif
+ .probe = dpcls_subtable_autovalidator_probe,
+ .name = "autovalidator", },
+
+ /* The default scalar C code implementation. */
+ { .prio = 1,
+ .probe = dpcls_subtable_generic_probe,
+ .name = "generic", },
+
+#if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
+ /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
+ { .prio = 0,
+ .probe = dpcls_subtable_avx512_gather_probe,
+ .name = "avx512_gather", },
+#else
+ /* Disabling AVX512 at compile time, as compile time requirements not met.
+ * This could be due to a number of reasons:
+ * 1) core OVS is not compiled with SSE4.2 instruction set.
+ * The SSE4.2 instructions are required to use CRC32 ISA for high-
+ * performance hashing. Consider ./configure of OVS with -msse4.2 (or
+ * newer) to enable CRC32 hashing and higher performance.
+ * 2) The assembler in binutils versions 2.30 and 2.31 has bugs in AVX512
+ * assembly. Compile time probes check for this assembler issue, and
+ * disable the HAVE_LD_AVX512_GOOD check if an issue is detected.
+ * Please upgrade binutils, or backport this binutils fix commit:
+ * 2069ccaf8dc28ea699bd901fdd35d90613e4402a
+ */
+#endif
+};
+
+/* Stores a pointer to the table of lookup implementations in '*out_ptr' and
+ * returns the number of entries in it.  Returns -1, leaving nothing written,
+ * if 'out_ptr' is NULL. */
+int32_t
+dpcls_subtable_lookup_info_get(struct dpcls_subtable_lookup_info_t **out_ptr)
+{
+    int32_t n_impls = -1;
+
+    if (out_ptr != NULL) {
+        *out_ptr = subtable_lookups;
+        n_impls = ARRAY_SIZE(subtable_lookups);
+    }
+
+    return n_impls;
+}
+
+/* Sets the priority of the lookup implementation called 'name' to 'priority'.
+ * Returns 0 on success, or -EINVAL (after logging a warning) if no
+ * implementation has that name. */
+int32_t
+dpcls_subtable_set_prio(const char *name, uint8_t priority)
+{
+    struct dpcls_subtable_lookup_info_t *impl = subtable_lookups;
+    size_t remaining = ARRAY_SIZE(subtable_lookups);
+
+    for (; remaining > 0; remaining--, impl++) {
+        if (!strcmp(name, impl->name)) {
+            impl->prio = priority;
+            VLOG_INFO("Subtable function '%s' set priority to %d\n",
+                      name, priority);
+            return 0;
+        }
+    }
+
+    VLOG_WARN("Subtable function '%s' not found, failed to set priority\n",
+              name);
+    return -EINVAL;
+}
+
+/* Returns the lookup function to use for a subtable with 'u0_bit_count'
+ * blocks in miniflow unit 0 and 'u1_bit_count' blocks in unit 1.
+ *
+ * Implementations are considered in table order.  One replaces the current
+ * choice only if its priority is strictly higher and its probe returns a
+ * function for this shape, so among equal priorities the earliest entry
+ * wins.  Asserts that some implementation was found; never returns NULL. */
+dpcls_subtable_lookup_func
+dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count)
+{
+    /* Iterate over each subtable impl, and get the highest priority one. */
+    int32_t prio = -1;
+    const char *name = NULL;
+    dpcls_subtable_lookup_func best_func = NULL;
+
+    for (int i = 0; i < ARRAY_SIZE(subtable_lookups); i++) {
+        int32_t probed_prio = subtable_lookups[i].prio;
+        if (probed_prio > prio) {
+            dpcls_subtable_lookup_func probed_func;
+            probed_func = subtable_lookups[i].probe(u0_bit_count,
+                                                    u1_bit_count);
+            if (probed_func) {
+                best_func = probed_func;
+                prio = probed_prio;
+                name = subtable_lookups[i].name;
+            }
+        }
+    }
+
+    /* Programming error - we must always return a valid func ptr.  This is
+     * checked before logging so that a NULL 'name' never reaches "%s". */
+    ovs_assert(best_func != NULL);
+
+    VLOG_DBG("Subtable lookup function '%s' with units (%d,%d), priority %d\n",
+             name, u0_bit_count, u1_bit_count, prio);
+
+    return best_func;
+}
diff --git a/lib/dpif-netdev-lookup.h b/lib/dpif-netdev-lookup.h
new file mode 100644
index 0000000000000000000000000000000000000000..bd72aa29b8ad1f4d7134832c600a6701a0879f75
--- /dev/null
+++ b/lib/dpif-netdev-lookup.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2020 Intel Corporation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPIF_NETDEV_LOOKUP_H
+#define DPIF_NETDEV_LOOKUP_H 1
+
+#include <config.h>
+#include "dpif-netdev.h"
+#include "dpif-netdev-private.h"
+
+/* Function to perform a probe for the subtable bit fingerprint.
+ * Returns NULL if not valid, or a valid function pointer to call for this
+ * subtable on success.
+ */
+typedef
+dpcls_subtable_lookup_func (*dpcls_subtable_probe_func)(uint32_t u0_bit_count,
+ uint32_t u1_bit_count);
+
+/* Prototypes for subtable implementations */
+dpcls_subtable_lookup_func
+dpcls_subtable_autovalidator_probe(uint32_t u0_bit_count,
+ uint32_t u1_bit_count);
+
+/* Probe function to select a specialized version of the generic lookup
+ * implementation. This provides performance benefit due to compile-time
+ * optimizations such as loop-unrolling. These are enabled by the compile-time
+ * constants in the specific function implementations.
+ */
+dpcls_subtable_lookup_func
+dpcls_subtable_generic_probe(uint32_t u0_bit_count, uint32_t u1_bit_count);
+
+/* Probe function for AVX-512 gather implementation */
+dpcls_subtable_lookup_func
+dpcls_subtable_avx512_gather_probe(uint32_t u0_bit_cnt, uint32_t u1_bit_cnt);
+
+
+/* Registration entry for one subtable lookup implementation. */
+struct dpcls_subtable_lookup_info_t {
+ /* Priority: among the implementations whose probe succeeds, the one with
+ * the highest value is used. This allows deployments to select the best
+ * implementation for the use-case. */
+ uint8_t prio;
+
+ /* Probe function: tests if the (u0,u1) combo is supported. If not
+ * supported, this function returns NULL. If supported, a function pointer
+ * is returned which when called will perform the lookup on the subtable.
+ */
+ dpcls_subtable_probe_func probe;
+
+ /* Human-readable name, matched by dpcls_subtable_set_prio(). */
+ const char *name;
+};
+
+int32_t dpcls_subtable_set_prio(const char *name, uint8_t priority);
+
+dpcls_subtable_lookup_func
+dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count);
+
+/* Retrieve the array of lookup implementations for iteration.
+ * On error, returns a negative number.
+ * On success, returns the size of the arrays pointed to by the out parameter.
+ */
+int32_t
+dpcls_subtable_lookup_info_get(struct dpcls_subtable_lookup_info_t **out_ptr);
+
+#endif /* dpif-netdev-lookup.h */
diff --git a/lib/dpif-netdev-private.h b/lib/dpif-netdev-private.h
index 68c33a0f96a828fd4ff829bc184a6d3d756d264e..4fda1220b0227cbcab6a94d02c81d16de2143071 100644
--- a/lib/dpif-netdev-private.h
+++ b/lib/dpif-netdev-private.h
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2015 Nicira, Inc.
- * Copyright (c) 2019 Intel Corperation.
+ * Copyright (c) 2019 Intel Corporation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -60,21 +60,6 @@ uint32_t (*dpcls_subtable_lookup_func)(struct dpcls_subtable *subtable,
const struct netdev_flow_key *keys[],
struct dpcls_rule **rules);
-/* Prototype for generic lookup func, using generic scalar code path. */
-uint32_t
-dpcls_subtable_lookup_generic(struct dpcls_subtable *subtable,
- uint32_t keys_map,
- const struct netdev_flow_key *keys[],
- struct dpcls_rule **rules);
-
-/* Probe function to select a specialized version of the generic lookup
- * implementation. This provides performance benefit due to compile-time
- * optimizations such as loop-unrolling. These are enabled by the compile-time
- * constants in the specific function implementations.
- */
-dpcls_subtable_lookup_func
-dpcls_subtable_generic_probe(uint32_t u0_bit_count, uint32_t u1_bit_count);
-
/* A set of rules that all have the same fields wildcarded. */
struct dpcls_subtable {
/* The fields are only used by writers. */
diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
index 6c54f6f9cc3b49be76be82acd27a19ab726a1c49..858d491df3b3915bbc0defb49a087162f25599c6 100644
--- a/lib/dpif-netdev-unixctl.man
+++ b/lib/dpif-netdev-unixctl.man
@@ -217,3 +217,12 @@ with port names, which this thread polls.
.
.IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
+.
+.IP "\fBdpif-netdev/bond-show\fR [\fIdp\fR]"
+When "other_config:lb-output-action" is set to "true", the userspace datapath
+handles the load balancing of bonds directly instead of depending on flow
+recirculation (only in balance-tcp mode).
+
+When this is the case, the above command prints the load-balancing information
+of the bonds configured in datapath \fIdp\fR showing the interface associated
+with each bucket (hash).
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index d393aab5e3bed833d4eb8a0da035e8918bbe71ec..300861ca5972b66dde63e221b62e74e50edff20a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -36,11 +36,13 @@
#include "bitmap.h"
#include "cmap.h"
#include "conntrack.h"
+#include "conntrack-tp.h"
#include "coverage.h"
#include "ct-dpif.h"
#include "csum.h"
#include "dp-packet.h"
#include "dpif.h"
+#include "dpif-netdev-lookup.h"
#include "dpif-netdev-perf.h"
#include "dpif-provider.h"
#include "dummy.h"
@@ -97,7 +99,6 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
#define DEFAULT_TX_FLUSH_INTERVAL 0
/* Configuration parameters. */
-enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
enum { MAX_METERS = 65536 }; /* Maximum number of meters. */
enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */
enum { N_METER_LOCKS = 64 }; /* Maximum number of meters. */
@@ -110,6 +111,7 @@ COVERAGE_DEFINE(datapath_drop_tunnel_push_error);
COVERAGE_DEFINE(datapath_drop_tunnel_pop_error);
COVERAGE_DEFINE(datapath_drop_recirc_error);
COVERAGE_DEFINE(datapath_drop_invalid_port);
+COVERAGE_DEFINE(datapath_drop_invalid_bond);
COVERAGE_DEFINE(datapath_drop_invalid_tnl_port);
COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
@@ -239,6 +241,9 @@ struct dfc_cache {
* and used during rxq to pmd assignment. */
#define PMD_RXQ_INTERVAL_MAX 6
+/* Time in microseconds to try RCU quiescing. */
+#define PMD_RCU_QUIESCE_INTERVAL 10000LL
+
struct dpcls {
struct cmap_node node; /* Within dp_netdev_pmd_thread.classifiers */
odp_port_t in_port;
@@ -256,6 +261,7 @@ struct dp_packet_flow_map {
static void dpcls_init(struct dpcls *);
static void dpcls_destroy(struct dpcls *);
static void dpcls_sort_subtable_vector(struct dpcls *);
+static uint32_t dpcls_subtable_lookup_reprobe(struct dpcls *cls);
static void dpcls_insert(struct dpcls *, struct dpcls_rule *,
const struct netdev_flow_key *mask);
static void dpcls_remove(struct dpcls *, struct dpcls_rule *);
@@ -309,6 +315,7 @@ struct pmd_auto_lb {
*
* dp_netdev_mutex (global)
* port_mutex
+ * bond_mutex
* non_pmd_mutex
*/
struct dp_netdev {
@@ -376,6 +383,10 @@ struct dp_netdev {
struct conntrack *conntrack;
struct pmd_auto_lb pmd_alb;
+
+ /* Bonds. */
+ struct ovs_mutex bond_mutex; /* Protects updates of 'tx_bonds'. */
+ struct cmap tx_bonds; /* Contains 'struct tx_bond'. */
};
static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
@@ -481,6 +492,12 @@ struct dp_netdev_flow_stats {
atomic_uint16_t tcp_flags; /* Bitwise-OR of seen tcp_flags values. */
};
+/* Contained by struct dp_netdev_flow's 'last_attrs' member. */
+struct dp_netdev_flow_attrs {
+ atomic_bool offloaded; /* True if flow is offloaded to HW. */
+ ATOMIC(const char *) dp_layer; /* DP layer the flow is handled in. */
+};
+
/* A flow in 'dp_netdev_pmd_thread's 'flow_table'.
*
*
@@ -541,6 +558,11 @@ struct dp_netdev_flow {
/* Statistics. */
struct dp_netdev_flow_stats stats;
+ /* Statistics and attributes received from the netdev offload provider. */
+ atomic_int netdev_flow_get_result;
+ struct dp_netdev_flow_stats last_stats;
+ struct dp_netdev_flow_attrs last_attrs;
+
/* Actions. */
OVSRCU_TYPE(struct dp_netdev_actions *) actions;
@@ -607,6 +629,20 @@ struct tx_port {
struct dp_netdev_rxq *output_pkts_rxqs[NETDEV_MAX_BURST];
};
+/* Contained by struct tx_bond 'member_buckets'. */
+struct member_entry {
+ odp_port_t member_id; /* Shown as the bucket's "member" by bond-show. */
+ atomic_ullong n_packets; /* Packet counter kept for this bucket. */
+ atomic_ullong n_bytes; /* Byte counter kept for this bucket. */
+};
+
+/* Contained by 'tx_bonds' of struct dp_netdev and dp_netdev_pmd_thread. */
+struct tx_bond {
+ struct cmap_node node; /* In a 'tx_bonds' cmap, keyed by hash_bond_id(). */
+ uint32_t bond_id;
+ struct member_entry member_buckets[BOND_BUCKETS];
+};
+
/* A set of properties for the current processing loop that is not directly
* associated with the pmd thread itself, but with the packets being
* processed or the short-term system configuration (for example, time).
@@ -739,6 +775,11 @@ struct dp_netdev_pmd_thread {
* read by the pmd thread. */
struct hmap tx_ports OVS_GUARDED;
+ struct ovs_mutex bond_mutex; /* Protects updates of 'tx_bonds'. */
+ /* Map of 'tx_bond's used for transmission. Written by the main thread
+ * and read by the pmd thread. */
+ struct cmap tx_bonds;
+
/* These are thread-local copies of 'tx_ports'. One contains only tunnel
* ports (that support push_tunnel/pop_tunnel), the other contains ports
* with at least one txq (that support send). A port can be in both.
@@ -762,6 +803,9 @@ struct dp_netdev_pmd_thread {
/* Set to true if the pmd thread needs to be reloaded. */
bool need_reload;
+
+ /* Next time when PMD should try RCU quiescing. */
+ long long next_rcu_quiesce;
};
/* Interface to netdev-based datapath. */
@@ -830,6 +874,12 @@ static void dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
static int
dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread *pmd,
bool force);
+static void dp_netdev_add_bond_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct tx_bond *bond, bool update)
+ OVS_EXCLUDED(pmd->bond_mutex);
+static void dp_netdev_del_bond_tx_from_pmd(struct dp_netdev_pmd_thread *pmd,
+ uint32_t bond_id)
+ OVS_EXCLUDED(pmd->bond_mutex);
static void reconfigure_datapath(struct dp_netdev *dp)
OVS_REQUIRES(dp->port_mutex);
@@ -858,6 +908,9 @@ dpif_netdev_xps_revalidate_pmd(const struct dp_netdev_pmd_thread *pmd,
bool purge);
static int dpif_netdev_xps_get_tx_qid(const struct dp_netdev_pmd_thread *pmd,
struct tx_port *tx);
+static inline struct dpcls *
+dp_netdev_pmd_lookup_dpcls(struct dp_netdev_pmd_thread *pmd,
+ odp_port_t in_port);
static inline bool emc_entry_alive(struct emc_entry *ce);
static void emc_clear_entry(struct emc_entry *ce);
@@ -1258,6 +1311,121 @@ sorted_poll_thread_list(struct dp_netdev *dp,
*n = k;
}
+static void
+dpif_netdev_subtable_lookup_get(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                                const char *argv[] OVS_UNUSED,
+                                void *aux OVS_UNUSED)
+{
+    struct dpcls_subtable_lookup_info_t *impls = NULL;
+    struct ds out = DS_EMPTY_INITIALIZER;
+    int32_t n_impls = dpcls_subtable_lookup_info_get(&impls);
+
+    /* Nothing to report if the implementation table is unavailable. */
+    if (n_impls < 0) {
+        unixctl_command_reply_error(conn, "error getting lookup names");
+        return;
+    }
+
+    /* Emit one "priority : name" line per implementation. */
+    ds_put_cstr(&out, "Available lookup functions (priority : name)\n");
+    for (int32_t idx = 0; idx < n_impls; idx++) {
+        ds_put_format(&out, " %d : %s\n", impls[idx].prio, impls[idx].name);
+    }
+    unixctl_command_reply(conn, ds_cstr(&out));
+    ds_destroy(&out);
+}
+
+static void
+dpif_netdev_subtable_lookup_set(struct unixctl_conn *conn, int argc,
+                                const char *argv[], void *aux OVS_UNUSED)
+{
+    /* This function requires 2 parameters (argv[1] and argv[2]) to execute.
+     * argv[1] is subtable name
+     * argv[2] is priority
+     * argv[3] is the datapath name (optional if only 1 datapath exists) */
+    const char *func_name = argv[1];
+
+    /* Parse into an 'unsigned long' so the range check sees the untruncated
+     * value, and reject empty or partially numeric input such as "5x". */
+    errno = 0;
+    char *err_char;
+    unsigned long new_prio = strtoul(argv[2], &err_char, 10);
+    if (errno != 0 || err_char == argv[2] || *err_char != '\0'
+        || new_prio > UINT8_MAX) {
+        unixctl_command_reply_error(conn,
+            "error converting priority, use integer in range 0-255\n");
+        return;
+    }
+
+    int32_t err = dpcls_subtable_set_prio(func_name, new_prio);
+    if (err) {
+        unixctl_command_reply_error(conn,
+            "error, subtable lookup function not found\n");
+        return;
+    }
+
+    /* argv[3] is optional datapath instance. If no datapath name is provided
+     * and only one datapath exists, the one existing datapath is reprobed. */
+    ovs_mutex_lock(&dp_netdev_mutex);
+    struct dp_netdev *dp = NULL;
+
+    if (argc == 4) {
+        dp = shash_find_data(&dp_netdevs, argv[3]);
+    } else if (shash_count(&dp_netdevs) == 1) {
+        dp = shash_first(&dp_netdevs)->data;
+    }
+
+    if (!dp) {
+        ovs_mutex_unlock(&dp_netdev_mutex);
+        unixctl_command_reply_error(conn,
+                                    "please specify an existing datapath");
+        return;
+    }
+
+    /* Get PMD threads list, required to get DPCLS instances. */
+    size_t n;
+    uint32_t lookup_dpcls_changed = 0;
+    uint32_t lookup_subtable_changed = 0;
+    struct dp_netdev_pmd_thread **pmd_list;
+    sorted_poll_thread_list(dp, &pmd_list, &n);
+
+    /* Take the port mutex, as the loop below iterates over 'dp->ports'. */
+    ovs_mutex_lock(&dp->port_mutex);
+
+    for (size_t i = 0; i < n; i++) {
+        struct dp_netdev_pmd_thread *pmd = pmd_list[i];
+        if (pmd->core_id == NON_PMD_CORE_ID) {
+            continue;
+        }
+
+        struct dp_netdev_port *port = NULL;
+        HMAP_FOR_EACH (port, node, &dp->ports) {
+            struct dpcls *cls = dp_netdev_pmd_lookup_dpcls(pmd, port->port_no);
+            if (!cls) {
+                continue;
+            }
+            uint32_t subtbl_changes = dpcls_subtable_lookup_reprobe(cls);
+            if (subtbl_changes) {
+                lookup_dpcls_changed++;
+                lookup_subtable_changed += subtbl_changes;
+            }
+        }
+    }
+
+    /* Release port mutex before netdev mutex. */
+    ovs_mutex_unlock(&dp->port_mutex);
+    ovs_mutex_unlock(&dp_netdev_mutex);
+    free(pmd_list); /* Allocated by sorted_poll_thread_list(). */
+
+    struct ds reply = DS_EMPTY_INITIALIZER;
+    ds_put_format(&reply,
+        "Lookup priority change affected %"PRIu32" dpcls ports and %"PRIu32
+        " subtables.\n", lookup_dpcls_changed, lookup_subtable_changed);
+    unixctl_command_reply(conn, ds_cstr(&reply));
+    VLOG_INFO("%s", ds_cstr(&reply));
+    ds_destroy(&reply);
+}
+
static void
dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
const char *argv[], void *aux OVS_UNUSED)
@@ -1396,6 +1564,49 @@ pmd_perf_show_cmd(struct unixctl_conn *conn, int argc,
par.command_type = PMD_INFO_PERF_SHOW;
dpif_netdev_pmd_info(conn, argc, argv, &par);
}
+
+static void
+dpif_netdev_bond_show(struct unixctl_conn *conn, int argc,
+                      const char *argv[], void *aux OVS_UNUSED)
+{
+    struct ds out = DS_EMPTY_INITIALIZER;
+    struct dp_netdev *dp = NULL;
+
+    /* Use the named datapath, or the only one when no name was given. */
+    ovs_mutex_lock(&dp_netdev_mutex);
+    if (argc == 2) {
+        dp = shash_find_data(&dp_netdevs, argv[1]);
+    } else if (shash_count(&dp_netdevs) == 1) {
+        dp = shash_first(&dp_netdevs)->data;
+    }
+    if (!dp) {
+        ovs_mutex_unlock(&dp_netdev_mutex);
+        unixctl_command_reply_error(conn,
+                                    "please specify an existing datapath");
+        return;
+    }
+
+    /* The reply stays empty when the datapath has no bonds. */
+    if (cmap_count(&dp->tx_bonds) > 0) {
+        struct tx_bond *bond;
+
+        ds_put_cstr(&out, "Bonds:\n");
+        CMAP_FOR_EACH (bond, node, &dp->tx_bonds) {
+            const struct member_entry *buckets = bond->member_buckets;
+
+            ds_put_format(&out, " bond-id %"PRIu32":\n", bond->bond_id);
+            for (int i = 0; i < BOND_BUCKETS; i++) {
+                uint32_t port = odp_to_u32(buckets[i].member_id);
+                ds_put_format(&out, " bucket %d - member %"PRIu32"\n",
+                              i, port);
+            }
+        }
+    }
+    ovs_mutex_unlock(&dp_netdev_mutex);
+    unixctl_command_reply(conn, ds_cstr(&out));
+    ds_destroy(&out);
+}
+
static int
dpif_netdev_init(void)
@@ -1427,6 +1638,16 @@ dpif_netdev_init(void)
"[-us usec] [-q qlen]",
0, 10, pmd_perf_log_set_cmd,
NULL);
+ unixctl_command_register("dpif-netdev/bond-show", "[dp]",
+ 0, 1, dpif_netdev_bond_show,
+ NULL);
+ unixctl_command_register("dpif-netdev/subtable-lookup-prio-set",
+ "[lookup_func] [prio] [dp]",
+ 2, 3, dpif_netdev_subtable_lookup_set,
+ NULL);
+ unixctl_command_register("dpif-netdev/subtable-lookup-prio-get", "",
+ 0, 0, dpif_netdev_subtable_lookup_get,
+ NULL);
return 0;
}
@@ -1551,6 +1772,9 @@ create_dp_netdev(const char *name, const struct dpif_class *class,
ovs_mutex_init_recursive(&dp->port_mutex);
hmap_init(&dp->ports);
dp->port_seq = seq_create();
+ ovs_mutex_init(&dp->bond_mutex);
+ cmap_init(&dp->tx_bonds);
+
fat_rwlock_init(&dp->upcall_rwlock);
dp->reconfigure_seq = seq_create();
@@ -1657,6 +1881,12 @@ dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
}
}
+static uint32_t
+hash_bond_id(uint32_t bond_id)
+{
+ return hash_int(bond_id, 0); /* Hash used to key the 'tx_bonds' cmaps. */
+}
+
/* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
* through the 'dp_netdevs' shash while freeing 'dp'. */
static void
@@ -1664,6 +1894,7 @@ dp_netdev_free(struct dp_netdev *dp)
OVS_REQUIRES(dp_netdev_mutex)
{
struct dp_netdev_port *port, *next;
+ struct tx_bond *bond;
shash_find_and_delete(&dp_netdevs, dp->name);
@@ -1673,6 +1904,13 @@ dp_netdev_free(struct dp_netdev *dp)
}
ovs_mutex_unlock(&dp->port_mutex);
+ ovs_mutex_lock(&dp->bond_mutex);
+ CMAP_FOR_EACH (bond, node, &dp->tx_bonds) {
+ cmap_remove(&dp->tx_bonds, &bond->node, hash_bond_id(bond->bond_id));
+ ovsrcu_postpone(free, bond);
+ }
+ ovs_mutex_unlock(&dp->bond_mutex);
+
dp_netdev_destroy_all_pmds(dp, true);
cmap_destroy(&dp->poll_threads);
@@ -1691,6 +1929,9 @@ dp_netdev_free(struct dp_netdev *dp)
hmap_destroy(&dp->ports);
ovs_mutex_destroy(&dp->port_mutex);
+ cmap_destroy(&dp->tx_bonds);
+ ovs_mutex_destroy(&dp->bond_mutex);
+
/* Upcalls must be disabled at this point */
dp_netdev_destroy_upcall_lock(dp);
@@ -2149,7 +2390,11 @@ dp_netdev_pmd_find_dpcls(struct dp_netdev_pmd_thread *pmd,
}
#define MAX_FLOW_MARK (UINT32_MAX - 1)
-#define INVALID_FLOW_MARK (UINT32_MAX)
+#define INVALID_FLOW_MARK 0
+/* A zero flow mark tells the HW to remove the mark. A packet
+ * marked with zero mark is received in SW without a mark at all, so it
+ * cannot be used as a valid mark.
+ */
struct megaflow_to_mark_data {
const struct cmap_node node;
@@ -2175,7 +2420,7 @@ flow_mark_alloc(void)
if (!flow_mark.pool) {
/* Haven't initiated yet, do it here */
- flow_mark.pool = id_pool_create(0, MAX_FLOW_MARK);
+ flow_mark.pool = id_pool_create(1, MAX_FLOW_MARK);
}
if (id_pool_alloc_id(flow_mark.pool, &mark)) {
@@ -2253,7 +2498,8 @@ mark_to_flow_associate(const uint32_t mark, struct dp_netdev_flow *flow)
hash_int(mark, 0));
flow->mark = mark;
- VLOG_DBG("Associated dp_netdev flow %p with mark %u\n", flow, mark);
+ VLOG_DBG("Associated dp_netdev flow %p with mark %u mega_ufid "UUID_FMT,
+ flow, mark, UUID_ARGS((struct uuid *) &flow->mega_ufid));
}
static bool
@@ -2275,10 +2521,17 @@ static int
mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd,
struct dp_netdev_flow *flow)
{
- int ret = 0;
- uint32_t mark = flow->mark;
+ const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
struct cmap_node *mark_node = CONST_CAST(struct cmap_node *,
&flow->mark_node);
+ uint32_t mark = flow->mark;
+ int ret = 0;
+
+ /* INVALID_FLOW_MARK may mean that the flow has been disassociated or
+ * never associated. */
+ if (OVS_UNLIKELY(mark == INVALID_FLOW_MARK)) {
+ return EINVAL;
+ }
cmap_remove(&flow_mark.mark_to_flow, mark_node, hash_int(mark, 0));
flow->mark = INVALID_FLOW_MARK;
@@ -2291,7 +2544,7 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd,
struct netdev *port;
odp_port_t in_port = flow->flow.in_port.odp_port;
- port = netdev_ports_get(in_port, pmd->dp->class);
+ port = netdev_ports_get(in_port, dpif_type_str);
if (port) {
/* Taking a global 'port_mutex' to fulfill thread safety
* restrictions for the netdev-offload-dpdk module. */
@@ -2302,7 +2555,8 @@ mark_to_flow_disassociate(struct dp_netdev_pmd_thread *pmd,
}
flow_mark_free(mark);
- VLOG_DBG("Freed flow mark %u\n", mark);
+ VLOG_DBG("Freed flow mark %u mega_ufid "UUID_FMT, mark,
+ UUID_ARGS((struct uuid *) &flow->mega_ufid));
megaflow_to_mark_disassociate(&flow->mega_ufid);
}
@@ -2398,9 +2652,9 @@ static int
dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
{
struct dp_netdev_pmd_thread *pmd = offload->pmd;
- const struct dpif_class *dpif_class = pmd->dp->class;
struct dp_netdev_flow *flow = offload->flow;
odp_port_t in_port = flow->flow.in_port.odp_port;
+ const char *dpif_type_str = dpif_normalize_type(pmd->dp->class->type);
bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
struct offload_info info;
struct netdev *port;
@@ -2433,12 +2687,12 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
mark = flow_mark_alloc();
if (mark == INVALID_FLOW_MARK) {
VLOG_ERR("Failed to allocate flow mark!\n");
+ return -1;
}
}
info.flow_mark = mark;
- info.dpif_class = dpif_class;
- port = netdev_ports_get(in_port, pmd->dp->class);
+ port = netdev_ports_get(in_port, dpif_type_str);
if (!port || netdev_vport_is_vport_class(port->netdev_class)) {
netdev_close(port);
goto err_free;
@@ -2509,9 +2763,11 @@ dp_netdev_flow_offload_main(void *data OVS_UNUSED)
OVS_NOT_REACHED();
}
- VLOG_DBG("%s to %s netdev flow\n",
- ret == 0 ? "succeed" : "failed", op);
+ VLOG_DBG("%s to %s netdev flow "UUID_FMT,
+ ret == 0 ? "succeed" : "failed", op,
+ UUID_ARGS((struct uuid *) &offload->flow->mega_ufid));
dp_netdev_free_flow_offload(offload);
+ ovsrcu_quiesce();
}
return NULL;
@@ -3032,9 +3288,56 @@ dp_netdev_pmd_find_flow(const struct dp_netdev_pmd_thread *pmd,
return NULL;
}
+/* Caches 'result' and, when zero, 'stats' and 'attrs' in 'netdev_flow'. */
+static void
+dp_netdev_flow_set_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
+ const struct dpif_flow_stats *stats,
+ const struct dpif_flow_attrs *attrs,
+ int result)
+{
+ struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
+ struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
+
+ atomic_store_relaxed(&netdev_flow->netdev_flow_get_result, result);
+ if (result) {
+ return;
+ }
+
+ atomic_store_relaxed(&last_stats->used, stats->used);
+ atomic_store_relaxed(&last_stats->packet_count, stats->n_packets);
+ atomic_store_relaxed(&last_stats->byte_count, stats->n_bytes);
+ atomic_store_relaxed(&last_stats->tcp_flags, stats->tcp_flags);
+
+ atomic_store_relaxed(&last_attrs->offloaded, attrs->offloaded);
+ atomic_store_relaxed(&last_attrs->dp_layer, attrs->dp_layer);
+}
+
+static void
+dp_netdev_flow_get_last_stats_attrs(struct dp_netdev_flow *netdev_flow,
+ struct dpif_flow_stats *stats,
+ struct dpif_flow_attrs *attrs,
+ int *result)
+{
+ struct dp_netdev_flow_stats *last_stats = &netdev_flow->last_stats;
+ struct dp_netdev_flow_attrs *last_attrs = &netdev_flow->last_attrs;
+
+ atomic_read_relaxed(&netdev_flow->netdev_flow_get_result, result);
+ if (*result) {
+ return; /* Stored result is an error: 'stats'/'attrs' left untouched. */
+ }
+
+ atomic_read_relaxed(&last_stats->used, &stats->used);
+ atomic_read_relaxed(&last_stats->packet_count, &stats->n_packets);
+ atomic_read_relaxed(&last_stats->byte_count, &stats->n_bytes);
+ atomic_read_relaxed(&last_stats->tcp_flags, &stats->tcp_flags);
+
+ atomic_read_relaxed(&last_attrs->offloaded, &attrs->offloaded);
+ atomic_read_relaxed(&last_attrs->dp_layer, &attrs->dp_layer);
+}
+
static bool
dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
- const struct dp_netdev_flow *netdev_flow,
+ struct dp_netdev_flow *netdev_flow,
struct dpif_flow_stats *stats,
struct dpif_flow_attrs *attrs)
{
@@ -3050,17 +3353,38 @@ dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
return false;
}
- netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, dp->class);
+ netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port,
+ dpif_normalize_type(dp->class->type));
if (!netdev) {
return false;
}
ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
/* Taking a global 'port_mutex' to fulfill thread safety
- * restrictions for the netdev-offload-dpdk module. */
- ovs_mutex_lock(&dp->port_mutex);
- ret = netdev_flow_get(netdev, &match, &actions, &netdev_flow->mega_ufid,
- stats, attrs, &buf);
- ovs_mutex_unlock(&dp->port_mutex);
+ * restrictions for the netdev-offload-dpdk module.
+ *
+ * XXX: Main thread will try to pause/stop all revalidators during datapath
+ * reconfiguration via datapath purge callback (dp_purge_cb) while
+ * holding 'dp->port_mutex'. So we're not waiting for mutex here.
+ * Otherwise, deadlock is possible, because revalidators might sleep
+ * waiting for the main thread to release the lock and main thread
+ * will wait for them to stop processing.
+ * This workaround might make statistics less accurate. Especially
+ * for flow deletion case, since there will be no other attempt. */
+ if (!ovs_mutex_trylock(&dp->port_mutex)) {
+ ret = netdev_flow_get(netdev, &match, &actions,
+ &netdev_flow->mega_ufid, stats, attrs, &buf);
+ /* Storing statistics and attributes from the last request for
+ * later use on mutex contention. */
+ dp_netdev_flow_set_last_stats_attrs(netdev_flow, stats, attrs, ret);
+ ovs_mutex_unlock(&dp->port_mutex);
+ } else {
+ dp_netdev_flow_get_last_stats_attrs(netdev_flow, stats, attrs, &ret);
+ if (!ret && !attrs->dp_layer) {
+ /* Flow was never reported as 'offloaded' so it's harmless
+ * to continue to think so. */
+ ret = EAGAIN;
+ }
+ }
netdev_close(netdev);
if (ret) {
return false;
@@ -3329,6 +3653,9 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
/* Do not allocate extra space. */
flow = xmalloc(sizeof *flow - sizeof flow->cr.flow.mf + mask.len);
memset(&flow->stats, 0, sizeof flow->stats);
+ atomic_init(&flow->netdev_flow_get_result, 0);
+ memset(&flow->last_stats, 0, sizeof flow->last_stats);
+ memset(&flow->last_attrs, 0, sizeof flow->last_attrs);
flow->dead = false;
flow->batch = NULL;
flow->mark = INVALID_FLOW_MARK;
@@ -3380,6 +3707,8 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
ds_put_cstr(&ds, "flow_add: ");
odp_format_ufid(ufid, &ds);
+ ds_put_cstr(&ds, " mega_");
+ odp_format_ufid(&flow->mega_ufid, &ds);
ds_put_cstr(&ds, " ");
odp_flow_format(key_buf.data, key_buf.size,
mask_buf.data, mask_buf.size,
@@ -3428,13 +3757,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
if (!netdev_flow) {
if (put->flags & DPIF_FP_CREATE) {
- if (cmap_count(&pmd->flow_table) < MAX_FLOWS) {
- dp_netdev_flow_add(pmd, match, ufid, put->actions,
- put->actions_len);
- error = 0;
- } else {
- error = EFBIG;
- }
+ dp_netdev_flow_add(pmd, match, ufid, put->actions,
+ put->actions_len);
} else {
error = ENOENT;
}
@@ -4421,6 +4745,20 @@ tx_port_lookup(const struct hmap *hmap, odp_port_t port_no)
return NULL;
}
+static struct tx_bond *
+tx_bond_lookup(const struct cmap *tx_bonds, uint32_t bond_id)
+{
+    struct tx_bond *entry;
+    uint32_t id_hash = hash_bond_id(bond_id);
+
+    CMAP_FOR_EACH_WITH_HASH (entry, node, id_hash, tx_bonds) {
+        if (entry->bond_id == bond_id) {
+            return entry; /* Same hash is not enough; the ids must match. */
+        }
+    }
+    return NULL;
+}
+
static int
port_reconfigure(struct dp_netdev_port *port)
{
@@ -4940,9 +5278,17 @@ reconfigure_datapath(struct dp_netdev *dp)
/* Check for all the ports that need reconfiguration. We cache this in
* 'port->need_reconfigure', because netdev_is_reconf_required() can
- * change at any time. */
+ * change at any time.
+ * Also mark for reconfiguration all ports which will likely change their
+ * 'dynamic_txqs' parameter. It's required to stop using them before
+ * changing this setting and it's simpler to mark ports here and allow
+ * 'pmd_remove_stale_ports' to remove them from threads. There will be
+ * no actual reconfiguration in 'port_reconfigure' because it's
+ * unnecessary. */
HMAP_FOR_EACH (port, node, &dp->ports) {
- if (netdev_is_reconf_required(port->netdev)) {
+ if (netdev_is_reconf_required(port->netdev)
+ || (port->dynamic_txqs
+ != (netdev_n_txq(port->netdev) < wanted_txqs))) {
port->need_reconfigure = true;
}
}
@@ -5060,14 +5406,22 @@ reconfigure_datapath(struct dp_netdev *dp)
}
}
- /* Add every port to the tx cache of every pmd thread, if it's not
- * there already and if this pmd has at least one rxq to poll. */
+ /* Add every port and bond to the tx port and bond caches of
+ * every pmd thread, if it's not there already and if this pmd
+ * has at least one rxq to poll.
+ */
CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
ovs_mutex_lock(&pmd->port_mutex);
if (hmap_count(&pmd->poll_list) || pmd->core_id == NON_PMD_CORE_ID) {
+ struct tx_bond *bond;
+
HMAP_FOR_EACH (port, node, &dp->ports) {
dp_netdev_add_port_tx_to_pmd(pmd, port);
}
+
+ CMAP_FOR_EACH (bond, node, &dp->tx_bonds) {
+ dp_netdev_add_bond_tx_to_pmd(pmd, bond, false);
+ }
}
ovs_mutex_unlock(&pmd->port_mutex);
}
@@ -5607,6 +5961,9 @@ reload:
pmd->intrvl_tsc_prev = 0;
atomic_store_relaxed(&pmd->intrvl_cycles, 0);
cycles_counter_update(s);
+
+ pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
+
/* Protect pmd stats from external clearing while polling. */
ovs_mutex_lock(&pmd->perf_stats.stats_mutex);
for (;;) {
@@ -5641,6 +5998,16 @@ reload:
tx_packets = dp_netdev_pmd_flush_output_packets(pmd, false);
}
+ /* Do RCU synchronization at fixed interval. This ensures that
+ * synchronization would not be delayed long even at high load of
+ * packet processing. */
+ if (pmd->ctx.now > pmd->next_rcu_quiesce) {
+ if (!ovsrcu_try_quiesce()) {
+ pmd->next_rcu_quiesce =
+ pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
+ }
+ }
+
if (lc++ > 1024) {
lc = 0;
@@ -5648,6 +6015,8 @@ reload:
dp_netdev_pmd_try_optimize(pmd, poll_list, poll_cnt);
if (!ovsrcu_try_quiesce()) {
emc_cache_slow_sweep(&((pmd->flow_cache).emc_cache));
+ pmd->next_rcu_quiesce =
+ pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
}
for (i = 0; i < poll_cnt; i++) {
@@ -6110,16 +6479,19 @@ dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
atomic_init(&pmd->reload, false);
ovs_mutex_init(&pmd->flow_mutex);
ovs_mutex_init(&pmd->port_mutex);
+ ovs_mutex_init(&pmd->bond_mutex);
cmap_init(&pmd->flow_table);
cmap_init(&pmd->classifiers);
pmd->ctx.last_rxq = NULL;
pmd_thread_ctx_time_update(pmd);
pmd->next_optimization = pmd->ctx.now + DPCLS_OPTIMIZATION_INTERVAL;
+ pmd->next_rcu_quiesce = pmd->ctx.now + PMD_RCU_QUIESCE_INTERVAL;
pmd->rxq_next_cycle_store = pmd->ctx.now + PMD_RXQ_INTERVAL_LEN;
hmap_init(&pmd->poll_list);
hmap_init(&pmd->tx_ports);
hmap_init(&pmd->tnl_port_cache);
hmap_init(&pmd->send_port_cache);
+ cmap_init(&pmd->tx_bonds);
/* init the 'flow_cache' since there is no
* actual thread created for NON_PMD_CORE_ID. */
if (core_id == NON_PMD_CORE_ID) {
@@ -6140,6 +6512,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
hmap_destroy(&pmd->send_port_cache);
hmap_destroy(&pmd->tnl_port_cache);
hmap_destroy(&pmd->tx_ports);
+ cmap_destroy(&pmd->tx_bonds);
hmap_destroy(&pmd->poll_list);
/* All flows (including their dpcls_rules) have been deleted already */
CMAP_FOR_EACH (cls, node, &pmd->classifiers) {
@@ -6151,6 +6524,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
ovs_mutex_destroy(&pmd->flow_mutex);
seq_destroy(pmd->reload_seq);
ovs_mutex_destroy(&pmd->port_mutex);
+ ovs_mutex_destroy(&pmd->bond_mutex);
free(pmd);
}
@@ -6220,6 +6594,7 @@ dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd)
{
struct rxq_poll *poll;
struct tx_port *port;
+ struct tx_bond *tx;
ovs_mutex_lock(&pmd->port_mutex);
HMAP_FOR_EACH_POP (poll, node, &pmd->poll_list) {
@@ -6229,6 +6604,13 @@ dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd)
free(port);
}
ovs_mutex_unlock(&pmd->port_mutex);
+
+ ovs_mutex_lock(&pmd->bond_mutex);
+ CMAP_FOR_EACH (tx, node, &pmd->tx_bonds) {
+ cmap_remove(&pmd->tx_bonds, &tx->node, hash_bond_id(tx->bond_id));
+ ovsrcu_postpone(free, tx);
+ }
+ ovs_mutex_unlock(&pmd->bond_mutex);
}
/* Adds rx queue to poll_list of PMD thread, if it's not there already. */
@@ -6304,6 +6686,62 @@ dp_netdev_del_port_tx_from_pmd(struct dp_netdev_pmd_thread *pmd,
free(tx);
pmd->need_reload = true;
}
+
+/* Adds a copy of 'bond' to 'pmd' tx bonds; replaces only if 'update'. */
+static void
+dp_netdev_add_bond_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct tx_bond *bond, bool update)
+ OVS_EXCLUDED(pmd->bond_mutex)
+{
+ struct tx_bond *tx;
+
+ ovs_mutex_lock(&pmd->bond_mutex);
+ tx = tx_bond_lookup(&pmd->tx_bonds, bond->bond_id);
+
+ if (tx && !update) {
+ /* It's not an update and the entry already exists. Do nothing. */
+ goto unlock;
+ }
+
+ if (tx) {
+ struct tx_bond *new_tx = xmemdup(bond, sizeof *bond);
+
+ /* Carry the old entry's per-bucket counters over to the new copy. */
+ for (int i = 0; i < BOND_BUCKETS; i++) {
+ uint64_t n_packets, n_bytes;
+
+ atomic_read_relaxed(&tx->member_buckets[i].n_packets, &n_packets);
+ atomic_read_relaxed(&tx->member_buckets[i].n_bytes, &n_bytes);
+ atomic_init(&new_tx->member_buckets[i].n_packets, n_packets);
+ atomic_init(&new_tx->member_buckets[i].n_bytes, n_bytes);
+ }
+ cmap_replace(&pmd->tx_bonds, &tx->node, &new_tx->node,
+ hash_bond_id(bond->bond_id));
+ ovsrcu_postpone(free, tx); /* Old entry is freed via RCU postpone. */
+ } else {
+ tx = xmemdup(bond, sizeof *bond); /* No entry yet: insert a copy. */
+ cmap_insert(&pmd->tx_bonds, &tx->node, hash_bond_id(bond->bond_id));
+ }
+unlock:
+ ovs_mutex_unlock(&pmd->bond_mutex);
+}
+
+/* Removes the entry for 'bond_id', if any, from the tx bond cmap of 'pmd'. */
+static void
+dp_netdev_del_bond_tx_from_pmd(struct dp_netdev_pmd_thread *pmd,
+ uint32_t bond_id)
+ OVS_EXCLUDED(pmd->bond_mutex)
+{
+ struct tx_bond *tx;
+
+ ovs_mutex_lock(&pmd->bond_mutex);
+ tx = tx_bond_lookup(&pmd->tx_bonds, bond_id);
+ if (tx) {
+ cmap_remove(&pmd->tx_bonds, &tx->node, hash_bond_id(tx->bond_id));
+ ovsrcu_postpone(free, tx); /* Freed via RCU postpone, not here. */
+ }
+ ovs_mutex_unlock(&pmd->bond_mutex);
+}
static char *
dpif_netdev_get_datapath_version(void)
@@ -7129,6 +7567,97 @@ dp_execute_userspace_action(struct dp_netdev_pmd_thread *pmd,
}
}
+static bool /* False if 'port_no' is not in the send port cache, else true. */
+dp_execute_output_action(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets_,
+ bool should_steal, odp_port_t port_no)
+{
+ struct tx_port *p = pmd_send_port_cache_lookup(pmd, port_no);
+ struct dp_packet_batch out;
+
+ if (!OVS_LIKELY(p)) {
+ COVERAGE_ADD(datapath_drop_invalid_port,
+ dp_packet_batch_size(packets_));
+ dp_packet_delete_batch(packets_, should_steal);
+ return false;
+ }
+ if (!should_steal) { /* Queue a clone; the caller keeps 'packets_'. */
+ dp_packet_batch_clone(&out, packets_);
+ dp_packet_batch_reset_cutlen(packets_);
+ packets_ = &out;
+ }
+ dp_packet_batch_apply_cutlen(packets_);
+#ifdef DPDK_NETDEV
+ if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
+ && packets_->packets[0]->source
+ != p->output_pkts.packets[0]->source)) {
+ /* XXX: netdev-dpdk assumes that all packets in a single
+ * output batch have the same source. Flush here to
+ * avoid memory access issues. */
+ dp_netdev_pmd_flush_output_on_port(pmd, p);
+ }
+#endif
+ if (dp_packet_batch_size(&p->output_pkts)
+ + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) {
+ /* Flush here to avoid overflow. */
+ dp_netdev_pmd_flush_output_on_port(pmd, p);
+ }
+ if (dp_packet_batch_is_empty(&p->output_pkts)) {
+ pmd->n_output_batches++;
+ }
+
+ struct dp_packet *packet;
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
+ p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
+ pmd->ctx.last_rxq;
+ dp_packet_batch_add(&p->output_pkts, packet);
+ }
+ return true;
+}
+
+static void /* Sends each packet to the member of 'bond' picked by its hash. */
+dp_execute_lb_output_action(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet_batch *packets_,
+ bool should_steal, uint32_t bond)
+{
+ struct tx_bond *p_bond = tx_bond_lookup(&pmd->tx_bonds, bond);
+ struct dp_packet_batch out;
+ struct dp_packet *packet;
+
+ if (!p_bond) {
+ COVERAGE_ADD(datapath_drop_invalid_bond,
+ dp_packet_batch_size(packets_));
+ dp_packet_delete_batch(packets_, should_steal);
+ return;
+ }
+ if (!should_steal) { /* Work on a clone; the caller keeps 'packets_'. */
+ dp_packet_batch_clone(&out, packets_);
+ dp_packet_batch_reset_cutlen(packets_);
+ packets_ = &out;
+ }
+ dp_packet_batch_apply_cutlen(packets_);
+
+ DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
+ /*
+ * Index the bond's buckets with (RSS hash & BOND_MASK) to get the member.
+ */
+ uint32_t hash = dp_packet_get_rss_hash(packet);
+ struct member_entry *s_entry
+ = &p_bond->member_buckets[hash & BOND_MASK];
+ odp_port_t bond_member = s_entry->member_id;
+ uint32_t size = dp_packet_size(packet); /* Read before the output call. */
+ struct dp_packet_batch output_pkt;
+
+ dp_packet_batch_init_packet(&output_pkt, packet);
+ if (OVS_LIKELY(dp_execute_output_action(pmd, &output_pkt, true,
+ bond_member))) {
+ /* Update member stats only when the output call returned true. */
+ non_atomic_ullong_add(&s_entry->n_packets, 1);
+ non_atomic_ullong_add(&s_entry->n_bytes, size);
+ }
+ }
+}
+
static void
dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
const struct nlattr *a, bool should_steal)
@@ -7144,49 +7673,14 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
switch ((enum ovs_action_attr)type) {
case OVS_ACTION_ATTR_OUTPUT:
- p = pmd_send_port_cache_lookup(pmd, nl_attr_get_odp_port(a));
- if (OVS_LIKELY(p)) {
- struct dp_packet *packet;
- struct dp_packet_batch out;
-
- if (!should_steal) {
- dp_packet_batch_clone(&out, packets_);
- dp_packet_batch_reset_cutlen(packets_);
- packets_ = &out;
- }
- dp_packet_batch_apply_cutlen(packets_);
-
-#ifdef DPDK_NETDEV
- if (OVS_UNLIKELY(!dp_packet_batch_is_empty(&p->output_pkts)
- && packets_->packets[0]->source
- != p->output_pkts.packets[0]->source)) {
- /* XXX: netdev-dpdk assumes that all packets in a single
- * output batch has the same source. Flush here to
- * avoid memory access issues. */
- dp_netdev_pmd_flush_output_on_port(pmd, p);
- }
-#endif
- if (dp_packet_batch_size(&p->output_pkts)
- + dp_packet_batch_size(packets_) > NETDEV_MAX_BURST) {
- /* Flush here to avoid overflow. */
- dp_netdev_pmd_flush_output_on_port(pmd, p);
- }
-
- if (dp_packet_batch_is_empty(&p->output_pkts)) {
- pmd->n_output_batches++;
- }
+ dp_execute_output_action(pmd, packets_, should_steal,
+ nl_attr_get_odp_port(a));
+ return;
- DP_PACKET_BATCH_FOR_EACH (i, packet, packets_) {
- p->output_pkts_rxqs[dp_packet_batch_size(&p->output_pkts)] =
- pmd->ctx.last_rxq;
- dp_packet_batch_add(&p->output_pkts, packet);
- }
- return;
- } else {
- COVERAGE_ADD(datapath_drop_invalid_port,
- dp_packet_batch_size(packets_));
- }
- break;
+ case OVS_ACTION_ATTR_LB_OUTPUT:
+ dp_execute_lb_output_action(pmd, packets_, should_steal,
+ nl_attr_get_u32(a));
+ return;
case OVS_ACTION_ATTR_TUNNEL_PUSH:
if (should_steal) {
@@ -7328,6 +7822,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
bool commit = false;
unsigned int left;
uint16_t zone = 0;
+ uint32_t tp_id = 0;
const char *helper = NULL;
const uint32_t *setmark = NULL;
const struct ovs_key_ct_labels *setlabel = NULL;
@@ -7363,8 +7858,11 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
* netlink events. */
break;
case OVS_CT_ATTR_TIMEOUT:
- /* Userspace datapath does not support customized timeout
- * policy yet. */
+ if (!str_to_uint(nl_attr_get_string(b), 10, &tp_id)) {
+ VLOG_WARN("Invalid Timeout Policy ID: %s.",
+ nl_attr_get_string(b));
+ tp_id = DEFAULT_TP_ID;
+ }
break;
case OVS_CT_ATTR_NAT: {
const struct nlattr *b_nest;
@@ -7450,7 +7948,7 @@ dp_execute_cb(void *aux_, struct dp_packet_batch *packets_,
conntrack_execute(dp->conntrack, packets_, aux->flow->dl_type, force,
commit, zone, setmark, setlabel, aux->flow->tp_src,
aux->flow->tp_dst, helper, nat_action_info_ref,
- pmd->ctx.now / 1000);
+ pmd->ctx.now / 1000, tp_id);
break;
}
@@ -7683,6 +8181,62 @@ dpif_netdev_ct_del_limits(struct dpif *dpif OVS_UNUSED,
return err;
}
+static int /* Copies 'dpif_tp' and passes it to timeout_policy_update(). */
+dpif_netdev_ct_set_timeout_policy(struct dpif *dpif,
+ const struct ct_dpif_timeout_policy *dpif_tp)
+{
+ struct timeout_policy tp;
+ struct dp_netdev *dp;
+
+ dp = get_dp_netdev(dpif);
+ memcpy(&tp.policy, dpif_tp, sizeof tp.policy);
+ return timeout_policy_update(dp->conntrack, &tp);
+}
+
+static int /* Copies policy 'tp_id' into 'dpif_tp'; ENOENT if not found. */
+dpif_netdev_ct_get_timeout_policy(struct dpif *dpif, uint32_t tp_id,
+ struct ct_dpif_timeout_policy *dpif_tp)
+{
+ struct timeout_policy *tp;
+ struct dp_netdev *dp;
+ int err = 0;
+
+ dp = get_dp_netdev(dpif);
+ tp = timeout_policy_get(dp->conntrack, tp_id);
+ if (!tp) {
+ return ENOENT;
+ }
+ memcpy(dpif_tp, &tp->policy, sizeof tp->policy);
+ return err; /* Still 0 here. */
+}
+
+static int /* Returns the result of timeout_policy_delete() for 'tp_id'. */
+dpif_netdev_ct_del_timeout_policy(struct dpif *dpif,
+ uint32_t tp_id)
+{
+ struct dp_netdev *dp;
+ int err = 0;
+
+ dp = get_dp_netdev(dpif);
+ err = timeout_policy_delete(dp->conntrack, tp_id);
+ return err;
+}
+
+static int /* Sets '*tp_name' to 'tp_id' printed in decimal; returns 0. */
+dpif_netdev_ct_get_timeout_policy_name(struct dpif *dpif OVS_UNUSED,
+ uint32_t tp_id,
+ uint16_t dl_type OVS_UNUSED,
+ uint8_t nw_proto OVS_UNUSED,
+ char **tp_name, bool *is_generic)
+{
+ struct ds ds = DS_EMPTY_INITIALIZER;
+
+ ds_put_format(&ds, "%"PRIu32, tp_id);
+ *tp_name = ds_steal_cstr(&ds); /* NOTE(review): caller presumably frees. */
+ *is_generic = true; /* 'dl_type' and 'nw_proto' are not consulted. */
+ return 0;
+}
+
static int
dpif_netdev_ipf_set_enabled(struct dpif *dpif, bool v6, bool enable)
{
@@ -7738,6 +8292,98 @@ dpif_netdev_ipf_dump_done(struct dpif *dpif OVS_UNUSED, void *ipf_dump_ctx)
}
+static int /* Adds or replaces bond 'bond_id' in 'dp' and in every PMD. */
+dpif_netdev_bond_add(struct dpif *dpif, uint32_t bond_id,
+ odp_port_t *member_map)
+{
+ struct tx_bond *new_tx = xzalloc(sizeof *new_tx);
+ struct dp_netdev *dp = get_dp_netdev(dpif);
+ struct dp_netdev_pmd_thread *pmd;
+
+ /* Build the new mapping; reads BOND_BUCKETS entries of 'member_map'. */
+ new_tx->bond_id = bond_id;
+ for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+ new_tx->member_buckets[bucket].member_id = member_map[bucket];
+ }
+
+ ovs_mutex_lock(&dp->bond_mutex);
+ /* Replace the datapath-level entry if the bond already existed. */
+ struct tx_bond *old_tx = tx_bond_lookup(&dp->tx_bonds, bond_id);
+ if (old_tx) {
+ cmap_replace(&dp->tx_bonds, &old_tx->node, &new_tx->node,
+ hash_bond_id(bond_id));
+ ovsrcu_postpone(free, old_tx);
+ } else {
+ cmap_insert(&dp->tx_bonds, &new_tx->node, hash_bond_id(bond_id));
+ }
+ ovs_mutex_unlock(&dp->bond_mutex);
+
+ /* Hand every PMD a copy of the new mapping, replacing any old one. */
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ dp_netdev_add_bond_tx_to_pmd(pmd, new_tx, true);
+ }
+ return 0;
+}
+
+static int /* Removes bond 'bond_id' from 'dp' and all PMDs; ENOENT if absent. */
+dpif_netdev_bond_del(struct dpif *dpif, uint32_t bond_id)
+{
+ struct dp_netdev *dp = get_dp_netdev(dpif);
+ struct dp_netdev_pmd_thread *pmd;
+ struct tx_bond *tx;
+
+ ovs_mutex_lock(&dp->bond_mutex);
+ /* Look the bond up in the datapath-level map first. */
+ tx = tx_bond_lookup(&dp->tx_bonds, bond_id);
+ if (tx) {
+ cmap_remove(&dp->tx_bonds, &tx->node, hash_bond_id(bond_id));
+ ovsrcu_postpone(free, tx);
+ } else {
+ /* Bond is not present: the PMD maps are left untouched. */
+ ovs_mutex_unlock(&dp->bond_mutex);
+ return ENOENT;
+ }
+ ovs_mutex_unlock(&dp->bond_mutex);
+
+ /* Remove the bond map in all pmds. */
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ dp_netdev_del_bond_tx_from_pmd(pmd, bond_id);
+ }
+ return 0;
+}
+
+static int /* Adds each PMD's per-bucket byte counts for 'bond_id' to 'n_bytes'. */
+dpif_netdev_bond_stats_get(struct dpif *dpif, uint32_t bond_id,
+ uint64_t *n_bytes)
+{
+ struct dp_netdev *dp = get_dp_netdev(dpif);
+ struct dp_netdev_pmd_thread *pmd;
+
+ if (!tx_bond_lookup(&dp->tx_bonds, bond_id)) {
+ return ENOENT; /* Unknown at datapath level: 'n_bytes' is untouched. */
+ }
+
+ /* Search the bond in all PMDs. */
+ CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+ struct tx_bond *pmd_bond_entry
+ = tx_bond_lookup(&pmd->tx_bonds, bond_id);
+
+ if (!pmd_bond_entry) {
+ continue;
+ }
+
+ /* Accumulate: 'n_bytes[i]' is added to, never overwritten. */
+ for (int i = 0; i < BOND_BUCKETS; i++) {
+ uint64_t pmd_n_bytes;
+
+ atomic_read_relaxed(&pmd_bond_entry->member_buckets[i].n_bytes,
+ &pmd_n_bytes);
+ n_bytes[i] += pmd_n_bytes;
+ }
+ }
+ return 0;
+}
+
const struct dpif_class dpif_netdev_class = {
"netdev",
true, /* cleanup_required */
@@ -7793,13 +8439,13 @@ const struct dpif_class dpif_netdev_class = {
dpif_netdev_ct_set_limits,
dpif_netdev_ct_get_limits,
dpif_netdev_ct_del_limits,
- NULL, /* ct_set_timeout_policy */
- NULL, /* ct_get_timeout_policy */
- NULL, /* ct_del_timeout_policy */
+ dpif_netdev_ct_set_timeout_policy,
+ dpif_netdev_ct_get_timeout_policy,
+ dpif_netdev_ct_del_timeout_policy,
NULL, /* ct_timeout_policy_dump_start */
NULL, /* ct_timeout_policy_dump_next */
NULL, /* ct_timeout_policy_dump_done */
- NULL, /* ct_get_timeout_policy_name */
+ dpif_netdev_ct_get_timeout_policy_name,
dpif_netdev_ipf_set_enabled,
dpif_netdev_ipf_set_min_frag,
dpif_netdev_ipf_set_max_nfrags,
@@ -7811,6 +8457,9 @@ const struct dpif_class dpif_netdev_class = {
dpif_netdev_meter_set,
dpif_netdev_meter_get,
dpif_netdev_meter_del,
+ dpif_netdev_bond_add,
+ dpif_netdev_bond_del,
+ dpif_netdev_bond_stats_get,
};
static void
@@ -7983,13 +8632,11 @@ dpcls_create_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
subtable->mf_masks = xmalloc(sizeof(uint64_t) * (unit0 + unit1));
netdev_flow_key_gen_masks(mask, subtable->mf_masks, unit0, unit1);
- /* Probe for a specialized generic lookup function. */
- subtable->lookup_func = dpcls_subtable_generic_probe(unit0, unit1);
-
- /* If not set, assign generic lookup. Generic works for any miniflow. */
- if (!subtable->lookup_func) {
- subtable->lookup_func = dpcls_subtable_lookup_generic;
- }
+ /* Get the preferred subtable search function for this (u0,u1) subtable.
+ * The function is guaranteed to always return a valid implementation, and
+ * possibly an ISA optimized, and/or specialized implementation.
+ */
+ subtable->lookup_func = dpcls_subtable_get_best_impl(unit0, unit1);
cmap_insert(&cls->subtables_map, &subtable->cmap_node, mask->hash);
/* Add the new subtable at the end of the pvector (with no hits yet) */
@@ -8015,6 +8662,28 @@ dpcls_find_subtable(struct dpcls *cls, const struct netdev_flow_key *mask)
return dpcls_create_subtable(cls, mask);
}
+/* Re-selects the lookup function of every subtable in 'cls' by calling
+ * dpcls_subtable_get_best_impl() with the subtable's unit0/unit1 bit counts.
+ * Returns the number of subtables whose lookup function pointer changed.
+ */
+static uint32_t
+dpcls_subtable_lookup_reprobe(struct dpcls *cls)
+{
+ struct pvector *pvec = &cls->subtables;
+ uint32_t subtables_changed = 0;
+ struct dpcls_subtable *subtable = NULL;
+
+ PVECTOR_FOR_EACH (subtable, pvec) {
+ uint32_t u0_bits = subtable->mf_bits_set_unit0;
+ uint32_t u1_bits = subtable->mf_bits_set_unit1;
+ void *old_func = subtable->lookup_func;
+ subtable->lookup_func = dpcls_subtable_get_best_impl(u0_bits, u1_bits);
+ subtables_changed += (old_func != subtable->lookup_func);
+ }
+ pvector_publish(pvec);
+
+ return subtables_changed;
+}
/* Periodically sort the dpcls subtable vectors according to hit counts */
static void
diff --git a/lib/dpif-netlink-rtnl.c b/lib/dpif-netlink-rtnl.c
index 582274c4677451946008d1f8efa97fa1d1d6ae01..4fc42daed2d9794665adaa5758a7617d54d605a8 100644
--- a/lib/dpif-netlink-rtnl.c
+++ b/lib/dpif-netlink-rtnl.c
@@ -58,6 +58,18 @@ VLOG_DEFINE_THIS_MODULE(dpif_netlink_rtnl);
#define IFLA_GENEVE_UDP_ZERO_CSUM6_RX 10
#endif
+#ifndef IFLA_BAREUDP_MAX
+#define IFLA_BAREUDP_MAX 0
+#endif
+#if IFLA_BAREUDP_MAX < 4
+#define IFLA_BAREUDP_PORT 1
+#define IFLA_BAREUDP_ETHERTYPE 2
+#define IFLA_BAREUDP_SRCPORT_MIN 3
+#define IFLA_BAREUDP_MULTIPROTO_MODE 4
+#endif
+
+#define BAREUDP_SRCPORT_MIN 49153
+
static const struct nl_policy rtlink_policy[] = {
[IFLA_LINKINFO] = { .type = NL_A_NESTED },
};
@@ -81,6 +93,10 @@ static const struct nl_policy geneve_policy[] = {
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NL_A_U8 },
[IFLA_GENEVE_PORT] = { .type = NL_A_U16 },
};
+static const struct nl_policy bareudp_policy[] = {
+ [IFLA_BAREUDP_PORT] = { .type = NL_A_U16 },
+ [IFLA_BAREUDP_ETHERTYPE] = { .type = NL_A_U16 },
+};
static const char *
vport_type_to_kind(enum ovs_vport_type type,
@@ -111,6 +127,10 @@ vport_type_to_kind(enum ovs_vport_type type,
} else {
return NULL;
}
+ case OVS_VPORT_TYPE_GTPU:
+ return NULL;
+ case OVS_VPORT_TYPE_BAREUDP:
+ return "bareudp";
case OVS_VPORT_TYPE_NETDEV:
case OVS_VPORT_TYPE_INTERNAL:
case OVS_VPORT_TYPE_LISP:
@@ -241,6 +261,24 @@ dpif_netlink_rtnl_geneve_verify(const struct netdev_tunnel_config *tnl_cfg,
return err;
}
+static int /* 0 if the reply's port and ethertype match 'tnl_cfg'. */
+dpif_netlink_rtnl_bareudp_verify(const struct netdev_tunnel_config *tnl_cfg,
+ const char *kind, struct ofpbuf *reply)
+{
+ struct nlattr *bareudp[ARRAY_SIZE(bareudp_policy)];
+ int err;
+
+ err = rtnl_policy_parse(kind, reply, bareudp_policy, bareudp,
+ ARRAY_SIZE(bareudp_policy));
+ if (!err) { /* A parse failure is returned unchanged. */
+ if ((tnl_cfg->dst_port != nl_attr_get_be16(bareudp[IFLA_BAREUDP_PORT]))
+ || (tnl_cfg->payload_ethertype
+ != nl_attr_get_be16(bareudp[IFLA_BAREUDP_ETHERTYPE]))) {
+ err = EINVAL; /* Reply does not match the requested config. */
+ }
+ }
+ return err;
+}
static int
dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg,
@@ -273,10 +311,14 @@ dpif_netlink_rtnl_verify(const struct netdev_tunnel_config *tnl_cfg,
case OVS_VPORT_TYPE_GENEVE:
err = dpif_netlink_rtnl_geneve_verify(tnl_cfg, kind, reply);
break;
+ case OVS_VPORT_TYPE_BAREUDP:
+ err = dpif_netlink_rtnl_bareudp_verify(tnl_cfg, kind, reply);
+ break;
case OVS_VPORT_TYPE_NETDEV:
case OVS_VPORT_TYPE_INTERNAL:
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
+ case OVS_VPORT_TYPE_GTPU:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
@@ -354,10 +396,21 @@ dpif_netlink_rtnl_create(const struct netdev_tunnel_config *tnl_cfg,
nl_msg_put_u8(&request, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, 1);
nl_msg_put_be16(&request, IFLA_GENEVE_PORT, tnl_cfg->dst_port);
break;
+ case OVS_VPORT_TYPE_BAREUDP:
+ nl_msg_put_be16(&request, IFLA_BAREUDP_ETHERTYPE,
+ tnl_cfg->payload_ethertype);
+ nl_msg_put_u16(&request, IFLA_BAREUDP_SRCPORT_MIN,
+ BAREUDP_SRCPORT_MIN);
+ nl_msg_put_be16(&request, IFLA_BAREUDP_PORT, tnl_cfg->dst_port);
+ if (tnl_cfg->exts & (1 << OVS_BAREUDP_EXT_MULTIPROTO_MODE)) {
+ nl_msg_put_flag(&request, IFLA_BAREUDP_MULTIPROTO_MODE);
+ }
+ break;
case OVS_VPORT_TYPE_NETDEV:
case OVS_VPORT_TYPE_INTERNAL:
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
+ case OVS_VPORT_TYPE_GTPU:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
@@ -466,11 +519,13 @@ dpif_netlink_rtnl_port_destroy(const char *name, const char *type)
case OVS_VPORT_TYPE_ERSPAN:
case OVS_VPORT_TYPE_IP6ERSPAN:
case OVS_VPORT_TYPE_IP6GRE:
+ case OVS_VPORT_TYPE_BAREUDP:
return dpif_netlink_rtnl_destroy(name);
case OVS_VPORT_TYPE_NETDEV:
case OVS_VPORT_TYPE_INTERNAL:
case OVS_VPORT_TYPE_LISP:
case OVS_VPORT_TYPE_STT:
+ case OVS_VPORT_TYPE_GTPU:
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
default:
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 5b5c96d727c3e2d692a69aaee9b34061639844cf..ceb56c6851c62f5f3c89d92d405c05dcb1d0c54b 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -691,6 +691,7 @@ dpif_netlink_set_features(struct dpif *dpif_, uint32_t new_features)
dpif_netlink_dp_init(&request);
request.cmd = OVS_DP_CMD_SET;
+ request.name = dpif_->base_name;
request.dp_ifindex = dpif->dp_ifindex;
request.user_features = dpif->user_features | new_features;
@@ -745,6 +746,12 @@ get_vport_type(const struct dpif_netlink_vport *vport)
case OVS_VPORT_TYPE_IP6GRE:
return "ip6gre";
+ case OVS_VPORT_TYPE_GTPU:
+ return "gtpu";
+
+ case OVS_VPORT_TYPE_BAREUDP:
+ return "bareudp";
+
case OVS_VPORT_TYPE_UNSPEC:
case __OVS_VPORT_TYPE_MAX:
break;
@@ -778,6 +785,10 @@ netdev_to_ovs_vport_type(const char *type)
return OVS_VPORT_TYPE_IP6GRE;
} else if (!strcmp(type, "gre")) {
return OVS_VPORT_TYPE_GRE;
+ } else if (!strcmp(type, "gtpu")) {
+ return OVS_VPORT_TYPE_GTPU;
+ } else if (!strcmp(type, "bareudp")) {
+ return OVS_VPORT_TYPE_BAREUDP;
} else {
return OVS_VPORT_TYPE_UNSPEC;
}
@@ -1114,6 +1125,7 @@ dpif_netlink_port_get_pid(const struct dpif *dpif_, odp_port_t port_no)
static int
dpif_netlink_flow_flush(struct dpif *dpif_)
{
+ const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif_));
const struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
struct dpif_netlink_flow flow;
@@ -1122,7 +1134,7 @@ dpif_netlink_flow_flush(struct dpif *dpif_)
flow.dp_ifindex = dpif->dp_ifindex;
if (netdev_is_flow_api_enabled()) {
- netdev_ports_flow_flush(dpif_->dpif_class);
+ netdev_ports_flow_flush(dpif_type_str);
}
return dpif_netlink_flow_transact(&flow, NULL, NULL);
@@ -1439,8 +1451,9 @@ start_netdev_dump(const struct dpif *dpif_,
ovs_mutex_lock(&dump->netdev_lock);
dump->netdev_current_dump = 0;
dump->netdev_dumps
- = netdev_ports_flow_dump_create(dpif_->dpif_class,
- &dump->netdev_dumps_num);
+ = netdev_ports_flow_dump_create(dpif_normalize_type(dpif_type(dpif_)),
+ &dump->netdev_dumps_num,
+ dump->up.terse);
ovs_mutex_unlock(&dump->netdev_lock);
}
@@ -1635,41 +1648,42 @@ dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
struct dpif_flow_attrs *attrs,
ovs_u128 *ufid,
struct dpif_flow *flow,
- bool terse OVS_UNUSED)
-{
-
- struct odp_flow_key_parms odp_parms = {
- .flow = &match->flow,
- .mask = &match->wc.masks,
- .support = {
- .max_vlan_headers = 2,
- .recirc = true,
- .ct_state = true,
- .ct_zone = true,
- .ct_mark = true,
- .ct_label = true,
- },
- };
- size_t offset;
-
+ bool terse)
+{
memset(flow, 0, sizeof *flow);
- /* Key */
- offset = key_buf->size;
- flow->key = ofpbuf_tail(key_buf);
- odp_flow_key_from_flow(&odp_parms, key_buf);
- flow->key_len = key_buf->size - offset;
+ if (!terse) {
+ struct odp_flow_key_parms odp_parms = {
+ .flow = &match->flow,
+ .mask = &match->wc.masks,
+ .support = {
+ .max_vlan_headers = 2,
+ .recirc = true,
+ .ct_state = true,
+ .ct_zone = true,
+ .ct_mark = true,
+ .ct_label = true,
+ },
+ };
+ size_t offset;
+
+ /* Key */
+ offset = key_buf->size;
+ flow->key = ofpbuf_tail(key_buf);
+ odp_flow_key_from_flow(&odp_parms, key_buf);
+ flow->key_len = key_buf->size - offset;
- /* Mask */
- offset = mask_buf->size;
- flow->mask = ofpbuf_tail(mask_buf);
- odp_parms.key_buf = key_buf;
- odp_flow_key_from_mask(&odp_parms, mask_buf);
- flow->mask_len = mask_buf->size - offset;
+ /* Mask */
+ offset = mask_buf->size;
+ flow->mask = ofpbuf_tail(mask_buf);
+ odp_parms.key_buf = key_buf;
+ odp_flow_key_from_mask(&odp_parms, mask_buf);
+ flow->mask_len = mask_buf->size - offset;
- /* Actions */
- flow->actions = nl_attr_get(actions);
- flow->actions_len = nl_attr_get_size(actions);
+ /* Actions */
+ flow->actions = nl_attr_get(actions);
+ flow->actions_len = nl_attr_get_size(actions);
+ }
/* Stats */
memcpy(&flow->stats, stats, sizeof *stats);
@@ -1994,6 +2008,7 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
static int
parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
{
+ const char *dpif_type_str = dpif_normalize_type(dpif_type(&dpif->dpif));
struct dpif_flow *dpif_flow = get->flow;
struct match match;
struct nlattr *actions;
@@ -2008,8 +2023,8 @@ parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
int err;
ofpbuf_use_stack(&buf, &act_buf, sizeof act_buf);
- err = netdev_ports_flow_get(dpif->dpif.dpif_class, &match,
- &actions, get->ufid, &stats, &attrs, &buf);
+ err = netdev_ports_flow_get(dpif_type_str, &match, &actions, get->ufid,
+ &stats, &attrs, &buf);
if (err) {
return err;
}
@@ -2034,8 +2049,8 @@ parse_flow_get(struct dpif_netlink *dpif, struct dpif_flow_get *get)
static int
parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
{
+ const char *dpif_type_str = dpif_normalize_type(dpif_type(&dpif->dpif));
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
- const struct dpif_class *dpif_class = dpif->dpif.dpif_class;
struct match match;
odp_port_t in_port;
const struct nlattr *nla;
@@ -2057,7 +2072,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
}
in_port = match.flow.in_port.odp_port;
- dev = netdev_ports_get(in_port, dpif_class);
+ dev = netdev_ports_get(in_port, dpif_type_str);
if (!dev) {
return EOPNOTSUPP;
}
@@ -2070,7 +2085,7 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
odp_port_t out_port;
out_port = nl_attr_get_odp_port(nla);
- outdev = netdev_ports_get(out_port, dpif_class);
+ outdev = netdev_ports_get(out_port, dpif_type_str);
if (!outdev) {
err = EOPNOTSUPP;
goto out;
@@ -2086,11 +2101,11 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
}
}
- info.dpif_class = dpif_class;
info.tp_dst_port = dst_port;
info.tunnel_csum_on = csum_on;
info.recirc_id_shared_with_tc = (dpif->user_features
& OVS_DP_F_TC_RECIRC_SHARING);
+ info.tc_modify_flow_deleted = false;
err = netdev_flow_put(dev, &match,
CONST_CAST(struct nlattr *, put->actions),
put->actions_len,
@@ -2141,7 +2156,11 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)
out:
if (err && err != EEXIST && (put->flags & DPIF_FP_MODIFY)) {
/* Modified rule can't be offloaded, try and delete from HW */
- int del_err = netdev_flow_del(dev, put->ufid, put->stats);
+ int del_err = 0;
+
+ if (!info.tc_modify_flow_deleted) {
+ del_err = netdev_flow_del(dev, put->ufid, put->stats);
+ }
if (!del_err) {
/* Delete from hw success, so old flow was offloaded.
@@ -2185,8 +2204,10 @@ try_send_to_netdev(struct dpif_netlink *dpif, struct dpif_op *op)
break;
}
- err = netdev_ports_flow_del(dpif->dpif.dpif_class, del->ufid,
- del->stats);
+ err = netdev_ports_flow_del(
+ dpif_normalize_type(dpif_type(&dpif->dpif)),
+ del->ufid,
+ del->stats);
log_flow_del_message(&dpif->dpif, &this_module, del, 0);
break;
}
@@ -3993,6 +4014,9 @@ const struct dpif_class dpif_netlink_class = {
dpif_netlink_meter_set,
dpif_netlink_meter_get,
dpif_netlink_meter_del,
+ NULL, /* bond_add */
+ NULL, /* bond_del */
+ NULL, /* bond_stats_get */
};
static int
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index b77317bca18bf752fe01e5a1284d8f8f8fa26e5b..b817fceac698460af88986e1396e6a0bca5f2a76 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -616,6 +616,18 @@ struct dpif_class {
* zero. */
int (*meter_del)(struct dpif *, ofproto_meter_id meter_id,
struct ofputil_meter_stats *, uint16_t n_bands);
+
+ /* Adds a bond with 'bond_id' and the member-map to 'dpif'. */
+ int (*bond_add)(struct dpif *dpif, uint32_t bond_id,
+ odp_port_t *member_map);
+
+ /* Removes bond identified by 'bond_id' from 'dpif'. */
+ int (*bond_del)(struct dpif *dpif, uint32_t bond_id);
+
+ /* Reads bond stats from 'dpif'. 'n_bytes' should be an array with size
+ * sufficient to store BOND_BUCKETS number of elements. */
+ int (*bond_stats_get)(struct dpif *dpif, uint32_t bond_id,
+ uint64_t *n_bytes);
};
extern const struct dpif_class dpif_netlink_class;
diff --git a/lib/dpif.c b/lib/dpif.c
index 9d9c716c13aa783c5644bc12215aa0aa31adbbf8..56d0b4a654322592e0f33b3273517902b2b2b26a 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -79,9 +79,9 @@ struct registered_dpif_class {
int refcount;
};
static struct shash dpif_classes = SHASH_INITIALIZER(&dpif_classes);
-static struct sset dpif_blacklist = SSET_INITIALIZER(&dpif_blacklist);
+static struct sset dpif_disallowed = SSET_INITIALIZER(&dpif_disallowed);
-/* Protects 'dpif_classes', including the refcount, and 'dpif_blacklist'. */
+/* Protects 'dpif_classes', including the refcount, and 'dpif_disallowed'. */
static struct ovs_mutex dpif_mutex = OVS_MUTEX_INITIALIZER;
/* Rate limit for individual messages going to or from the datapath, output at
@@ -134,8 +134,8 @@ dp_register_provider__(const struct dpif_class *new_class)
struct registered_dpif_class *registered_class;
int error;
- if (sset_contains(&dpif_blacklist, new_class->type)) {
- VLOG_DBG("attempted to register blacklisted provider: %s",
+ if (sset_contains(&dpif_disallowed, new_class->type)) {
+ VLOG_DBG("attempted to register disallowed provider: %s",
new_class->type);
return EINVAL;
}
@@ -219,13 +219,13 @@ dp_unregister_provider(const char *type)
return error;
}
-/* Blacklists a provider. Causes future calls of dp_register_provider() with
+/* Disallows a provider. Causes future calls of dp_register_provider() with
* a dpif_class which implements 'type' to fail. */
void
-dp_blacklist_provider(const char *type)
+dp_disallow_provider(const char *type)
{
ovs_mutex_lock(&dpif_mutex);
- sset_add(&dpif_blacklist, type);
+ sset_add(&dpif_disallowed, type);
ovs_mutex_unlock(&dpif_mutex);
}
@@ -347,6 +347,7 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp)
error = registered_class->dpif_class->open(registered_class->dpif_class,
name, create, &dpif);
if (!error) {
+ const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif));
struct dpif_port_dump port_dump;
struct dpif_port dpif_port;
@@ -363,7 +364,7 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp)
err = netdev_open(dpif_port.name, dpif_port.type, &netdev);
if (!err) {
- netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port);
+ netdev_ports_insert(netdev, dpif_type_str, &dpif_port);
netdev_close(netdev);
} else {
VLOG_WARN("could not open netdev %s type %s: %s",
@@ -427,14 +428,15 @@ dpif_create_and_open(const char *name, const char *type, struct dpif **dpifp)
static void
dpif_remove_netdev_ports(struct dpif *dpif) {
- struct dpif_port_dump port_dump;
- struct dpif_port dpif_port;
+ const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif));
+ struct dpif_port_dump port_dump;
+ struct dpif_port dpif_port;
- DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) {
- if (!dpif_is_tap_port(dpif_port.type)) {
- netdev_ports_remove(dpif_port.port_no, dpif->dpif_class);
- }
+ DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) {
+ if (!dpif_is_tap_port(dpif_port.type)) {
+ netdev_ports_remove(dpif_port.port_no, dpif_type_str);
}
+ }
}
/* Closes and frees the connection to 'dpif'. Does not destroy the datapath
@@ -597,12 +599,13 @@ dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop)
if (!dpif_is_tap_port(netdev_get_type(netdev))) {
+ const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif));
struct dpif_port dpif_port;
dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev));
dpif_port.name = CONST_CAST(char *, netdev_name);
dpif_port.port_no = port_no;
- netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port);
+ netdev_ports_insert(netdev, dpif_type_str, &dpif_port);
}
} else {
VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s",
@@ -634,7 +637,7 @@ dpif_port_del(struct dpif *dpif, odp_port_t port_no, bool local_delete)
}
}
- netdev_ports_remove(port_no, dpif->dpif_class);
+ netdev_ports_remove(port_no, dpif_normalize_type(dpif_type(dpif)));
return error;
}
@@ -1170,6 +1173,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
case OVS_ACTION_ATTR_CT:
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
@@ -1220,6 +1224,7 @@ dpif_execute_helper_cb(void *aux_, struct dp_packet_batch *packets_,
struct dp_packet *clone = NULL;
uint32_t cutlen = dp_packet_get_cutlen(packet);
if (cutlen && (type == OVS_ACTION_ATTR_OUTPUT
+ || type == OVS_ACTION_ATTR_LB_OUTPUT
|| type == OVS_ACTION_ATTR_TUNNEL_PUSH
|| type == OVS_ACTION_ATTR_TUNNEL_POP
|| type == OVS_ACTION_ATTR_USERSPACE)) {
@@ -1879,6 +1884,16 @@ dpif_supports_explicit_drop_action(const struct dpif *dpif)
return dpif_is_netdev(dpif);
}
+bool
+dpif_supports_lb_output_action(const struct dpif *dpif)
+{
+ /*
+ * The lb_output action (balance-tcp optimization) is currently
+ * supported in the netdev datapath only, so report true just for it.
+ */
+ return dpif_is_netdev(dpif);
+}
+
/* Meters */
void
dpif_meter_get_features(const struct dpif *dpif,
@@ -1976,3 +1991,53 @@ dpif_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
}
return error;
}
+
+int
+dpif_bond_add(struct dpif *dpif, uint32_t bond_id, odp_port_t *member_map)
+{ /* Returns EOPNOTSUPP if the provider has no 'bond_add' hook. */
+ return dpif->dpif_class->bond_add
+ ? dpif->dpif_class->bond_add(dpif, bond_id, member_map)
+ : EOPNOTSUPP;
+}
+
+int
+dpif_bond_del(struct dpif *dpif, uint32_t bond_id)
+{ /* Returns EOPNOTSUPP if the provider has no 'bond_del' hook. */
+ return dpif->dpif_class->bond_del
+ ? dpif->dpif_class->bond_del(dpif, bond_id)
+ : EOPNOTSUPP;
+}
+
+int
+dpif_bond_stats_get(struct dpif *dpif, uint32_t bond_id,
+ uint64_t *n_bytes)
+{ /* Zeroes 'n_bytes' first; EOPNOTSUPP if the provider lacks the hook. */
+ memset(n_bytes, 0, BOND_BUCKETS * sizeof *n_bytes);
+
+ return dpif->dpif_class->bond_stats_get
+ ? dpif->dpif_class->bond_stats_get(dpif, bond_id, n_bytes)
+ : EOPNOTSUPP;
+}
+
+int
+dpif_get_n_offloaded_flows(struct dpif *dpif, uint64_t *n_flows)
+{ /* Sums netdev_ports_get_n_flows() over the ports of 'dpif'. */
+ const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif));
+ struct dpif_port_dump port_dump;
+ struct dpif_port dpif_port;
+ int ret, n_devs = 0;
+ uint64_t nflows;
+
+ *n_flows = 0;
+ DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) {
+ ret = netdev_ports_get_n_flows(dpif_type_str, dpif_port.port_no,
+ &nflows);
+ if (!ret) {
+ *n_flows += nflows;
+ } else if (ret == EOPNOTSUPP) {
+ continue; /* Ports that report EOPNOTSUPP are not counted. */
+ }
+ n_devs++; /* Also counts ports that failed with another error. */
+ }
+ return n_devs ? 0 : EOPNOTSUPP; /* EOPNOTSUPP if no port was counted. */
+}
diff --git a/lib/dpif.h b/lib/dpif.h
index 4df8f7c8b7e7054e129304201a7e9989d72c0ad1..ecda896c78db675d69c57484a0b76ec4a79b3921 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -400,7 +400,7 @@ struct sset;
int dp_register_provider(const struct dpif_class *);
int dp_unregister_provider(const char *type);
-void dp_blacklist_provider(const char *type);
+void dp_disallow_provider(const char *type);
void dp_enumerate_types(struct sset *types);
const char *dpif_normalize_type(const char *);
@@ -437,6 +437,8 @@ int dpif_get_dp_stats(const struct dpif *, struct dpif_dp_stats *);
int dpif_set_features(struct dpif *, uint32_t new_features);
+int dpif_get_n_offloaded_flows(struct dpif *dpif, uint64_t *n_flows);
+
/* Port operations. */
@@ -891,6 +893,18 @@ int dpif_meter_get(const struct dpif *, ofproto_meter_id meter_id,
struct ofputil_meter_stats *, uint16_t n_bands);
int dpif_meter_del(struct dpif *, ofproto_meter_id meter_id,
struct ofputil_meter_stats *, uint16_t n_bands);
+
+/* Bonding. */
+
+/* Bit-mask for hashing a flow down to a bucket. */
+#define BOND_MASK 0xff
+#define BOND_BUCKETS (BOND_MASK + 1)
+
+int dpif_bond_add(struct dpif *, uint32_t bond_id, odp_port_t *member_map);
+int dpif_bond_del(struct dpif *, uint32_t bond_id);
+int dpif_bond_stats_get(struct dpif *, uint32_t bond_id, uint64_t *n_bytes);
+bool dpif_supports_lb_output_action(const struct dpif *);
+
/* Miscellaneous. */
diff --git a/lib/fatal-signal.c b/lib/fatal-signal.c
index 09f7c6ecf92c0bde369136db44c43fa0e03d6845..bbb31ef2751747b412f71bdaf344fb17a463a24a 100644
--- a/lib/fatal-signal.c
+++ b/lib/fatal-signal.c
@@ -158,6 +158,23 @@ fatal_signal_add_hook(void (*hook_cb)(void *aux), void (*cancel_cb)(void *aux),
}
#ifdef HAVE_UNWIND
+/* Writes 'value' to 'str' as lowercase hexadecimal digits, most significant
+ * digit first and without leading zeros (a 'value' of 0 yields "0").
+ *
+ * Returns the number of digits written.  No null terminator is appended, so
+ * the caller must supply a large enough buffer and terminate it itself.
+ *
+ * Hand-rolled because using snprintf() is not async signal safe. */
+static inline int
+llong_to_hex_str(unsigned long long value, char *str)
+{
+    unsigned long long rest;
+    int n_digits = 1;
+
+    /* Count the digits first so they can be stored in reading order. */
+    for (rest = value / 16; rest > 0; rest /= 16) {
+        n_digits++;
+    }
+
+    /* Fill in from the least significant digit backwards. */
+    for (int pos = n_digits - 1; pos >= 0; pos--) {
+        str[pos] = "0123456789abcdef"[value % 16];
+        value /= 16;
+    }
+
+    return n_digits;
+}
+
/* Send the backtrace buffer to monitor thread.
*
* Note that this runs in the signal handling context, any system
@@ -184,10 +201,43 @@ send_backtrace_to_monitor(void) {
unw_get_reg(&cursor, UNW_REG_IP, &unw_bt[dep].ip);
unw_get_proc_name(&cursor, unw_bt[dep].func, UNW_MAX_FUNCN,
&unw_bt[dep].offset);
- dep++;
+ dep++;
}
- ignore(write(daemonize_fd, unw_bt, dep * sizeof(struct unw_backtrace)));
+ if (monitor) {
+ ignore(write(daemonize_fd, unw_bt,
+ dep * sizeof(struct unw_backtrace)));
+ } else {
+ /* Since there is no monitor daemon running, write backtrace
+ * in current process.
+ */
+ char str[] = "SIGSEGV detected, backtrace:\n";
+ char ip_str[16], offset_str[6];
+ char line[64], fn_name[UNW_MAX_FUNCN];
+
+ vlog_direct_write_to_log_file_unsafe(str);
+
+ for (int i = 0; i < dep; i++) {
+ memset(line, 0, sizeof line);
+ memset(fn_name, 0, sizeof fn_name);
+ memset(offset_str, 0, sizeof offset_str);
+ memset(ip_str, ' ', sizeof ip_str);
+ ip_str[sizeof(ip_str) - 1] = 0;
+
+ llong_to_hex_str(unw_bt[i].ip, ip_str);
+ llong_to_hex_str(unw_bt[i].offset, offset_str);
+
+ strcat(line, "0x");
+ strcat(line, ip_str);
+ strcat(line, "<");
+ memcpy(fn_name, unw_bt[i].func, UNW_MAX_FUNCN - 1);
+ strcat(line, fn_name);
+ strcat(line, "+0x");
+ strcat(line, offset_str);
+ strcat(line, ">\n");
+ vlog_direct_write_to_log_file_unsafe(line);
+ }
+ }
}
#else
static inline void
diff --git a/lib/flow.c b/lib/flow.c
index 45bb96b543be8fbd35ca0ede83344f32a92b7de9..cc1b3f2db25fa4414b34ec6354948b7dc0690ef1 100644
--- a/lib/flow.c
+++ b/lib/flow.c
@@ -129,7 +129,7 @@ struct mf_ctx {
* away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are
* defined as macros. */
-#if (FLOW_WC_SEQ != 41)
+#if (FLOW_WC_SEQ != 42)
#define MINIFLOW_ASSERT(X) ovs_assert(X)
BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime "
"assertions enabled. Consider updating FLOW_WC_SEQ after "
@@ -731,7 +731,7 @@ void
miniflow_extract(struct dp_packet *packet, struct miniflow *dst)
{
/* Add code to this function (or its callees) to extract new fields. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
const struct pkt_metadata *md = &packet->md;
const void *data = dp_packet_data(packet);
@@ -1107,6 +1107,7 @@ parse_tcp_flags(struct dp_packet *packet)
if (OVS_UNLIKELY(eth_type_mpls(dl_type))) {
packet->l2_5_ofs = (char *)data - frame;
}
+ packet->l3_ofs = (char *)data - frame;
if (OVS_LIKELY(dl_type == htons(ETH_TYPE_IP))) {
const struct ip_header *nh = data;
int ip_len;
@@ -1116,7 +1117,6 @@ parse_tcp_flags(struct dp_packet *packet)
return 0;
}
dp_packet_set_l2_pad_size(packet, size - tot_len);
- packet->l3_ofs = (uint16_t)((char *)nh - frame);
nw_proto = nh->ip_proto;
nw_frag = ipv4_get_nw_frag(nh);
@@ -1129,7 +1129,6 @@ parse_tcp_flags(struct dp_packet *packet)
if (OVS_UNLIKELY(!ipv6_sanity_check(nh, size))) {
return 0;
}
- packet->l3_ofs = (uint16_t)((char *)nh - frame);
data_pull(&data, &size, sizeof *nh);
plen = ntohs(nh->ip6_plen); /* Never pull padding. */
@@ -1188,7 +1187,7 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
{
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
match_init_catchall(flow_metadata);
if (flow->tunnel.tun_id != htonll(0)) {
@@ -1228,6 +1227,12 @@ flow_get_metadata(const struct flow *flow, struct match *flow_metadata)
if (flow->tunnel.erspan_hwid) {
match_set_tun_erspan_hwid(flow_metadata, flow->tunnel.erspan_hwid);
}
+ if (flow->tunnel.gtpu_flags) {
+ match_set_tun_gtpu_flags(flow_metadata, flow->tunnel.gtpu_flags);
+ }
+ if (flow->tunnel.gtpu_msgtype) {
+ match_set_tun_gtpu_msgtype(flow_metadata, flow->tunnel.gtpu_msgtype);
+ }
tun_metadata_get_fmd(&flow->tunnel, flow_metadata);
if (flow->metadata != htonll(0)) {
match_set_metadata(flow_metadata, flow->metadata);
@@ -1768,7 +1773,7 @@ flow_wildcards_init_for_packet(struct flow_wildcards *wc,
memset(&wc->masks, 0x0, sizeof wc->masks);
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
if (flow_tnl_dst_is_set(&flow->tunnel)) {
if (flow->tunnel.flags & FLOW_TNL_F_KEY) {
@@ -1789,6 +1794,8 @@ flow_wildcards_init_for_packet(struct flow_wildcards *wc,
WC_MASK_FIELD(wc, tunnel.erspan_idx);
WC_MASK_FIELD(wc, tunnel.erspan_dir);
WC_MASK_FIELD(wc, tunnel.erspan_hwid);
+ WC_MASK_FIELD(wc, tunnel.gtpu_flags);
+ WC_MASK_FIELD(wc, tunnel.gtpu_msgtype);
if (!(flow->tunnel.flags & FLOW_TNL_F_UDPIF)) {
if (flow->tunnel.metadata.present.map) {
@@ -1919,7 +1926,7 @@ void
flow_wc_map(const struct flow *flow, struct flowmap *map)
{
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
flowmap_init(map);
@@ -2022,7 +2029,7 @@ void
flow_wildcards_clear_non_packet_fields(struct flow_wildcards *wc)
{
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
memset(&wc->masks.metadata, 0, sizeof wc->masks.metadata);
memset(&wc->masks.regs, 0, sizeof wc->masks.regs);
@@ -2166,7 +2173,7 @@ flow_wildcards_set_xxreg_mask(struct flow_wildcards *wc, int idx,
uint32_t
miniflow_hash_5tuple(const struct miniflow *flow, uint32_t basis)
{
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
uint32_t hash = basis;
if (flow) {
@@ -2213,7 +2220,7 @@ ASSERT_SEQUENTIAL(ipv6_src, ipv6_dst);
uint32_t
flow_hash_5tuple(const struct flow *flow, uint32_t basis)
{
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
uint32_t hash = basis;
if (flow) {
@@ -2891,7 +2898,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type,
if (clear_flow_L3) {
/* Clear all L3 and L4 fields and dp_hash. */
- BUILD_ASSERT(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT(FLOW_WC_SEQ == 42);
memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0,
sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT);
flow->dp_hash = 0;
@@ -3189,7 +3196,7 @@ flow_compose(struct dp_packet *p, const struct flow *flow,
/* Add code to this function (or its callees) for emitting new fields or
* protocols. (This isn't essential, so it can be skipped for initial
* testing.) */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
uint32_t pseudo_hdr_csum;
size_t l4_len;
diff --git a/lib/flow.h b/lib/flow.h
index 75751763c81a1ee88c8aa7644c544b7ca75fd3c7..b32f0b27754a11aeba35dfe51488e646fe3c32b3 100644
--- a/lib/flow.h
+++ b/lib/flow.h
@@ -964,7 +964,7 @@ static inline void
pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
{
/* Update this function whenever struct flow changes. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
md->recirc_id = flow->recirc_id;
md->dp_hash = flow->dp_hash;
diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
index ed748dbde7867581c2ff65ce8f67320d33789d2e..8c5126ffcbf19287fd6bc6e26688c64a5b53050d 100644
--- a/lib/jsonrpc.c
+++ b/lib/jsonrpc.c
@@ -50,6 +50,10 @@ struct jsonrpc {
struct ovs_list output; /* Contains "struct ofpbuf"s. */
size_t output_count; /* Number of elements in "output". */
size_t backlog;
+
+ /* Limits. */
+ size_t max_output; /* 'output_count' disconnection threshold. */
+ size_t max_backlog; /* 'backlog' disconnection threshold. */
};
/* Rate limit for error messages. */
@@ -178,6 +182,17 @@ jsonrpc_get_backlog(const struct jsonrpc *rpc)
return rpc->status ? 0 : rpc->backlog;
}
+/* Configures the send backlog limits of 'rpc'.
+ *
+ * When the send backlog holds more than 'max_n_msgs' messages, or more than
+ * 'max_backlog_bytes' bytes, jsonrpc_send() drops the connection with E2BIG.
+ * A value of 0 disables the corresponding limit.  jsonrpc_send() evaluates
+ * the limits only while at least 50 messages are queued. */
+void
+jsonrpc_set_backlog_threshold(struct jsonrpc *rpc,
+                              size_t max_n_msgs, size_t max_backlog_bytes)
+{
+    rpc->max_backlog = max_backlog_bytes;
+    rpc->max_output = max_n_msgs;
+}
+
/* Returns the number of bytes that have been received on 'rpc''s underlying
* stream. (The value wraps around if it exceeds UINT_MAX.) */
unsigned int
@@ -261,9 +276,26 @@ jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg *msg)
rpc->backlog += length;
if (rpc->output_count >= 50) {
- VLOG_INFO_RL(&rl, "excessive sending backlog, jsonrpc: %s, num of"
+ static struct vlog_rate_limit bl_rl = VLOG_RATE_LIMIT_INIT(5, 5);
+ bool disconnect = false;
+
+ VLOG_INFO_RL(&bl_rl, "excessive sending backlog, jsonrpc: %s, num of"
" msgs: %"PRIuSIZE", backlog: %"PRIuSIZE".", rpc->name,
rpc->output_count, rpc->backlog);
+ if (rpc->max_output && rpc->output_count > rpc->max_output) {
+ disconnect = true;
+ VLOG_WARN("sending backlog exceeded maximum number of messages (%"
+ PRIuSIZE" > %"PRIuSIZE"), disconnecting, jsonrpc: %s.",
+ rpc->output_count, rpc->max_output, rpc->name);
+ } else if (rpc->max_backlog && rpc->backlog > rpc->max_backlog) {
+ disconnect = true;
+ VLOG_WARN("sending backlog exceeded maximum size (%"PRIuSIZE" > %"
+ PRIuSIZE" bytes), disconnecting, jsonrpc: %s.",
+ rpc->backlog, rpc->max_backlog, rpc->name);
+ }
+ if (disconnect) {
+ jsonrpc_error(rpc, E2BIG);
+ }
}
if (rpc->backlog == length) {
@@ -787,6 +819,10 @@ struct jsonrpc_session {
int last_error;
unsigned int seqno;
uint8_t dscp;
+
+ /* Limits for jsonrpc. */
+ size_t max_n_msgs;
+ size_t max_backlog_bytes;
};
static void
@@ -825,8 +861,10 @@ jsonrpc_session_open_multiple(const struct svec *remotes, bool retry)
s = xmalloc(sizeof *s);
/* Set 'n' remotes from 'names'. */
- ovs_assert(remotes->n > 0);
svec_clone(&s->remotes, remotes);
+ if (!s->remotes.n) {
+ svec_add(&s->remotes, "invalid:");
+ }
s->next_remote = 0;
s->reconnect = reconnect_create(time_msec());
@@ -840,6 +878,8 @@ jsonrpc_session_open_multiple(const struct svec *remotes, bool retry)
s->dscp = 0;
s->last_error = 0;
+ jsonrpc_session_set_backlog_threshold(s, 0, 0);
+
const char *name = reconnect_get_name(s->reconnect);
if (!pstream_verify_name(name)) {
reconnect_set_passive(s->reconnect, true, time_msec());
@@ -880,6 +920,7 @@ jsonrpc_session_open_unreliably(struct jsonrpc *jsonrpc, uint8_t dscp)
s->pstream = NULL;
s->seqno = 1;
+ jsonrpc_session_set_backlog_threshold(s, 0, 0);
return s;
}
@@ -968,6 +1009,8 @@ jsonrpc_session_run(struct jsonrpc_session *s)
}
reconnect_connected(s->reconnect, time_msec());
s->rpc = jsonrpc_open(stream);
+ jsonrpc_set_backlog_threshold(s->rpc, s->max_n_msgs,
+ s->max_backlog_bytes);
s->seqno++;
} else if (error != EAGAIN) {
reconnect_listen_error(s->reconnect, time_msec(), error);
@@ -1008,6 +1051,8 @@ jsonrpc_session_run(struct jsonrpc_session *s)
if (!error) {
reconnect_connected(s->reconnect, time_msec());
s->rpc = jsonrpc_open(s->stream);
+ jsonrpc_set_backlog_threshold(s->rpc, s->max_n_msgs,
+ s->max_backlog_bytes);
s->stream = NULL;
s->seqno++;
} else if (error != EAGAIN) {
@@ -1110,13 +1155,16 @@ jsonrpc_session_recv(struct jsonrpc_session *s)
received_bytes = jsonrpc_get_received_bytes(s->rpc);
jsonrpc_recv(s->rpc, &msg);
+
+ long long int now = time_msec();
+ reconnect_receive_attempted(s->reconnect, now);
if (received_bytes != jsonrpc_get_received_bytes(s->rpc)) {
/* Data was successfully received.
*
* Previously we only counted receiving a full message as activity,
* but with large messages or a slow connection that policy could
* time out the session mid-message. */
- reconnect_activity(s->reconnect, time_msec());
+ reconnect_activity(s->reconnect, now);
}
if (msg) {
@@ -1248,3 +1296,18 @@ jsonrpc_session_set_dscp(struct jsonrpc_session *s, uint8_t dscp)
jsonrpc_session_force_reconnect(s);
}
}
+
+/* Configures the send backlog limits used by session 's'.
+ *
+ * The limits are applied right away to the session's current connection, if
+ * it has one, and remembered in 's' so that jsonrpc_session_run() applies
+ * them to every connection it opens later.  If the send backlog contains
+ * more than 'max_n_msgs' messages or is larger than 'max_backlog_bytes'
+ * bytes, the connection will be closed (then reconnected, if that feature is
+ * enabled). */
+void
+jsonrpc_session_set_backlog_threshold(struct jsonrpc_session *s,
+                                      size_t max_n_msgs,
+                                      size_t max_backlog_bytes)
+{
+    struct jsonrpc *active = s->rpc;
+
+    if (active) {
+        jsonrpc_set_backlog_threshold(active, max_n_msgs, max_backlog_bytes);
+    }
+    s->max_backlog_bytes = max_backlog_bytes;
+    s->max_n_msgs = max_n_msgs;
+}
diff --git a/lib/jsonrpc.h b/lib/jsonrpc.h
index a44114e8dcd9133c3f4ac2c7c841cd368717a301..d75d66b863cbe3bc31fa3e562e52407875ef27bc 100644
--- a/lib/jsonrpc.h
+++ b/lib/jsonrpc.h
@@ -51,6 +51,9 @@ void jsonrpc_wait(struct jsonrpc *);
int jsonrpc_get_status(const struct jsonrpc *);
size_t jsonrpc_get_backlog(const struct jsonrpc *);
+void jsonrpc_set_backlog_threshold(struct jsonrpc *, size_t max_n_msgs,
+ size_t max_backlog_bytes);
+
unsigned int jsonrpc_get_received_bytes(const struct jsonrpc *);
const char *jsonrpc_get_name(const struct jsonrpc *);
@@ -140,6 +143,9 @@ void jsonrpc_session_set_probe_interval(struct jsonrpc_session *,
int probe_interval);
void jsonrpc_session_set_dscp(struct jsonrpc_session *,
uint8_t dscp);
+void jsonrpc_session_set_backlog_threshold(struct jsonrpc_session *,
+ size_t max_n_msgs,
+ size_t max_backlog_bytes);
const char *jsonrpc_session_get_id(const struct jsonrpc_session *);
#endif /* jsonrpc.h */
diff --git a/lib/lacp.c b/lib/lacp.c
index 705d88f5047a19ed5935709f8cd28f4ca414f5f3..540b2aa8ca831da4fbddc1c783b1bb3857690e00 100644
--- a/lib/lacp.c
+++ b/lib/lacp.c
@@ -92,12 +92,13 @@ enum pdu_subtype {
SUBTYPE_MARKER, /* Link Aggregation Marker Protocol. */
};
-enum slave_status {
+enum member_status {
LACP_CURRENT, /* Current State. Partner up to date. */
LACP_EXPIRED, /* Expired State. Partner out of date. */
LACP_DEFAULTED, /* Defaulted State. No partner. */
};
+/* A LACP primary interface. */
struct lacp {
struct ovs_list node; /* Node in all_lacps list. */
char *name; /* Name of this lacp object. */
@@ -105,8 +106,8 @@ struct lacp {
uint16_t sys_priority; /* System Priority. */
bool active; /* Active or Passive. */
- struct hmap slaves; /* Slaves this LACP object controls. */
- struct slave *key_slave; /* Slave whose ID will be the aggregation key. */
+ struct hmap members; /* Members this LACP object controls. */
+ struct member *key_member; /* Member whose ID will be aggregation key. */
bool fast; /* True if using fast probe interval. */
bool negotiated; /* True if LACP negotiations were successful. */
@@ -116,17 +117,18 @@ struct lacp {
struct ovs_refcount ref_cnt;
};
-struct slave {
- void *aux; /* Handle used to identify this slave. */
- struct hmap_node node; /* Node in master's slaves map. */
+/* A LACP member interface. */
+struct member {
+ void *aux; /* Handle used to identify this member. */
+ struct hmap_node node; /* Node in primary's members map. */
- struct lacp *lacp; /* LACP object containing this slave. */
+ struct lacp *lacp; /* LACP object containing this member. */
uint16_t port_id; /* Port ID. */
uint16_t port_priority; /* Port Priority. */
uint16_t key; /* Aggregation Key. 0 if default. */
- char *name; /* Name of this slave. */
+ char *name; /* Name of this member. */
- enum slave_status status; /* Slave status. */
+ enum member_status status; /* Member status. */
bool attached; /* Attached. Traffic may flow. */
bool carrier_up; /* Carrier state of link. */
struct lacp_info partner; /* Partner information. */
@@ -149,20 +151,20 @@ static struct ovs_list *const all_lacps OVS_GUARDED_BY(mutex) = &all_lacps__;
static void lacp_update_attached(struct lacp *) OVS_REQUIRES(mutex);
-static void slave_destroy(struct slave *) OVS_REQUIRES(mutex);
-static void slave_set_defaulted(struct slave *) OVS_REQUIRES(mutex);
-static void slave_set_expired(struct slave *) OVS_REQUIRES(mutex);
-static void slave_get_actor(struct slave *, struct lacp_info *actor)
+static void member_destroy(struct member *) OVS_REQUIRES(mutex);
+static void member_set_defaulted(struct member *) OVS_REQUIRES(mutex);
+static void member_set_expired(struct member *) OVS_REQUIRES(mutex);
+static void member_get_actor(struct member *, struct lacp_info *actor)
OVS_REQUIRES(mutex);
-static void slave_get_priority(struct slave *, struct lacp_info *priority)
+static void member_get_priority(struct member *, struct lacp_info *priority)
OVS_REQUIRES(mutex);
-static bool slave_may_tx(const struct slave *)
+static bool member_may_tx(const struct member *)
OVS_REQUIRES(mutex);
-static struct slave *slave_lookup(const struct lacp *, const void *slave)
+static struct member *member_lookup(const struct lacp *, const void *member)
OVS_REQUIRES(mutex);
static bool info_tx_equal(struct lacp_info *, struct lacp_info *)
OVS_REQUIRES(mutex);
-static bool slave_may_enable__(struct slave *slave) OVS_REQUIRES(mutex);
+static bool member_may_enable__(struct member *) OVS_REQUIRES(mutex);
static unixctl_cb_func lacp_unixctl_show;
static unixctl_cb_func lacp_unixctl_show_stats;
@@ -254,7 +256,7 @@ lacp_create(void) OVS_EXCLUDED(mutex)
struct lacp *lacp;
lacp = xzalloc(sizeof *lacp);
- hmap_init(&lacp->slaves);
+ hmap_init(&lacp->members);
ovs_refcount_init(&lacp->ref_cnt);
lacp_lock();
@@ -273,19 +275,19 @@ lacp_ref(const struct lacp *lacp_)
return lacp;
}
-/* Destroys 'lacp' and its slaves. Does nothing if 'lacp' is NULL. */
+/* Destroys 'lacp' and its members. Does nothing if 'lacp' is NULL. */
void
lacp_unref(struct lacp *lacp) OVS_EXCLUDED(mutex)
{
if (lacp && ovs_refcount_unref_relaxed(&lacp->ref_cnt) == 1) {
- struct slave *slave, *next;
+ struct member *member, *next;
lacp_lock();
- HMAP_FOR_EACH_SAFE (slave, next, node, &lacp->slaves) {
- slave_destroy(slave);
+ HMAP_FOR_EACH_SAFE (member, next, node, &lacp->members) {
+ member_destroy(member);
}
- hmap_destroy(&lacp->slaves);
+ hmap_destroy(&lacp->members);
ovs_list_remove(&lacp->node);
free(lacp->name);
free(lacp);
@@ -336,39 +338,40 @@ lacp_is_active(const struct lacp *lacp) OVS_EXCLUDED(mutex)
return ret;
}
-/* Processes 'packet' which was received on 'slave_'. This function should be
- * called on all packets received on 'slave_' with Ethernet Type ETH_TYPE_LACP.
+/* Processes 'packet' which was received on 'member_'. This function should be
+ * called on all packets received on 'member_' with Ethernet Type
+ * ETH_TYPE_LACP.
*/
bool
-lacp_process_packet(struct lacp *lacp, const void *slave_,
+lacp_process_packet(struct lacp *lacp, const void *member_,
const struct dp_packet *packet)
OVS_EXCLUDED(mutex)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
const struct lacp_pdu *pdu;
long long int tx_rate;
- struct slave *slave;
+ struct member *member;
bool lacp_may_enable = false;
enum pdu_subtype subtype;
lacp_lock();
- slave = slave_lookup(lacp, slave_);
- if (!slave) {
+ member = member_lookup(lacp, member_);
+ if (!member) {
goto out;
}
- slave->count_rx_pdus++;
+ member->count_rx_pdus++;
pdu = parse_lacp_packet(packet, &subtype);
switch (subtype) {
case SUBTYPE_LACP:
break;
case SUBTYPE_MARKER:
- slave->count_rx_pdus_marker++;
+ member->count_rx_pdus_marker++;
VLOG_DBG("%s: received a LACP marker PDU.", lacp->name);
goto out;
case SUBTYPE_UNUSED:
default:
- slave->count_rx_pdus_bad++;
+ member->count_rx_pdus_bad++;
VLOG_WARN_RL(&rl, "%s: received an unparsable LACP PDU.",
lacp->name);
goto out;
@@ -377,30 +380,30 @@ lacp_process_packet(struct lacp *lacp, const void *slave_,
/* On some NICs L1 state reporting is slow. In case LACP packets are
* received while carrier (L1) state is still down, drop the LACP PDU and
* trigger re-checking of L1 state. */
- if (!slave->carrier_up) {
+ if (!member->carrier_up) {
VLOG_INFO_RL(&rl, "%s: carrier state is DOWN,"
- " dropping received LACP PDU.", slave->name);
+ " dropping received LACP PDU.", member->name);
seq_change(connectivity_seq_get());
goto out;
}
- slave->status = LACP_CURRENT;
+ member->status = LACP_CURRENT;
tx_rate = lacp->fast ? LACP_FAST_TIME_TX : LACP_SLOW_TIME_TX;
- timer_set_duration(&slave->rx, LACP_RX_MULTIPLIER * tx_rate);
+ timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * tx_rate);
- slave->ntt_actor = pdu->partner;
+ member->ntt_actor = pdu->partner;
/* Update our information about our partner if it's out of date. This may
* cause priorities to change so re-calculate attached status of all
- * slaves. */
- if (memcmp(&slave->partner, &pdu->actor, sizeof pdu->actor)) {
+ * members. */
+ if (memcmp(&member->partner, &pdu->actor, sizeof pdu->actor)) {
lacp->update = true;
- slave->partner = pdu->actor;
+ member->partner = pdu->actor;
}
/* Evaluate may_enable here to avoid dropping of packets till main thread
* sets may_enable to true. */
- lacp_may_enable = slave_may_enable__(slave);
+ lacp_may_enable = member_may_enable__(member);
out:
lacp_unlock();
@@ -426,92 +429,92 @@ lacp_status(const struct lacp *lacp) OVS_EXCLUDED(mutex)
}
}
-/* Registers 'slave_' as subordinate to 'lacp'. This should be called at least
- * once per slave in a LACP managed bond. Should also be called whenever a
- * slave's settings change. */
+/* Registers 'member_' as subordinate to 'lacp'. This should be called at
+ * least once per member in a LACP managed bond. Should also be called
+ * whenever a member's settings change. */
void
-lacp_slave_register(struct lacp *lacp, void *slave_,
- const struct lacp_slave_settings *s)
+lacp_member_register(struct lacp *lacp, void *member_,
+ const struct lacp_member_settings *s)
OVS_EXCLUDED(mutex)
{
- struct slave *slave;
+ struct member *member;
lacp_lock();
- slave = slave_lookup(lacp, slave_);
- if (!slave) {
- slave = xzalloc(sizeof *slave);
- slave->lacp = lacp;
- slave->aux = slave_;
- hmap_insert(&lacp->slaves, &slave->node, hash_pointer(slave_, 0));
- slave_set_defaulted(slave);
-
- if (!lacp->key_slave) {
- lacp->key_slave = slave;
+ member = member_lookup(lacp, member_);
+ if (!member) {
+ member = xzalloc(sizeof *member);
+ member->lacp = lacp;
+ member->aux = member_;
+ hmap_insert(&lacp->members, &member->node, hash_pointer(member_, 0));
+ member_set_defaulted(member);
+
+ if (!lacp->key_member) {
+ lacp->key_member = member;
}
}
- if (!slave->name || strcmp(s->name, slave->name)) {
- free(slave->name);
- slave->name = xstrdup(s->name);
+ if (!member->name || strcmp(s->name, member->name)) {
+ free(member->name);
+ member->name = xstrdup(s->name);
}
- if (slave->port_id != s->id
- || slave->port_priority != s->priority
- || slave->key != s->key) {
- slave->port_id = s->id;
- slave->port_priority = s->priority;
- slave->key = s->key;
+ if (member->port_id != s->id
+ || member->port_priority != s->priority
+ || member->key != s->key) {
+ member->port_id = s->id;
+ member->port_priority = s->priority;
+ member->key = s->key;
lacp->update = true;
if (lacp->active || lacp->negotiated) {
- slave_set_expired(slave);
+ member_set_expired(member);
}
}
lacp_unlock();
}
-/* Unregisters 'slave_' with 'lacp'. */
+/* Unregisters 'member_' with 'lacp'. */
void
-lacp_slave_unregister(struct lacp *lacp, const void *slave_)
+lacp_member_unregister(struct lacp *lacp, const void *member_)
OVS_EXCLUDED(mutex)
{
- struct slave *slave;
+ struct member *member;
lacp_lock();
- slave = slave_lookup(lacp, slave_);
- if (slave) {
- slave_destroy(slave);
+ member = member_lookup(lacp, member_);
+ if (member) {
+ member_destroy(member);
lacp->update = true;
}
lacp_unlock();
}
-/* This function should be called whenever the carrier status of 'slave_' has
+/* This function should be called whenever the carrier status of 'member_' has
* changed. If 'lacp' is null, this function has no effect.*/
void
-lacp_slave_carrier_changed(const struct lacp *lacp, const void *slave_,
- bool carrier_up)
+lacp_member_carrier_changed(const struct lacp *lacp, const void *member_,
+ bool carrier_up)
OVS_EXCLUDED(mutex)
{
- struct slave *slave;
+ struct member *member;
if (!lacp) {
return;
}
lacp_lock();
- slave = slave_lookup(lacp, slave_);
- if (!slave) {
+ member = member_lookup(lacp, member_);
+ if (!member) {
goto out;
}
- if (slave->status == LACP_CURRENT || slave->lacp->active) {
- slave_set_expired(slave);
+ if (member->status == LACP_CURRENT || member->lacp->active) {
+ member_set_expired(member);
}
- if (slave->carrier_up != carrier_up) {
- slave->carrier_up = carrier_up;
- slave->count_carrier_changed++;
+ if (member->carrier_up != carrier_up) {
+ member->carrier_up = carrier_up;
+ member->count_carrier_changed++;
}
out:
@@ -519,35 +522,35 @@ out:
}
static bool
-slave_may_enable__(struct slave *slave) OVS_REQUIRES(mutex)
+member_may_enable__(struct member *member) OVS_REQUIRES(mutex)
{
- /* The slave may be enabled if it's attached to an aggregator and its
+ /* The member may be enabled if it's attached to an aggregator and its
* partner is synchronized.*/
- return slave->attached && (slave->partner.state & LACP_STATE_SYNC
- || (slave->lacp && slave->lacp->fallback_ab
- && slave->status == LACP_DEFAULTED));
+ return member->attached && (member->partner.state & LACP_STATE_SYNC
+ || (member->lacp && member->lacp->fallback_ab
+ && member->status == LACP_DEFAULTED));
}
-/* This function should be called before enabling 'slave_' to send or receive
- * traffic. If it returns false, 'slave_' should not enabled. As a
+/* This function should be called before enabling 'member_' to send or receive
+ * traffic. If it returns false, 'member_' should not be enabled. As a
* convenience, returns true if 'lacp' is NULL. */
bool
-lacp_slave_may_enable(const struct lacp *lacp, const void *slave_)
+lacp_member_may_enable(const struct lacp *lacp, const void *member_)
OVS_EXCLUDED(mutex)
{
if (lacp) {
- struct slave *slave;
+ struct member *member;
bool ret = false;
lacp_lock();
- slave = slave_lookup(lacp, slave_);
- if (slave) {
- /* It is only called when carrier is up. So, enable slave's
+ member = member_lookup(lacp, member_);
+ if (member) {
+ /* It is only called when carrier is up. So, enable member's
* carrier state if it is currently down. */
- if (!slave->carrier_up) {
- slave->carrier_up = true;
+ if (!member->carrier_up) {
+ member->carrier_up = true;
}
- ret = slave_may_enable__(slave);
+ ret = member_may_enable__(member);
}
lacp_unlock();
return ret;
@@ -556,19 +559,19 @@ lacp_slave_may_enable(const struct lacp *lacp, const void *slave_)
}
}
-/* Returns true if partner information on 'slave_' is up to date. 'slave_'
+/* Returns true if partner information on 'member_' is up to date. 'member_'
* not being current, generally indicates a connectivity problem, or a
* misconfigured (or broken) partner. */
bool
-lacp_slave_is_current(const struct lacp *lacp, const void *slave_)
+lacp_member_is_current(const struct lacp *lacp, const void *member_)
OVS_EXCLUDED(mutex)
{
- struct slave *slave;
+ struct member *member;
bool ret;
lacp_lock();
- slave = slave_lookup(lacp, slave_);
- ret = slave ? slave->status != LACP_DEFAULTED : false;
+ member = member_lookup(lacp, member_);
+ ret = member ? member->status != LACP_DEFAULTED : false;
lacp_unlock();
return ret;
}
@@ -577,21 +580,21 @@ lacp_slave_is_current(const struct lacp *lacp, const void *slave_)
void
lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex)
{
- struct slave *slave;
+ struct member *member;
lacp_lock();
- HMAP_FOR_EACH (slave, node, &lacp->slaves) {
- if (timer_expired(&slave->rx)) {
- enum slave_status old_status = slave->status;
-
- if (slave->status == LACP_CURRENT) {
- slave_set_expired(slave);
- slave->count_link_expired++;
- } else if (slave->status == LACP_EXPIRED) {
- slave_set_defaulted(slave);
- slave->count_link_defaulted++;
+ HMAP_FOR_EACH (member, node, &lacp->members) {
+ if (timer_expired(&member->rx)) {
+ enum member_status old_status = member->status;
+
+ if (member->status == LACP_CURRENT) {
+ member_set_expired(member);
+ member->count_link_expired++;
+ } else if (member->status == LACP_EXPIRED) {
+ member_set_defaulted(member);
+ member->count_link_defaulted++;
}
- if (slave->status != old_status) {
+ if (member->status != old_status) {
seq_change(connectivity_seq_get());
}
}
@@ -602,30 +605,30 @@ lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex)
seq_change(connectivity_seq_get());
}
- HMAP_FOR_EACH (slave, node, &lacp->slaves) {
+ HMAP_FOR_EACH (member, node, &lacp->members) {
struct lacp_info actor;
- if (!slave_may_tx(slave)) {
+ if (!member_may_tx(member)) {
continue;
}
- slave_get_actor(slave, &actor);
+ member_get_actor(member, &actor);
- if (timer_expired(&slave->tx)
- || !info_tx_equal(&actor, &slave->ntt_actor)) {
+ if (timer_expired(&member->tx)
+ || !info_tx_equal(&actor, &member->ntt_actor)) {
long long int duration;
struct lacp_pdu pdu;
- slave->ntt_actor = actor;
- compose_lacp_pdu(&actor, &slave->partner, &pdu);
- send_pdu(slave->aux, &pdu, sizeof pdu);
- slave->count_tx_pdus++;
+ member->ntt_actor = actor;
+ compose_lacp_pdu(&actor, &member->partner, &pdu);
+ send_pdu(member->aux, &pdu, sizeof pdu);
+ member->count_tx_pdus++;
- duration = (slave->partner.state & LACP_STATE_TIME
+ duration = (member->partner.state & LACP_STATE_TIME
? LACP_FAST_TIME_TX
: LACP_SLOW_TIME_TX);
- timer_set_duration(&slave->tx, duration);
+ timer_set_duration(&member->tx, duration);
seq_change(connectivity_seq_get());
}
}
@@ -636,16 +639,16 @@ lacp_run(struct lacp *lacp, lacp_send_pdu *send_pdu) OVS_EXCLUDED(mutex)
void
lacp_wait(struct lacp *lacp) OVS_EXCLUDED(mutex)
{
- struct slave *slave;
+ struct member *member;
lacp_lock();
- HMAP_FOR_EACH (slave, node, &lacp->slaves) {
- if (slave_may_tx(slave)) {
- timer_wait(&slave->tx);
+ HMAP_FOR_EACH (member, node, &lacp->members) {
+ if (member_may_tx(member)) {
+ timer_wait(&member->tx);
}
- if (slave->status != LACP_DEFAULTED) {
- timer_wait(&slave->rx);
+ if (member->status != LACP_DEFAULTED) {
+ timer_wait(&member->rx);
}
}
lacp_unlock();
@@ -653,12 +656,12 @@ lacp_wait(struct lacp *lacp) OVS_EXCLUDED(mutex)
/* Static Helpers. */
-/* Updates the attached status of all slaves controlled by 'lacp' and sets its
- * negotiated parameter to true if any slaves are attachable. */
+/* Updates the attached status of all members controlled by 'lacp' and sets its
+ * negotiated parameter to true if any members are attachable. */
static void
lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex)
{
- struct slave *lead, *lead_current, *slave;
+ struct member *lead, *lead_current, *member;
struct lacp_info lead_pri;
bool lead_enable;
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 10);
@@ -671,12 +674,12 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex)
/* Check if there is a working interface.
* Store as lead_current, if there is one. */
- HMAP_FOR_EACH (slave, node, &lacp->slaves) {
- if (slave->status == LACP_CURRENT && slave->attached) {
+ HMAP_FOR_EACH (member, node, &lacp->members) {
+ if (member->status == LACP_CURRENT && member->attached) {
struct lacp_info pri;
- slave_get_priority(slave, &pri);
+ member_get_priority(member, &pri);
if (!lead_current || memcmp(&pri, &lead_pri, sizeof pri) < 0) {
- lead_current = slave;
+ lead_current = member;
lead = lead_current;
lead_pri = pri;
lead_enable = true;
@@ -685,43 +688,43 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex)
}
/* Find interface with highest priority. */
- HMAP_FOR_EACH (slave, node, &lacp->slaves) {
+ HMAP_FOR_EACH (member, node, &lacp->members) {
struct lacp_info pri;
- slave->attached = false;
+ member->attached = false;
/* XXX: In the future allow users to configure the expected system ID.
* For now just special case loopback. */
- if (eth_addr_equals(slave->partner.sys_id, slave->lacp->sys_id)) {
- VLOG_WARN_RL(&rl, "slave %s: Loopback detected. Slave is "
- "connected to its own bond", slave->name);
+ if (eth_addr_equals(member->partner.sys_id, member->lacp->sys_id)) {
+ VLOG_WARN_RL(&rl, "member %s: Loopback detected. Interface is "
+ "connected to its own bond", member->name);
continue;
}
- if (slave->status == LACP_DEFAULTED) {
+ if (member->status == LACP_DEFAULTED) {
if (lacp->fallback_ab) {
- slave->attached = true;
+ member->attached = true;
}
continue;
}
- slave_get_priority(slave, &pri);
- bool enable = slave_may_enable__(slave);
+ member_get_priority(member, &pri);
+ bool enable = member_may_enable__(member);
/* Check if partner MAC address is the same as on the working
- * interface. Activate slave only if the MAC is the same, or
+ * interface. Activate member only if the MAC is the same, or
* there is no working interface. */
if (!lead_current || (lead_current
- && eth_addr_equals(slave->partner.sys_id,
+ && eth_addr_equals(member->partner.sys_id,
lead_current->partner.sys_id))) {
- slave->attached = true;
+ member->attached = true;
}
- if (slave->attached &&
+ if (member->attached &&
(!lead
|| enable > lead_enable
|| (enable == lead_enable
&& memcmp(&pri, &lead_pri, sizeof pri) < 0))) {
- lead = slave;
+ lead = member;
lead_enable = enable;
lead_pri = pri;
}
@@ -730,65 +733,66 @@ lacp_update_attached(struct lacp *lacp) OVS_REQUIRES(mutex)
lacp->negotiated = lead != NULL;
if (lead) {
- HMAP_FOR_EACH (slave, node, &lacp->slaves) {
- if ((lacp->fallback_ab && slave->status == LACP_DEFAULTED)
- || lead->partner.key != slave->partner.key
+ HMAP_FOR_EACH (member, node, &lacp->members) {
+ if ((lacp->fallback_ab && member->status == LACP_DEFAULTED)
+ || lead->partner.key != member->partner.key
|| !eth_addr_equals(lead->partner.sys_id,
- slave->partner.sys_id)) {
- slave->attached = false;
+ member->partner.sys_id)) {
+ member->attached = false;
}
}
}
}
static void
-slave_destroy(struct slave *slave) OVS_REQUIRES(mutex)
+member_destroy(struct member *member) OVS_REQUIRES(mutex)
{
- if (slave) {
- struct lacp *lacp = slave->lacp;
+ if (member) {
+ struct lacp *lacp = member->lacp;
lacp->update = true;
- hmap_remove(&lacp->slaves, &slave->node);
+ hmap_remove(&lacp->members, &member->node);
- if (lacp->key_slave == slave) {
- struct hmap_node *slave_node = hmap_first(&lacp->slaves);
+ if (lacp->key_member == member) {
+ struct hmap_node *member_node = hmap_first(&lacp->members);
- if (slave_node) {
- lacp->key_slave = CONTAINER_OF(slave_node, struct slave, node);
+ if (member_node) {
+ lacp->key_member = CONTAINER_OF(member_node, struct member,
+ node);
} else {
- lacp->key_slave = NULL;
+ lacp->key_member = NULL;
}
}
- free(slave->name);
- free(slave);
+ free(member->name);
+ free(member);
}
}
static void
-slave_set_defaulted(struct slave *slave) OVS_REQUIRES(mutex)
+member_set_defaulted(struct member *member) OVS_REQUIRES(mutex)
{
- memset(&slave->partner, 0, sizeof slave->partner);
+ memset(&member->partner, 0, sizeof member->partner);
- slave->lacp->update = true;
- slave->status = LACP_DEFAULTED;
+ member->lacp->update = true;
+ member->status = LACP_DEFAULTED;
}
static void
-slave_set_expired(struct slave *slave) OVS_REQUIRES(mutex)
+member_set_expired(struct member *member) OVS_REQUIRES(mutex)
{
- slave->status = LACP_EXPIRED;
- slave->partner.state |= LACP_STATE_TIME;
- slave->partner.state &= ~LACP_STATE_SYNC;
+ member->status = LACP_EXPIRED;
+ member->partner.state |= LACP_STATE_TIME;
+ member->partner.state &= ~LACP_STATE_SYNC;
- timer_set_duration(&slave->rx, LACP_RX_MULTIPLIER * LACP_FAST_TIME_TX);
+ timer_set_duration(&member->rx, LACP_RX_MULTIPLIER * LACP_FAST_TIME_TX);
}
static void
-slave_get_actor(struct slave *slave, struct lacp_info *actor)
+member_get_actor(struct member *member, struct lacp_info *actor)
OVS_REQUIRES(mutex)
{
- struct lacp *lacp = slave->lacp;
+ struct lacp *lacp = member->lacp;
uint16_t key;
uint8_t state = 0;
@@ -800,62 +804,62 @@ slave_get_actor(struct slave *slave, struct lacp_info *actor)
state |= LACP_STATE_TIME;
}
- if (slave->attached) {
+ if (member->attached) {
state |= LACP_STATE_SYNC;
}
- if (slave->status == LACP_DEFAULTED) {
+ if (member->status == LACP_DEFAULTED) {
state |= LACP_STATE_DEF;
}
- if (slave->status == LACP_EXPIRED) {
+ if (member->status == LACP_EXPIRED) {
state |= LACP_STATE_EXP;
}
- if (hmap_count(&lacp->slaves) > 1) {
+ if (hmap_count(&lacp->members) > 1) {
state |= LACP_STATE_AGG;
}
- if (slave->attached || !lacp->negotiated) {
+ if (member->attached || !lacp->negotiated) {
state |= LACP_STATE_COL | LACP_STATE_DIST;
}
- key = lacp->key_slave->key;
+ key = lacp->key_member->key;
if (!key) {
- key = lacp->key_slave->port_id;
+ key = lacp->key_member->port_id;
}
actor->state = state;
actor->key = htons(key);
- actor->port_priority = htons(slave->port_priority);
- actor->port_id = htons(slave->port_id);
+ actor->port_priority = htons(member->port_priority);
+ actor->port_id = htons(member->port_id);
actor->sys_priority = htons(lacp->sys_priority);
actor->sys_id = lacp->sys_id;
}
-/* Given 'slave', populates 'priority' with data representing its LACP link
+/* Given 'member', populates 'priority' with data representing its LACP link
* priority. If two priority objects populated by this function are compared
* using memcmp, the higher priority link will be less than the lower priority
* link. */
static void
-slave_get_priority(struct slave *slave, struct lacp_info *priority)
+member_get_priority(struct member *member, struct lacp_info *priority)
OVS_REQUIRES(mutex)
{
uint16_t partner_priority, actor_priority;
/* Choose the lacp_info of the higher priority system by comparing their
* system priorities and mac addresses. */
- actor_priority = slave->lacp->sys_priority;
- partner_priority = ntohs(slave->partner.sys_priority);
+ actor_priority = member->lacp->sys_priority;
+ partner_priority = ntohs(member->partner.sys_priority);
if (actor_priority < partner_priority) {
- slave_get_actor(slave, priority);
+ member_get_actor(member, priority);
} else if (partner_priority < actor_priority) {
- *priority = slave->partner;
- } else if (eth_addr_compare_3way(slave->lacp->sys_id,
- slave->partner.sys_id) < 0) {
- slave_get_actor(slave, priority);
+ *priority = member->partner;
+ } else if (eth_addr_compare_3way(member->lacp->sys_id,
+ member->partner.sys_id) < 0) {
+ member_get_actor(member, priority);
} else {
- *priority = slave->partner;
+ *priority = member->partner;
}
/* Key and state are not used in priority comparisons. */
@@ -864,22 +868,22 @@ slave_get_priority(struct slave *slave, struct lacp_info *priority)
}
static bool
-slave_may_tx(const struct slave *slave) OVS_REQUIRES(mutex)
+member_may_tx(const struct member *member) OVS_REQUIRES(mutex)
{
/* Check for L1 state as well as LACP state. */
- return (slave->carrier_up) && ((slave->lacp->active) ||
- (slave->status != LACP_DEFAULTED));
+ return (member->carrier_up) && ((member->lacp->active) ||
+ (member->status != LACP_DEFAULTED));
}
-static struct slave *
-slave_lookup(const struct lacp *lacp, const void *slave_) OVS_REQUIRES(mutex)
+static struct member *
+member_lookup(const struct lacp *lacp, const void *member_) OVS_REQUIRES(mutex)
{
- struct slave *slave;
+ struct member *member;
- HMAP_FOR_EACH_IN_BUCKET (slave, node, hash_pointer(slave_, 0),
- &lacp->slaves) {
- if (slave->aux == slave_) {
- return slave;
+ HMAP_FOR_EACH_IN_BUCKET (member, node, hash_pointer(member_, 0),
+ &lacp->members) {
+ if (member->aux == member_) {
+ return member;
}
}
@@ -961,10 +965,10 @@ ds_put_lacp_state(struct ds *ds, uint8_t state)
static void
lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex)
{
- struct shash slave_shash = SHASH_INITIALIZER(&slave_shash);
- const struct shash_node **sorted_slaves = NULL;
+ struct shash member_shash = SHASH_INITIALIZER(&member_shash);
+ const struct shash_node **sorted_members = NULL;
- struct slave *slave;
+ struct member *member;
int i;
ds_put_format(ds, "---- %s ----\n", lacp->name);
@@ -977,10 +981,10 @@ lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex)
ds_put_format(ds, " sys_id: " ETH_ADDR_FMT "\n", ETH_ADDR_ARGS(lacp->sys_id));
ds_put_format(ds, " sys_priority: %u\n", lacp->sys_priority);
ds_put_cstr(ds, " aggregation key: ");
- if (lacp->key_slave) {
- ds_put_format(ds, "%u", lacp->key_slave->key
- ? lacp->key_slave->key
- : lacp->key_slave->port_id);
+ if (lacp->key_member) {
+ ds_put_format(ds, "%u", lacp->key_member->key
+ ? lacp->key_member->key
+ : lacp->key_member->port_id);
} else {
ds_put_cstr(ds, "none");
}
@@ -993,18 +997,18 @@ lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex)
ds_put_cstr(ds, "slow\n");
}
- HMAP_FOR_EACH (slave, node, &lacp->slaves) {
- shash_add(&slave_shash, slave->name, slave);
+ HMAP_FOR_EACH (member, node, &lacp->members) {
+ shash_add(&member_shash, member->name, member);
}
- sorted_slaves = shash_sort(&slave_shash);
+ sorted_members = shash_sort(&member_shash);
- for (i = 0; i < shash_count(&slave_shash); i++) {
+ for (i = 0; i < shash_count(&member_shash); i++) {
char *status;
struct lacp_info actor;
- slave = sorted_slaves[i]->data;
- slave_get_actor(slave, &actor);
- switch (slave->status) {
+ member = sorted_members[i]->data;
+ member_get_actor(member, &actor);
+ switch (member->status) {
case LACP_CURRENT:
status = "current";
break;
@@ -1018,11 +1022,11 @@ lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex)
OVS_NOT_REACHED();
}
- ds_put_format(ds, "\nslave: %s: %s %s\n", slave->name, status,
- slave->attached ? "attached" : "detached");
- ds_put_format(ds, " port_id: %u\n", slave->port_id);
- ds_put_format(ds, " port_priority: %u\n", slave->port_priority);
- ds_put_format(ds, " may_enable: %s\n", (slave_may_enable__(slave)
+ ds_put_format(ds, "\nmember: %s: %s %s\n", member->name, status,
+ member->attached ? "attached" : "detached");
+ ds_put_format(ds, " port_id: %u\n", member->port_id);
+ ds_put_format(ds, " port_priority: %u\n", member->port_priority);
+ ds_put_format(ds, " may_enable: %s\n", (member_may_enable__(member)
? "true" : "false"));
ds_put_format(ds, "\n actor sys_id: " ETH_ADDR_FMT "\n",
@@ -1040,58 +1044,58 @@ lacp_print_details(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex)
ds_put_cstr(ds, "\n\n");
ds_put_format(ds, " partner sys_id: " ETH_ADDR_FMT "\n",
- ETH_ADDR_ARGS(slave->partner.sys_id));
+ ETH_ADDR_ARGS(member->partner.sys_id));
ds_put_format(ds, " partner sys_priority: %u\n",
- ntohs(slave->partner.sys_priority));
+ ntohs(member->partner.sys_priority));
ds_put_format(ds, " partner port_id: %u\n",
- ntohs(slave->partner.port_id));
+ ntohs(member->partner.port_id));
ds_put_format(ds, " partner port_priority: %u\n",
- ntohs(slave->partner.port_priority));
+ ntohs(member->partner.port_priority));
ds_put_format(ds, " partner key: %u\n",
- ntohs(slave->partner.key));
+ ntohs(member->partner.key));
ds_put_cstr(ds, " partner state:");
- ds_put_lacp_state(ds, slave->partner.state);
+ ds_put_lacp_state(ds, member->partner.state);
ds_put_cstr(ds, "\n");
}
- shash_destroy(&slave_shash);
- free(sorted_slaves);
+ shash_destroy(&member_shash);
+ free(sorted_members);
}
static void
lacp_print_stats(struct ds *ds, struct lacp *lacp) OVS_REQUIRES(mutex)
{
- struct shash slave_shash = SHASH_INITIALIZER(&slave_shash);
- const struct shash_node **sorted_slaves = NULL;
+ struct shash member_shash = SHASH_INITIALIZER(&member_shash);
+ const struct shash_node **sorted_members = NULL;
- struct slave *slave;
+ struct member *member;
int i;
ds_put_format(ds, "---- %s statistics ----\n", lacp->name);
- HMAP_FOR_EACH (slave, node, &lacp->slaves) {
- shash_add(&slave_shash, slave->name, slave);
+ HMAP_FOR_EACH (member, node, &lacp->members) {
+ shash_add(&member_shash, member->name, member);
}
- sorted_slaves = shash_sort(&slave_shash);
-
- for (i = 0; i < shash_count(&slave_shash); i++) {
- slave = sorted_slaves[i]->data;
- ds_put_format(ds, "\nslave: %s:\n", slave->name);
- ds_put_format(ds, " TX PDUs: %u\n", slave->count_tx_pdus);
- ds_put_format(ds, " RX PDUs: %u\n", slave->count_rx_pdus);
- ds_put_format(ds, " RX Bad PDUs: %u\n", slave->count_rx_pdus_bad);
+ sorted_members = shash_sort(&member_shash);
+
+ for (i = 0; i < shash_count(&member_shash); i++) {
+ member = sorted_members[i]->data;
+ ds_put_format(ds, "\nmember: %s:\n", member->name);
+ ds_put_format(ds, " TX PDUs: %u\n", member->count_tx_pdus);
+ ds_put_format(ds, " RX PDUs: %u\n", member->count_rx_pdus);
+ ds_put_format(ds, " RX Bad PDUs: %u\n", member->count_rx_pdus_bad);
ds_put_format(ds, " RX Marker Request PDUs: %u\n",
- slave->count_rx_pdus_marker);
+ member->count_rx_pdus_marker);
ds_put_format(ds, " Link Expired: %u\n",
- slave->count_link_expired);
+ member->count_link_expired);
ds_put_format(ds, " Link Defaulted: %u\n",
- slave->count_link_defaulted);
+ member->count_link_defaulted);
ds_put_format(ds, " Carrier Status Changed: %u\n",
- slave->count_carrier_changed);
+ member->count_carrier_changed);
}
- shash_destroy(&slave_shash);
- free(sorted_slaves);
+ shash_destroy(&member_shash);
+ free(sorted_members);
}
static void
@@ -1152,27 +1156,28 @@ out:
lacp_unlock();
}
-/* Extract a snapshot of the current state and counters for a slave port.
- Return false if the slave is not active. */
+/* Extract a snapshot of the current state and counters for a member port.
+ Return false if the member is not active. */
bool
-lacp_get_slave_stats(const struct lacp *lacp, const void *slave_, struct lacp_slave_stats *stats)
+lacp_get_member_stats(const struct lacp *lacp, const void *member_,
+ struct lacp_member_stats *stats)
OVS_EXCLUDED(mutex)
{
- struct slave *slave;
+ struct member *member;
struct lacp_info actor;
bool ret;
ovs_mutex_lock(&mutex);
- slave = slave_lookup(lacp, slave_);
- if (slave) {
+ member = member_lookup(lacp, member_);
+ if (member) {
ret = true;
- slave_get_actor(slave, &actor);
+ member_get_actor(member, &actor);
stats->dot3adAggPortActorSystemID = actor.sys_id;
- stats->dot3adAggPortPartnerOperSystemID = slave->partner.sys_id;
- stats->dot3adAggPortAttachedAggID = (lacp->key_slave->key ?
- lacp->key_slave->key :
- lacp->key_slave->port_id);
+ stats->dot3adAggPortPartnerOperSystemID = member->partner.sys_id;
+ stats->dot3adAggPortAttachedAggID = (lacp->key_member->key ?
+ lacp->key_member->key :
+ lacp->key_member->port_id);
/* Construct my admin-state. Assume aggregation is configured on. */
stats->dot3adAggPortActorAdminState = LACP_STATE_AGG;
@@ -1189,12 +1194,12 @@ lacp_get_slave_stats(const struct lacp *lacp, const void *slave_, struct lacp_sl
stats->dot3adAggPortPartnerAdminState = 0;
stats->dot3adAggPortActorOperState = actor.state;
- stats->dot3adAggPortPartnerOperState = slave->partner.state;
+ stats->dot3adAggPortPartnerOperState = member->partner.state;
/* Read out the latest counters */
- stats->dot3adAggPortStatsLACPDUsRx = slave->count_rx_pdus;
- stats->dot3adAggPortStatsIllegalRx = slave->count_rx_pdus_bad;
- stats->dot3adAggPortStatsLACPDUsTx = slave->count_tx_pdus;
+ stats->dot3adAggPortStatsLACPDUsRx = member->count_rx_pdus;
+ stats->dot3adAggPortStatsIllegalRx = member->count_rx_pdus_bad;
+ stats->dot3adAggPortStatsLACPDUsTx = member->count_tx_pdus;
} else {
ret = false;
}
diff --git a/lib/lacp.h b/lib/lacp.h
index d731ae9a6be8130be5dc0809d6e861a26f993331..908ec201c6e695b8834ce28d6e06139a1212b1cb 100644
--- a/lib/lacp.h
+++ b/lib/lacp.h
@@ -46,32 +46,32 @@ struct lacp *lacp_ref(const struct lacp *);
void lacp_configure(struct lacp *, const struct lacp_settings *);
bool lacp_is_active(const struct lacp *);
-bool lacp_process_packet(struct lacp *, const void *slave,
+bool lacp_process_packet(struct lacp *, const void *member,
const struct dp_packet *packet);
enum lacp_status lacp_status(const struct lacp *);
-struct lacp_slave_settings {
+struct lacp_member_settings {
char *name; /* Name (for debugging). */
uint16_t id; /* Port ID. */
uint16_t priority; /* Port priority. */
uint16_t key; /* Aggregation key. */
};
-void lacp_slave_register(struct lacp *, void *slave_,
- const struct lacp_slave_settings *);
-void lacp_slave_unregister(struct lacp *, const void *slave);
-void lacp_slave_carrier_changed(const struct lacp *, const void *slave,
- bool carrier_up);
-bool lacp_slave_may_enable(const struct lacp *, const void *slave);
-bool lacp_slave_is_current(const struct lacp *, const void *slave_);
+void lacp_member_register(struct lacp *, void *member_,
+ const struct lacp_member_settings *);
+void lacp_member_unregister(struct lacp *, const void *member);
+void lacp_member_carrier_changed(const struct lacp *, const void *member,
+ bool carrier_up);
+bool lacp_member_may_enable(const struct lacp *, const void *member);
+bool lacp_member_is_current(const struct lacp *, const void *member_);
/* Callback function for lacp_run() for sending a LACP PDU. */
-typedef void lacp_send_pdu(void *slave, const void *pdu, size_t pdu_size);
+typedef void lacp_send_pdu(void *member, const void *pdu, size_t pdu_size);
void lacp_run(struct lacp *, lacp_send_pdu *);
void lacp_wait(struct lacp *);
-struct lacp_slave_stats {
+struct lacp_member_stats {
/* id */
struct eth_addr dot3adAggPortActorSystemID;
struct eth_addr dot3adAggPortPartnerOperSystemID;
@@ -92,6 +92,7 @@ struct lacp_slave_stats {
/* uint32_t dot3adAggPortStatsMarkerResponsePDUsTx; */
};
-bool lacp_get_slave_stats(const struct lacp *, const void *slave_, struct lacp_slave_stats *);
+bool lacp_get_member_stats(const struct lacp *, const void *member_,
+ struct lacp_member_stats *);
#endif /* lacp.h */
diff --git a/lib/lldp/lldp-const.h b/lib/lldp/lldp-const.h
index eceb612d18cd922bdb9dbbc30effd0299b9611ef..8c5c0733ef154b1f19f9a27333177ca31b28d40a 100644
--- a/lib/lldp/lldp-const.h
+++ b/lib/lldp/lldp-const.h
@@ -218,13 +218,13 @@
#define LLDPD_MODE_MAX LLDPD_MODE_FDP
-/* Bond slave src mac type constants */
-#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_UNKNOWN 0
-#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_REAL 1
-#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_ZERO 2
-#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_FIXED 3
-#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_LOCALLY_ADMINISTERED 4
-#define LLDP_BOND_SLAVE_SRC_MAC_TYPE_MAX \
- LLDP_BOND_SLAVE_SRC_MAC_TYPE_LOCALLY_ADMINISTERED
+/* Bond member src mac type constants */
+#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_UNKNOWN 0
+#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_REAL 1
+#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_ZERO 2
+#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_FIXED 3
+#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_LOCALLY_ADMINISTERED 4
+#define LLDP_BOND_MEMBER_SRC_MAC_TYPE_MAX \
+ LLDP_BOND_MEMBER_SRC_MAC_TYPE_LOCALLY_ADMINISTERED
#endif /* _LLDP_H */
diff --git a/lib/lldp/lldp.c b/lib/lldp/lldp.c
index 74f747fcdcbb2288fabafdc7a97f70b52372d789..e5755307fbef05b68e6103ec84d6b912ffd68f57 100644
--- a/lib/lldp/lldp.c
+++ b/lib/lldp/lldp.c
@@ -59,7 +59,7 @@ VLOG_DEFINE_THIS_MODULE(lldp);
} while (0)
#define PEEK_DISCARD_UINT8 PEEK_DISCARD(1)
#define PEEK_DISCARD_UINT16 PEEK_DISCARD(2)
-#define PEEK_DISCARD_UINT32 PEEK_DISCARD(3)
+#define PEEK_DISCARD_UINT32 PEEK_DISCARD(4)
#define PEEK_CMP(value, bytes) \
(length -= (bytes), \
pos += (bytes), \
@@ -341,6 +341,12 @@ lldp_send(struct lldpd *global OVS_UNUSED,
return dp_packet_size(p);
}
+#define CHECK_TLV_MAX_SIZE(x, name) \
+ do { if (tlv_size > (x)) { \
+ VLOG_WARN(name " TLV too large received on %s", \
+ hardware->h_ifname); \
+ goto malformed; \
+ } } while (0)
int
lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
@@ -359,7 +365,7 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
int length, af;
bool gotend = false;
bool ttl_received = false;
- int tlv_size, tlv_type, tlv_subtype;
+ int tlv_size, tlv_type, tlv_subtype, tlv_count = 0;
u_int8_t *pos, *tlv;
void *b;
struct lldpd_aa_isid_vlan_maps_tlv *isid_vlan_map = NULL;
@@ -411,6 +417,31 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
hardware->h_ifname);
goto malformed;
}
+ /* Check order for mandatory TLVs */
+ tlv_count++;
+ switch (tlv_type) {
+ case LLDP_TLV_CHASSIS_ID:
+ if (tlv_count != 1) {
+ VLOG_WARN("first TLV should be a chassis ID on %s, not %d",
+ hardware->h_ifname, tlv_type);
+ goto malformed;
+ }
+ break;
+ case LLDP_TLV_PORT_ID:
+ if (tlv_count != 2) {
+ VLOG_WARN("second TLV should be a port ID on %s, not %d",
+ hardware->h_ifname, tlv_type);
+ goto malformed;
+ }
+ break;
+ case LLDP_TLV_TTL:
+ if (tlv_count != 3) {
+ VLOG_WARN("third TLV should be a TTL on %s, not %d",
+ hardware->h_ifname, tlv_type);
+ goto malformed;
+ }
+ break;
+ }
switch (tlv_type) {
case LLDP_TLV_END:
@@ -428,7 +459,8 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
case LLDP_TLV_CHASSIS_ID:
case LLDP_TLV_PORT_ID:
- CHECK_TLV_SIZE(2, "Port Id");
+ CHECK_TLV_SIZE(2, "Port/Chassis Id");
+ CHECK_TLV_MAX_SIZE(256, "Port/Chassis Id");
tlv_subtype = PEEK_UINT8;
if (tlv_subtype == 0 || tlv_subtype > 7) {
VLOG_WARN("unknown subtype for tlv id received on %s",
@@ -438,10 +470,22 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
b = xzalloc(tlv_size - 1);
PEEK_BYTES(b, tlv_size - 1);
if (tlv_type == LLDP_TLV_PORT_ID) {
+ if (port->p_id != NULL) {
+ VLOG_WARN("Port ID TLV received twice on %s",
+ hardware->h_ifname);
+ free(b);
+ goto malformed;
+ }
port->p_id_subtype = tlv_subtype;
port->p_id = b;
port->p_id_len = tlv_size - 1;
} else {
+ if (chassis->c_id != NULL) {
+ VLOG_WARN("Chassis ID TLV received twice on %s",
+ hardware->h_ifname);
+ free(b);
+ goto malformed;
+ }
chassis->c_id_subtype = tlv_subtype;
chassis->c_id = b;
chassis->c_id_len = tlv_size - 1;
@@ -449,6 +493,11 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
break;
case LLDP_TLV_TTL:
+ if (ttl_received) {
+ VLOG_WARN("TTL TLV received twice on %s",
+ hardware->h_ifname);
+ goto malformed;
+ }
CHECK_TLV_SIZE(2, "TTL");
chassis->c_ttl = PEEK_UINT16;
ttl_received = true;
@@ -481,6 +530,11 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
case LLDP_TLV_MGMT_ADDR:
CHECK_TLV_SIZE(1, "Management address");
addr_str_length = PEEK_UINT8;
+ if (addr_str_length > sizeof(addr_str_buffer)) {
+ VLOG_WARN("too large management address on %s",
+ hardware->h_ifname);
+ goto malformed;
+ }
CHECK_TLV_SIZE(1 + addr_str_length, "Management address");
PEEK_BYTES(addr_str_buffer, addr_str_length);
addr_length = addr_str_length - 1;
@@ -505,7 +559,7 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
break;
case LLDP_TLV_ORG:
- CHECK_TLV_SIZE(4, "Organisational");
+ CHECK_TLV_SIZE(1 + sizeof orgid, "Organisational");
PEEK_BYTES(orgid, sizeof orgid);
tlv_subtype = PEEK_UINT8;
if (memcmp(dot1, orgid, sizeof orgid) == 0) {
@@ -625,6 +679,7 @@ lldp_decode(struct lldpd *cfg OVS_UNUSED, char *frame, int s,
VLOG_WARN("unknown tlv (%d) received on %s",
tlv_type,
hardware->h_ifname);
+ hardware->h_rx_unrecognized_cnt++;
goto malformed;
}
if (pos > tlv + tlv_size) {
diff --git a/lib/lldp/lldpd-structs.h b/lib/lldp/lldpd-structs.h
index 6a3ffb8d33f00760491322c7c5a6b56e6080d51d..fe5d5f9f86388a285f703ffc06aa58c9a7ee520c 100644
--- a/lib/lldp/lldpd-structs.h
+++ b/lib/lldp/lldpd-structs.h
@@ -135,8 +135,8 @@ struct lldpd_config {
int c_set_ifdescr; /* Set interface description */
int c_promisc; /* Interfaces should be in promiscuous mode */
int c_tx_hold; /* Transmit hold */
- int c_bond_slave_src_mac_type; /* Src mac type in lldp frames over bond
- * slaves */
+ int c_bond_member_src_mac_type; /* Src mac type in lldp frames over bond
+ * member interfaces */
int c_lldp_portid_type; /* The PortID type */
};
@@ -158,9 +158,9 @@ struct lldpd_ops {
};
/* An interface is uniquely identified by h_ifindex, h_ifname and h_ops. This
- * means if an interface becomes enslaved, it will be considered as a new
- * interface. The same applies for renaming and we include the index in case of
- * renaming to an existing interface.
+ * means if an interface becomes a bond member, it will be considered as a
+ * new interface. The same applies for renaming and we include the index in
+ * case of renaming to an existing interface.
*/
struct lldpd_hardware {
struct ovs_list h_entries;
diff --git a/lib/lldp/lldpd.c b/lib/lldp/lldpd.c
index 19e930526695ab71f7f62630f575b8443ac58a97..34738535dbd192aacc8a99c44028188a92417ccf 100644
--- a/lib/lldp/lldpd.c
+++ b/lib/lldp/lldpd.c
@@ -244,6 +244,7 @@ lldpd_decode(struct lldpd *cfg, char *frame, int s,
if (s < sizeof(struct eth_header) + 4) {
/* Too short, just discard it */
+ hw->h_rx_discarded_cnt++;
return;
}
@@ -284,6 +285,7 @@ lldpd_decode(struct lldpd *cfg, char *frame, int s,
VLOG_DBG("function for %s protocol did not "
"decode this frame",
cfg->g_protocols[i].name);
+ hw->h_rx_discarded_cnt++;
return;
}
chassis->c_protocol = port->p_protocol = cfg->g_protocols[i].mode;
diff --git a/lib/lockfile.c b/lib/lockfile.c
index 36728ff912d9ad139777783468b35783d1e88f21..42782d29e068adbec859767cabd7b43ffd911e7b 100644
--- a/lib/lockfile.c
+++ b/lib/lockfile.c
@@ -61,9 +61,9 @@ static struct hmap *const lock_table OVS_GUARDED_BY(lock_table_mutex)
static void lockfile_unhash(struct lockfile *);
static int lockfile_try_lock(const char *name, pid_t *pidp,
struct lockfile **lockfilep)
- OVS_REQUIRES(&lock_table_mutex);
+ OVS_REQUIRES(lock_table_mutex);
static void lockfile_do_unlock(struct lockfile * lockfile)
- OVS_REQUIRES(&lock_table_mutex);
+ OVS_REQUIRES(lock_table_mutex);
/* Returns the name of the lockfile that would be created for locking a file
* named 'filename_'. The caller is responsible for freeing the returned name,
@@ -188,7 +188,7 @@ lockfile_hash(dev_t device, ino_t inode)
}
static struct lockfile *
-lockfile_find(dev_t device, ino_t inode) OVS_REQUIRES(&lock_table_mutex)
+lockfile_find(dev_t device, ino_t inode) OVS_REQUIRES(lock_table_mutex)
{
struct lockfile *lockfile;
@@ -202,7 +202,7 @@ lockfile_find(dev_t device, ino_t inode) OVS_REQUIRES(&lock_table_mutex)
}
static void
-lockfile_unhash(struct lockfile *lockfile) OVS_REQUIRES(&lock_table_mutex)
+lockfile_unhash(struct lockfile *lockfile) OVS_REQUIRES(lock_table_mutex)
{
if (lockfile->fd >= 0) {
close(lockfile->fd);
@@ -213,7 +213,7 @@ lockfile_unhash(struct lockfile *lockfile) OVS_REQUIRES(&lock_table_mutex)
static struct lockfile *
lockfile_register(const char *name, dev_t device, ino_t inode, int fd)
- OVS_REQUIRES(&lock_table_mutex)
+ OVS_REQUIRES(lock_table_mutex)
{
struct lockfile *lockfile;
@@ -236,7 +236,7 @@ lockfile_register(const char *name, dev_t device, ino_t inode, int fd)
#ifdef _WIN32
static void
lockfile_do_unlock(struct lockfile *lockfile)
- OVS_REQUIRES(&lock_table_mutex)
+ OVS_REQUIRES(lock_table_mutex)
{
if (lockfile->fd >= 0) {
OVERLAPPED overl;
@@ -252,7 +252,7 @@ lockfile_do_unlock(struct lockfile *lockfile)
static int
lockfile_try_lock(const char *name, pid_t *pidp, struct lockfile **lockfilep)
- OVS_REQUIRES(&lock_table_mutex)
+ OVS_REQUIRES(lock_table_mutex)
{
HANDLE lock_handle;
BOOL retval;
@@ -306,7 +306,7 @@ lockfile_do_unlock(struct lockfile *lockfile)
static int
lockfile_try_lock(const char *name, pid_t *pidp, struct lockfile **lockfilep)
- OVS_REQUIRES(&lock_table_mutex)
+ OVS_REQUIRES(lock_table_mutex)
{
struct flock l;
struct stat s;
diff --git a/lib/mac-learning.c b/lib/mac-learning.c
index f6183480d9f242462d8d01d92554b1b0b2a2b9dc..9442858d95c3c5f2983dce1f49aa7ab2d6951e84 100644
--- a/lib/mac-learning.c
+++ b/lib/mac-learning.c
@@ -384,7 +384,7 @@ is_mac_learning_update_needed(const struct mac_learning *ml,
if (is_gratuitous_arp) {
/* We don't want to learn from gratuitous ARP packets that are
- * reflected back over bond slaves so we lock the learning table. For
+ * reflected back over bond members so we lock the learning table. For
* more detail, see the bigger comment in update_learning_table__(). */
if (!is_bond) {
return true; /* Need to set the gratuitous ARP lock. */
@@ -424,7 +424,7 @@ update_learning_table__(struct mac_learning *ml, struct eth_addr src,
mac = mac_learning_insert(ml, src, vlan);
if (is_gratuitous_arp) {
/* Gratuitous ARP packets received over non-bond interfaces could be
- * reflected back over bond slaves. We don't want to learn from these
+ * reflected back over bond members. We don't want to learn from these
* reflected packets, so we lock each entry for which a gratuitous ARP
* packet was received over a non-bond interface and refrain from
* learning from gratuitous ARP packets that arrive over bond
diff --git a/lib/mac-learning.h b/lib/mac-learning.h
index ad2f1fe4ebef9a234e7aaf47626086721f591980..0ddab06cbc27981f277108bd213f2f8047017fbd 100644
--- a/lib/mac-learning.h
+++ b/lib/mac-learning.h
@@ -95,7 +95,7 @@ struct mac_learning;
#define MAC_ENTRY_DEFAULT_IDLE_TIME 300
/* Time, in seconds, to lock an entry updated by a gratuitous ARP to avoid
- * relearning based on a reflection from a bond slave. */
+ * relearning based on a reflection from a bond member. */
#define MAC_GRAT_ARP_LOCK_TIME 5
/* A MAC learning table entry.
diff --git a/lib/match.c b/lib/match.c
index 0d1ec31ef843dc74ff7c60e2c2bcf230ceadfa2d..ba716579d8ecde56d79aaa10d9f6e22d21de1aa3 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -374,6 +374,34 @@ match_set_tun_erspan_hwid(struct match *match, uint8_t hwid)
match_set_tun_erspan_hwid_masked(match, hwid, UINT8_MAX);
}
+void
+match_set_tun_gtpu_flags_masked(struct match *match, uint8_t flags,
+ uint8_t mask)
+{
+ match->wc.masks.tunnel.gtpu_flags = mask; /* Bits to match on. */
+ match->flow.tunnel.gtpu_flags = flags & mask; /* Masked value. */
+}
+
+void
+match_set_tun_gtpu_flags(struct match *match, uint8_t flags)
+{
+ match_set_tun_gtpu_flags_masked(match, flags, UINT8_MAX);
+}
+
+void
+match_set_tun_gtpu_msgtype_masked(struct match *match, uint8_t msgtype,
+ uint8_t mask)
+{
+ match->wc.masks.tunnel.gtpu_msgtype = mask; /* Bits to match on. */
+ match->flow.tunnel.gtpu_msgtype = msgtype & mask; /* Masked value. */
+}
+
+void
+match_set_tun_gtpu_msgtype(struct match *match, uint8_t msgtype)
+{
+ match_set_tun_gtpu_msgtype_masked(match, msgtype, UINT8_MAX);
+}
+
void
match_set_in_port(struct match *match, ofp_port_t ofp_port)
{
@@ -899,6 +927,14 @@ match_set_nw_proto(struct match *match, uint8_t nw_proto)
match->wc.masks.nw_proto = UINT8_MAX;
}
+void
+match_set_nw_proto_masked(struct match *match,
+ const uint8_t nw_proto, const uint8_t mask)
+{
+ match->flow.nw_proto = nw_proto & mask; /* Keep only matched bits. */
+ match->wc.masks.nw_proto = mask;
+}
+
void
match_set_nw_src(struct match *match, ovs_be32 nw_src)
{
@@ -992,6 +1028,30 @@ match_set_icmp_code(struct match *match, uint8_t icmp_code)
match_set_tp_dst(match, htons(icmp_code));
}
+void
+match_set_arp_opcode_masked(struct match *match,
+ const uint8_t opcode,
+ const uint8_t mask)
+{
+ match_set_nw_proto_masked(match, opcode, mask);
+}
+
+void
+match_set_arp_spa_masked(struct match *match,
+ const ovs_be32 arp_spa,
+ const ovs_be32 mask)
+{
+ match_set_nw_src_masked(match, arp_spa, mask);
+}
+
+void
+match_set_arp_tpa_masked(struct match *match,
+ const ovs_be32 arp_tpa,
+ const ovs_be32 mask)
+{
+ match_set_nw_dst_masked(match, arp_tpa, mask);
+}
+
void
match_set_arp_sha(struct match *match, const struct eth_addr sha)
{
@@ -1325,6 +1385,12 @@ format_flow_tunnel(struct ds *s, const struct match *match)
if (wc->masks.tunnel.erspan_hwid && tnl->erspan_ver == 2) {
ds_put_format(s, "tun_erspan_hwid=%#"PRIx8",", tnl->erspan_hwid);
}
+ if (wc->masks.tunnel.gtpu_flags) {
+ ds_put_format(s, "gtpu_flags=%#"PRIx8",", tnl->gtpu_flags);
+ }
+ if (wc->masks.tunnel.gtpu_msgtype) {
+ ds_put_format(s, "gtpu_msgtype=%"PRIu8",", tnl->gtpu_msgtype);
+ }
if (wc->masks.tunnel.flags & FLOW_TNL_F_MASK) {
format_flags_masked(s, "tun_flags", flow_tun_flag_to_string,
tnl->flags & FLOW_TNL_F_MASK,
@@ -1396,7 +1462,7 @@ match_format(const struct match *match,
bool is_megaflow = false;
int i;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
if (priority != OFP_DEFAULT_PRIORITY) {
ds_put_format(s, "%spriority=%s%d,",
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index 8b62e6d968354edb1c28d49d9466442f30646cc0..c808d205d5b491aa5be429cfcc60a8df219f6bae 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -391,6 +391,10 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc)
case MFF_NSH_C3:
case MFF_NSH_C4:
return !wc->masks.nsh.context[mf->id - MFF_NSH_C1];
+ case MFF_TUN_GTPU_FLAGS:
+ return !wc->masks.tunnel.gtpu_flags;
+ case MFF_TUN_GTPU_MSGTYPE:
+ return !wc->masks.tunnel.gtpu_msgtype;
case MFF_N_IDS:
default:
@@ -530,6 +534,8 @@ mf_is_value_valid(const struct mf_field *mf, const union mf_value *value)
case MFF_TUN_ERSPAN_VER:
case MFF_TUN_ERSPAN_DIR:
case MFF_TUN_ERSPAN_HWID:
+ case MFF_TUN_GTPU_FLAGS:
+ case MFF_TUN_GTPU_MSGTYPE:
CASE_MFF_TUN_METADATA:
case MFF_METADATA:
case MFF_IN_PORT:
@@ -711,6 +717,12 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow,
case MFF_TUN_ERSPAN_HWID:
value->u8 = flow->tunnel.erspan_hwid;
break;
+ case MFF_TUN_GTPU_FLAGS:
+ value->u8 = flow->tunnel.gtpu_flags;
+ break;
+ case MFF_TUN_GTPU_MSGTYPE:
+ value->u8 = flow->tunnel.gtpu_msgtype;
+ break;
CASE_MFF_TUN_METADATA:
tun_metadata_read(&flow->tunnel, mf, value);
break;
@@ -1042,6 +1054,12 @@ mf_set_value(const struct mf_field *mf,
case MFF_TUN_ERSPAN_HWID:
match_set_tun_erspan_hwid(match, value->u8);
break;
+ case MFF_TUN_GTPU_FLAGS:
+ match_set_tun_gtpu_flags(match, value->u8);
+ break;
+ case MFF_TUN_GTPU_MSGTYPE:
+ match_set_tun_gtpu_msgtype(match, value->u8);
+ break;
CASE_MFF_TUN_METADATA:
tun_metadata_set_match(mf, value, NULL, match, err_str);
break;
@@ -1459,6 +1477,12 @@ mf_set_flow_value(const struct mf_field *mf,
case MFF_TUN_ERSPAN_HWID:
flow->tunnel.erspan_hwid = value->u8;
break;
+ case MFF_TUN_GTPU_FLAGS:
+ flow->tunnel.gtpu_flags = value->u8;
+ break;
+ case MFF_TUN_GTPU_MSGTYPE:
+ flow->tunnel.gtpu_msgtype = value->u8;
+ break;
CASE_MFF_TUN_METADATA:
tun_metadata_write(&flow->tunnel, mf, value);
break;
@@ -1780,6 +1804,8 @@ mf_is_pipeline_field(const struct mf_field *mf)
case MFF_TUN_ERSPAN_IDX:
case MFF_TUN_ERSPAN_DIR:
case MFF_TUN_ERSPAN_HWID:
+ case MFF_TUN_GTPU_FLAGS:
+ case MFF_TUN_GTPU_MSGTYPE:
CASE_MFF_TUN_METADATA:
case MFF_METADATA:
case MFF_IN_PORT:
@@ -1970,6 +1996,12 @@ mf_set_wild(const struct mf_field *mf, struct match *match, char **err_str)
case MFF_TUN_ERSPAN_HWID:
match_set_tun_erspan_hwid_masked(match, 0, 0);
break;
+ case MFF_TUN_GTPU_FLAGS:
+ match_set_tun_gtpu_flags_masked(match, 0, 0);
+ break;
+ case MFF_TUN_GTPU_MSGTYPE:
+ match_set_tun_gtpu_msgtype_masked(match, 0, 0);
+ break;
CASE_MFF_TUN_METADATA:
tun_metadata_set_match(mf, NULL, NULL, match, err_str);
break;
@@ -2296,12 +2328,6 @@ mf_set(const struct mf_field *mf,
switch (mf->id) {
case MFF_CT_ZONE:
case MFF_CT_NW_PROTO:
- case MFF_CT_NW_SRC:
- case MFF_CT_NW_DST:
- case MFF_CT_IPV6_SRC:
- case MFF_CT_IPV6_DST:
- case MFF_CT_TP_SRC:
- case MFF_CT_TP_DST:
case MFF_RECIRC_ID:
case MFF_PACKET_TYPE:
case MFF_CONJ_ID:
@@ -2377,6 +2403,12 @@ mf_set(const struct mf_field *mf,
case MFF_TUN_ERSPAN_HWID:
match_set_tun_erspan_hwid_masked(match, value->u8, mask->u8);
break;
+ case MFF_TUN_GTPU_FLAGS:
+ match_set_tun_gtpu_flags_masked(match, value->u8, mask->u8);
+ break;
+ case MFF_TUN_GTPU_MSGTYPE:
+ match_set_tun_gtpu_msgtype_masked(match, value->u8, mask->u8);
+ break;
CASE_MFF_TUN_METADATA:
tun_metadata_set_match(mf, value, mask, match, err_str);
break;
@@ -2419,6 +2451,30 @@ mf_set(const struct mf_field *mf,
ntoh128(mask->be128));
break;
+ case MFF_CT_NW_SRC:
+ match_set_ct_nw_src_masked(match, value->be32, mask->be32);
+ break;
+
+ case MFF_CT_NW_DST:
+ match_set_ct_nw_dst_masked(match, value->be32, mask->be32);
+ break;
+
+ case MFF_CT_IPV6_SRC:
+ match_set_ct_ipv6_src_masked(match, &value->ipv6, &mask->ipv6);
+ break;
+
+ case MFF_CT_IPV6_DST:
+ match_set_ct_ipv6_dst_masked(match, &value->ipv6, &mask->ipv6);
+ break;
+
+ case MFF_CT_TP_SRC:
+ match_set_ct_tp_src_masked(match, value->be16, mask->be16);
+ break;
+
+ case MFF_CT_TP_DST:
+ match_set_ct_tp_dst_masked(match, value->be16, mask->be16);
+ break;
+
case MFF_ETH_DST:
match_set_dl_dst_masked(match, value->mac, mask->mac);
break;
diff --git a/lib/meta-flow.xml b/lib/meta-flow.xml
index 90b405c737501ab7ac77c88861edd52a0134a904..28865f88c8a55006745f4d123fa16558ce2dfaba 100644
--- a/lib/meta-flow.xml
+++ b/lib/meta-flow.xml
@@ -1240,6 +1240,8 @@ tcp,tp_src=0x07c0/0xfff0
priority, that is, any given packet must be able to match at most one
conjunctive flow at a given priority. Overlapping conjunctive flows
yield unpredictable results.
+ (The flows that constitute a conjunctive flow may overlap with those
+ that constitute the same or another conjunctive flow.)
Following a conjunctive flow match, the search for the flow with
@@ -1456,7 +1458,8 @@ ovs-ofctl add-flow br-int 'in_port=3,tun_src=192.168.1.1,tun_id=5001 actions=1'
LISP has a 24-bit instance ID.
GRE has an optional 32-bit key.
STT has a 64-bit key.
- ERSPAN has a 10-bit key (Session ID).
+ ERSPAN has a 10-bit key (Session ID).
+ GTPU has a 32-bit key (Tunnel Endpoint ID).
@@ -1797,6 +1800,82 @@ ovs-ofctl add-flow br-int 'in_port=3,tun_src=192.168.1.1,tun_id=5001 actions=1'
A 6-bit unique identifier of an ERSPAN v2 engine within a system.
+
GTP-U Metadata Fields
+
+
+ These fields provide access to set-up GPRS Tunnelling Protocol
+ for User Plane (GTPv1-U), based on 3GPP TS 29.281. A GTP-U
+ header has the following format:
+
+
+
+
+
+
+
+
+ The flags and message type have the Open vSwitch GTP-U specific fields
+ described below. Open vSwitch makes the TEID (Tunnel Endpoint
+ Identifier), which identifies a tunnel endpoint in the receiving GTP-U
+ protocol entity, available via <ref field="tun_id"/>.
+
+
+
+
+ This field holds the 8-bit GTP-U flags, encoded as:
+
+
+
+
+
+
+
+ The flags are:
+
+
+ - version
+ - Used to determine the version of the GTP-U protocol, which should
+ be set to 1.
+
+ - PT
+ - Protocol type, used as a protocol discriminator
+ between GTP (1) and GTP' (0).
+
+ - rsv
+ - Reserved. Must be zero.
+
+ - E
+ - If 1, indicates the presence of a meaningful value of the Next
+ Extension Header field.
+
+ - S
+ - If 1, indicates the presence of a meaningful value of the Sequence
+ Number field.
+
+ - PN
+ - If 1, indicates the presence of a meaningful value of the N-PDU
+ Number field.
+
+
+
+
+ This field indicates whether it's a signalling message used for path
+ management, or a user plane message which carries the original packet.
+ For the complete range of message types, refer to [3GPP TS 29.281].
+
+
Geneve Fields
@@ -2566,8 +2645,8 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123)
est (0x02)
- Part of an existing connection. Set to 1 if this is a committed
- connection.
+ Part of an existing connection. Set to 1 if packets of a committed
+ connection have been seen by conntrack from both directions.
rel (0x04)
@@ -2715,7 +2794,7 @@ actions=clone(load:0->NXM_OF_IN_PORT[],output:123)
connection), or be of different protocol (i.e., when an ICMP response
is sent to an UDP packet). In case of related connections, e.g., an
FTP data connection, the original direction tuple contains the
- original direction headers from the master connection, e.g., an FTP
+ original direction headers from the parent connection, e.g., an FTP
control connection.
@@ -3841,18 +3920,18 @@ r r c c c.
-
- When
is 1, there is another MPLS label
+ When <ref field="mpls_bos"/> is 0, there is another MPLS label
following this one, so the Ethertype passed to pop_mpls
should be an MPLS Ethertype. For example: table=0,
- dl_type=0x8847, mpls_bos=1, actions=pop_mpls:0x8847,
+ dl_type=0x8847, mpls_bos=0, actions=pop_mpls:0x8847,
goto_table:1
-
- When
is 0, this MPLS label is the last one,
+ When <ref field="mpls_bos"/> is 1, this MPLS label is the last one,
so the Ethertype passed to pop_mpls should be a non-MPLS
Ethertype such as IPv4. For example: table=1, dl_type=0x8847,
- mpls_bos=0, actions=pop_mpls:0x0800, goto_table:2
+ mpls_bos=1, actions=pop_mpls:0x0800, goto_table:2
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 6187129c00e585846cb3cb470592918f4eff30de..2640a421ac17367685cee11009f987e702598631 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -26,17 +26,10 @@
#include
#include
-/* Include rte_compat.h first to allow experimental API's needed for the
- * rte_meter.h rfc4115 functions. Once they are no longer marked as
- * experimental the #define and rte_compat.h include can be removed.
- */
-#define ALLOW_EXPERIMENTAL_API
-#include
#include
#include
#include
#include
-#include
#include
#include
#include
@@ -152,9 +145,18 @@ typedef uint16_t dpdk_port_t;
#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
+/* List of required flags advertised by the hardware that will be used
+ * if TSO is enabled. Ideally this should include DEV_TX_OFFLOAD_SCTP_CKSUM.
+ * However, very few drivers support that at the moment and SCTP is not as
+ * widely used a protocol as TCP and UDP, so it's optional. */
+#define DPDK_TX_TSO_OFFLOAD_FLAGS (DEV_TX_OFFLOAD_TCP_TSO \
+ | DEV_TX_OFFLOAD_TCP_CKSUM \
+ | DEV_TX_OFFLOAD_UDP_CKSUM \
+ | DEV_TX_OFFLOAD_IPV4_CKSUM)
+
+
static const struct rte_eth_conf port_conf = {
.rxmode = {
- .mq_mode = ETH_MQ_RX_RSS,
.split_hdr_size = 0,
.offloads = 0,
},
@@ -206,10 +208,6 @@ struct netdev_dpdk_sw_stats {
uint64_t tx_invalid_hwol_drops;
};
-enum { DPDK_RING_SIZE = 256 };
-BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
-enum { DRAIN_TSC = 200000ULL };
-
enum dpdk_dev_type {
DPDK_DEV_ETH = 0,
DPDK_DEV_VHOST = 1,
@@ -387,22 +385,6 @@ struct dpdk_tx_queue {
);
};
-/* dpdk has no way to remove dpdk ring ethernet devices
- so we have to keep them around once they've been created
-*/
-
-static struct ovs_list dpdk_ring_list OVS_GUARDED_BY(dpdk_mutex)
- = OVS_LIST_INITIALIZER(&dpdk_ring_list);
-
-struct dpdk_ring {
- /* For the client rings */
- struct rte_ring *cring_tx;
- struct rte_ring *cring_rx;
- unsigned int user_port_id; /* User given port no, parsed from port name */
- dpdk_port_t eth_port_id; /* ethernet device port id */
- struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
-};
-
struct ingress_policer {
struct rte_meter_srtcm_params app_srtcm_params;
struct rte_meter_srtcm in_policer;
@@ -415,6 +397,7 @@ enum dpdk_hw_ol_features {
NETDEV_RX_HW_CRC_STRIP = 1 << 1,
NETDEV_RX_HW_SCATTER = 1 << 2,
NETDEV_TX_TSO_OFFLOAD = 1 << 3,
+ NETDEV_TX_SCTP_CHECKSUM_OFFLOAD = 1 << 4,
};
/*
@@ -533,6 +516,9 @@ struct netdev_dpdk {
* otherwise interrupt mode is used. */
bool requested_lsc_interrupt_mode;
bool lsc_interrupt_mode;
+
+ /* VF configuration. */
+ struct eth_addr requested_hwaddr;
);
PADDED_MEMBERS(CACHE_LINE_SIZE,
@@ -975,6 +961,14 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
rte_eth_dev_info_get(dev->port_id, &info);
+ /* As of DPDK 19.11, it is not allowed to set a mq_mode for
+ * virtio PMD driver. */
+ if (!strcmp(info.driver_name, "net_virtio")) {
+ conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
+ } else {
+ conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
+ }
+
/* As of DPDK 17.11.1 a few PMDs require to explicitly enable
* scatter to support jumbo RX.
* Setting scatter for the device is done after checking for
@@ -997,9 +991,10 @@ dpdk_eth_dev_port_config(struct netdev_dpdk *dev, int n_rxq, int n_txq)
}
if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
- conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_TSO;
- conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- conf.txmode.offloads |= DEV_TX_OFFLOAD_IPV4_CKSUM;
+ conf.txmode.offloads |= DPDK_TX_TSO_OFFLOAD_FLAGS;
+ if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
+ conf.txmode.offloads |= DEV_TX_OFFLOAD_SCTP_CKSUM;
+ }
}
/* Limit configured rss hash functions to only those supported
@@ -1100,12 +1095,10 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
struct rte_ether_addr eth_addr;
int diag;
int n_rxq, n_txq;
+ uint32_t tx_tso_offload_capa = DPDK_TX_TSO_OFFLOAD_FLAGS;
uint32_t rx_chksm_offload_capa = DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM |
DEV_RX_OFFLOAD_IPV4_CKSUM;
- uint32_t tx_tso_offload_capa = DEV_TX_OFFLOAD_TCP_TSO |
- DEV_TX_OFFLOAD_TCP_CKSUM |
- DEV_TX_OFFLOAD_IPV4_CKSUM;
rte_eth_dev_info_get(dev->port_id, &info);
@@ -1137,6 +1130,13 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
if ((info.tx_offload_capa & tx_tso_offload_capa)
== tx_tso_offload_capa) {
dev->hw_ol_features |= NETDEV_TX_TSO_OFFLOAD;
+ if (info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) {
+ dev->hw_ol_features |= NETDEV_TX_SCTP_CHECKSUM_OFFLOAD;
+ } else {
+ VLOG_WARN("%s: Tx SCTP checksum offload is not supported, "
+ "SCTP packets sent to this device will be dropped",
+ netdev_get_name(&dev->up));
+ }
} else {
VLOG_WARN("%s: Tx TSO offload is not supported.",
netdev_get_name(&dev->up));
@@ -1282,27 +1282,6 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no,
return 0;
}
-/* dev_name must be the prefix followed by a positive decimal number.
- * (no leading + or - signs are allowed) */
-static int
-dpdk_dev_parse_name(const char dev_name[], const char prefix[],
- unsigned int *port_no)
-{
- const char *cport;
-
- if (strncmp(dev_name, prefix, strlen(prefix))) {
- return ENODEV;
- }
-
- cport = dev_name + strlen(prefix);
-
- if (str_to_uint(cport, 10, port_no)) {
- return 0;
- } else {
- return ENODEV;
- }
-}
-
/* Get the number of OVS interfaces which have the same DPDK
* rte device (e.g. same pci bus address).
* FIXME: avoid direct access to DPDK internal array rte_eth_devices.
@@ -1327,7 +1306,7 @@ static int
vhost_common_construct(struct netdev *netdev)
OVS_REQUIRES(dpdk_mutex)
{
- int socket_id = rte_lcore_to_socket_id(rte_get_master_lcore());
+ int socket_id = rte_lcore_to_socket_id(rte_get_main_lcore());
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
dev->vhost_rxq_enabled = dpdk_rte_mzalloc(OVS_VHOST_MAX_QUEUE_NUM *
@@ -1478,7 +1457,6 @@ netdev_dpdk_destruct(struct netdev *netdev)
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
struct rte_device *rte_dev;
struct rte_eth_dev *eth_dev;
- bool remove_on_close;
ovs_mutex_lock(&dpdk_mutex);
@@ -1490,20 +1468,15 @@ netdev_dpdk_destruct(struct netdev *netdev)
* FIXME: avoid direct access to DPDK internal array rte_eth_devices.
*/
eth_dev = &rte_eth_devices[dev->port_id];
- remove_on_close =
- eth_dev->data &&
- (eth_dev->data->dev_flags & RTE_ETH_DEV_CLOSE_REMOVE);
rte_dev = eth_dev->device;
/* Remove the eth device. */
rte_eth_dev_close(dev->port_id);
- /* Remove this rte device and all its eth devices if flag
- * RTE_ETH_DEV_CLOSE_REMOVE is not supported (which means representors
- * are not supported), or if all the eth devices belonging to the rte
- * device are closed.
+ /* Remove this rte device and all its eth devices if all the eth
+ * devices belonging to the rte device are closed.
*/
- if (!remove_on_close || !netdev_dpdk_get_num_ports(rte_dev)) {
+ if (!netdev_dpdk_get_num_ports(rte_dev)) {
int ret = rte_dev_remove(rte_dev);
if (ret < 0) {
@@ -1710,6 +1683,16 @@ out:
return ret;
}
+/* Returns true if the device flags that DPDK reports for the port backing
+ * 'dev' include RTE_ETH_DEV_REPRESENTOR, false otherwise. */
+static bool
+dpdk_port_is_representor(struct netdev_dpdk *dev)
+ OVS_REQUIRES(dev->mutex)
+{
+ struct rte_eth_dev_info info;
+
+ rte_eth_dev_info_get(dev->port_id, &info);
+
+ return (*info.dev_flags & RTE_ETH_DEV_REPRESENTOR) != 0;
+}
+
static int
netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
{
@@ -1744,6 +1727,11 @@ netdev_dpdk_get_config(const struct netdev *netdev, struct smap *args)
}
smap_add(args, "lsc_interrupt_mode",
dev->lsc_interrupt_mode ? "true" : "false");
+
+ if (dpdk_port_is_representor(dev)) {
+ smap_add_format(args, "dpdk-vf-mac", ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(dev->requested_hwaddr));
+ }
}
ovs_mutex_unlock(&dev->mutex);
@@ -1923,6 +1911,7 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
{RTE_FC_RX_PAUSE, RTE_FC_FULL }
};
const char *new_devargs;
+ const char *vf_mac;
int err = 0;
ovs_mutex_lock(&dpdk_mutex);
@@ -1993,6 +1982,28 @@ netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args,
goto out;
}
+ vf_mac = smap_get(args, "dpdk-vf-mac");
+ if (vf_mac) {
+ struct eth_addr mac;
+
+ if (!dpdk_port_is_representor(dev)) {
+ VLOG_WARN_BUF(errp, "'%s' is trying to set the VF MAC '%s' "
+ "but 'options:dpdk-vf-mac' is only supported for "
+ "VF representors.",
+ netdev_get_name(netdev), vf_mac);
+ } else if (!eth_addr_from_string(vf_mac, &mac)) {
+ VLOG_WARN_BUF(errp, "interface '%s': cannot parse VF MAC '%s'.",
+ netdev_get_name(netdev), vf_mac);
+ } else if (eth_addr_is_multicast(mac)) {
+ VLOG_WARN_BUF(errp,
+ "interface '%s': cannot set VF MAC to multicast "
+ "address '%s'.", netdev_get_name(netdev), vf_mac);
+ } else if (!eth_addr_equals(dev->requested_hwaddr, mac)) {
+ dev->requested_hwaddr = mac;
+ netdev_request_reconfigure(netdev);
+ }
+ }
+
lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", false);
if (dev->requested_lsc_interrupt_mode != lsc_interrupt_mode) {
dev->requested_lsc_interrupt_mode = lsc_interrupt_mode;
@@ -2042,19 +2053,6 @@ out:
return err;
}
-static int
-netdev_dpdk_ring_set_config(struct netdev *netdev, const struct smap *args,
- char **errp OVS_UNUSED)
-{
- struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-
- ovs_mutex_lock(&dev->mutex);
- dpdk_set_rxq_config(dev, args);
- ovs_mutex_unlock(&dev->mutex);
-
- return 0;
-}
-
static int
netdev_dpdk_vhost_client_set_config(struct netdev *netdev,
const struct smap *args,
@@ -2070,12 +2068,6 @@ netdev_dpdk_vhost_client_set_config(struct netdev *netdev,
if (!nullable_string_is_equal(path, dev->vhost_id)) {
free(dev->vhost_id);
dev->vhost_id = nullable_xstrdup(path);
- /* check zero copy configuration */
- if (smap_get_bool(args, "dq-zero-copy", false)) {
- dev->vhost_driver_flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
- } else {
- dev->vhost_driver_flags &= ~RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
- }
netdev_request_reconfigure(netdev);
}
}
@@ -2947,19 +2939,45 @@ netdev_dpdk_eth_send(struct netdev *netdev, int qid,
return 0;
}
+/* Sets the MAC address of 'dev' to 'mac'.
+ *
+ * For DPDK_DEV_ETH devices the address is first programmed into the port
+ * with rte_eth_dev_default_mac_addr_set(); for other device types nothing
+ * is programmed. 'dev->hwaddr' is updated only when no error occurred.
+ *
+ * Returns 0 on success. On failure, logs a warning and returns the negation
+ * of the DPDK return code, leaving 'dev->hwaddr' unchanged. */
+static int
+netdev_dpdk_set_etheraddr__(struct netdev_dpdk *dev, const struct eth_addr mac)
+ OVS_REQUIRES(dev->mutex)
+{
+ int error = 0;
+
+ if (dev->type == DPDK_DEV_ETH) {
+ struct rte_ether_addr rte_mac;
+
+ memcpy(rte_mac.addr_bytes, mac.ea, ETH_ADDR_LEN);
+ error = -rte_eth_dev_default_mac_addr_set(dev->port_id, &rte_mac);
+ }
+
+ if (error) {
+ VLOG_WARN("%s: Failed to set requested mac("ETH_ADDR_FMT"): %s",
+ netdev_get_name(&dev->up), ETH_ADDR_ARGS(mac),
+ rte_strerror(error));
+ return error;
+ }
+
+ dev->hwaddr = mac;
+ return 0;
+}
+
static int
netdev_dpdk_set_etheraddr(struct netdev *netdev, const struct eth_addr mac)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+ int err = 0;
ovs_mutex_lock(&dev->mutex);
if (!eth_addr_equals(dev->hwaddr, mac)) {
- dev->hwaddr = mac;
- netdev_change_seq_changed(netdev);
+ err = netdev_dpdk_set_etheraddr__(dev, mac);
+ if (!err) {
+ netdev_change_seq_changed(netdev);
+ }
}
ovs_mutex_unlock(&dev->mutex);
- return 0;
+ return err;
}
static int
@@ -3658,6 +3676,7 @@ netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args)
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
struct rte_eth_dev_info dev_info;
uint32_t link_speed;
+ uint32_t dev_flags;
if (!rte_eth_dev_is_valid_port(dev->port_id)) {
return ENODEV;
@@ -3667,6 +3686,7 @@ netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args)
ovs_mutex_lock(&dev->mutex);
rte_eth_dev_info_get(dev->port_id, &dev_info);
link_speed = dev->link.link_speed;
+ dev_flags = *dev_info.dev_flags;
ovs_mutex_unlock(&dev->mutex);
const struct rte_bus *bus;
const struct rte_pci_device *pci_dev;
@@ -3714,6 +3734,11 @@ netdev_dpdk_get_status(const struct netdev *netdev, struct smap *args)
smap_add(args, "link_speed",
netdev_dpdk_link_speed_to_str__(link_speed));
+ if (dev_flags & RTE_ETH_DEV_REPRESENTOR) {
+ smap_add_format(args, "dpdk-vf-mac", ETH_ADDR_FMT,
+ ETH_ADDR_ARGS(dev->hwaddr));
+ }
+
return 0;
}
@@ -4240,131 +4265,6 @@ netdev_dpdk_class_init(void)
return 0;
}
-/* Client Rings */
-
-static int
-dpdk_ring_create(const char dev_name[], unsigned int port_no,
- dpdk_port_t *eth_port_id)
-{
- struct dpdk_ring *ring_pair;
- char *ring_name;
- int port_id;
-
- ring_pair = dpdk_rte_mzalloc(sizeof *ring_pair);
- if (!ring_pair) {
- return ENOMEM;
- }
-
- /* XXX: Add support for multiquque ring. */
- ring_name = xasprintf("%s_tx", dev_name);
-
- /* Create single producer tx ring, netdev does explicit locking. */
- ring_pair->cring_tx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0,
- RING_F_SP_ENQ);
- free(ring_name);
- if (ring_pair->cring_tx == NULL) {
- rte_free(ring_pair);
- return ENOMEM;
- }
-
- ring_name = xasprintf("%s_rx", dev_name);
-
- /* Create single consumer rx ring, netdev does explicit locking. */
- ring_pair->cring_rx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0,
- RING_F_SC_DEQ);
- free(ring_name);
- if (ring_pair->cring_rx == NULL) {
- rte_free(ring_pair);
- return ENOMEM;
- }
-
- port_id = rte_eth_from_rings(dev_name, &ring_pair->cring_rx, 1,
- &ring_pair->cring_tx, 1, SOCKET0);
-
- if (port_id < 0) {
- rte_free(ring_pair);
- return ENODEV;
- }
-
- ring_pair->user_port_id = port_no;
- ring_pair->eth_port_id = port_id;
- *eth_port_id = port_id;
-
- ovs_list_push_back(&dpdk_ring_list, &ring_pair->list_node);
-
- return 0;
-}
-
-static int
-dpdk_ring_open(const char dev_name[], dpdk_port_t *eth_port_id)
- OVS_REQUIRES(dpdk_mutex)
-{
- struct dpdk_ring *ring_pair;
- unsigned int port_no;
- int err = 0;
-
- /* Names always start with "dpdkr" */
- err = dpdk_dev_parse_name(dev_name, "dpdkr", &port_no);
- if (err) {
- return err;
- }
-
- /* Look through our list to find the device */
- LIST_FOR_EACH (ring_pair, list_node, &dpdk_ring_list) {
- if (ring_pair->user_port_id == port_no) {
- VLOG_INFO("Found dpdk ring device %s:", dev_name);
- /* Really all that is needed */
- *eth_port_id = ring_pair->eth_port_id;
- return 0;
- }
- }
- /* Need to create the device rings */
- return dpdk_ring_create(dev_name, port_no, eth_port_id);
-}
-
-static int
-netdev_dpdk_ring_send(struct netdev *netdev, int qid,
- struct dp_packet_batch *batch, bool concurrent_txq)
-{
- struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
- struct dp_packet *packet;
-
- /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that
- * the offload fields are clear. This is because the same mbuf may be
- * modified by the consumer of the ring and return into the datapath
- * without recalculating the RSS hash or revalidating the checksums. */
- DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
- dp_packet_reset_offload(packet);
- }
-
- netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
- return 0;
-}
-
-static int
-netdev_dpdk_ring_construct(struct netdev *netdev)
-{
- dpdk_port_t port_no = 0;
- int err = 0;
-
- VLOG_WARN_ONCE("dpdkr a.k.a. ring ports are considered deprecated. "
- "Please migrate to virtio-based interfaces, e.g. "
- "dpdkvhostuserclient ports, net_virtio_user DPDK vdev.");
-
- ovs_mutex_lock(&dpdk_mutex);
-
- err = dpdk_ring_open(netdev->name, &port_no);
- if (err) {
- goto unlock_dpdk;
- }
-
- err = common_construct(netdev, port_no, DPDK_DEV_ETH,
- rte_eth_dev_socket_id(port_no));
-unlock_dpdk:
- ovs_mutex_unlock(&dpdk_mutex);
- return err;
-}
-
/* QoS Functions */
/*
@@ -5075,6 +4975,7 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
&& dev->lsc_interrupt_mode == dev->requested_lsc_interrupt_mode
&& dev->rxq_size == dev->requested_rxq_size
&& dev->txq_size == dev->requested_txq_size
+ && eth_addr_equals(dev->hwaddr, dev->requested_hwaddr)
&& dev->socket_id == dev->requested_socket_id
&& dev->started && !dev->reset_needed) {
/* Reconfiguration is unnecessary */
@@ -5106,13 +5007,37 @@ netdev_dpdk_reconfigure(struct netdev *netdev)
dev->txq_size = dev->requested_txq_size;
rte_free(dev->tx_q);
+
+ if (!eth_addr_equals(dev->hwaddr, dev->requested_hwaddr)) {
+ err = netdev_dpdk_set_etheraddr__(dev, dev->requested_hwaddr);
+ if (err) {
+ goto out;
+ }
+ }
+
err = dpdk_eth_dev_init(dev);
if (dev->hw_ol_features & NETDEV_TX_TSO_OFFLOAD) {
netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
+ netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+ if (dev->hw_ol_features & NETDEV_TX_SCTP_CHECKSUM_OFFLOAD) {
+ netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
+ }
}
+ /* If both requested and actual hwaddr were previously
+ * unset (initialized to 0), then first device init above
+ * will have set actual hwaddr to something new.
+ * This would trigger spurious MAC reconfiguration unless
+ * the requested MAC is kept in sync.
+ *
+ * This is harmless in case requested_hwaddr was
+ * configured by the user, as netdev_dpdk_set_etheraddr__()
+ * will have succeeded to get to this point.
+ */
+ dev->requested_hwaddr = dev->hwaddr;
+
dev->tx_q = netdev_dpdk_alloc_txq(netdev->n_txq);
if (!dev->tx_q) {
err = ENOMEM;
@@ -5186,7 +5111,7 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
int err;
uint64_t vhost_flags = 0;
- bool zc_enabled;
+ uint64_t vhost_unsup_flags;
ovs_mutex_lock(&dev->mutex);
@@ -5212,13 +5137,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
vhost_flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
}
- zc_enabled = dev->vhost_driver_flags
- & RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
- /* Enable zero copy flag, if requested */
- if (zc_enabled) {
- vhost_flags |= RTE_VHOST_USER_DEQUEUE_ZERO_COPY;
- }
-
/* Enable External Buffers if TCP Segmentation Offload is enabled. */
if (userspace_tso_enabled()) {
vhost_flags |= RTE_VHOST_USER_EXTBUF_SUPPORT;
@@ -5235,9 +5153,6 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
VLOG_INFO("vHost User device '%s' created in 'client' mode, "
"using client socket '%s'",
dev->up.name, dev->vhost_id);
- if (zc_enabled) {
- VLOG_INFO("Zero copy enabled for vHost port %s", dev->up.name);
- }
}
err = rte_vhost_driver_callback_register(dev->vhost_id,
@@ -5251,17 +5166,24 @@ netdev_dpdk_vhost_client_reconfigure(struct netdev *netdev)
if (userspace_tso_enabled()) {
netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
netdev->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
+ netdev->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
+ netdev->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
netdev->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
+ vhost_unsup_flags = 1ULL << VIRTIO_NET_F_HOST_ECN
+ | 1ULL << VIRTIO_NET_F_HOST_UFO;
} else {
- err = rte_vhost_driver_disable_features(dev->vhost_id,
- 1ULL << VIRTIO_NET_F_HOST_TSO4
- | 1ULL << VIRTIO_NET_F_HOST_TSO6
- | 1ULL << VIRTIO_NET_F_CSUM);
- if (err) {
- VLOG_ERR("rte_vhost_driver_disable_features failed for "
- "vhost user client port: %s\n", dev->up.name);
- goto unlock;
- }
+ /* This disables checksum offloading and all the features
+ * that depends on it (TSO, UFO, ECN) according to virtio
+ * specification. */
+ vhost_unsup_flags = 1ULL << VIRTIO_NET_F_CSUM;
+ }
+
+ err = rte_vhost_driver_disable_features(dev->vhost_id,
+ vhost_unsup_flags);
+ if (err) {
+ VLOG_ERR("rte_vhost_driver_disable_features failed for "
+ "vhost user client port: %s\n", dev->up.name);
+ goto unlock;
}
err = rte_vhost_driver_start(dev->vhost_id);
@@ -5430,14 +5352,6 @@ static const struct netdev_class dpdk_class = {
.send = netdev_dpdk_eth_send,
};
-static const struct netdev_class dpdk_ring_class = {
- .type = "dpdkr",
- NETDEV_DPDK_CLASS_BASE,
- .construct = netdev_dpdk_ring_construct,
- .set_config = netdev_dpdk_ring_set_config,
- .send = netdev_dpdk_ring_send,
-};
-
static const struct netdev_class dpdk_vhost_class = {
.type = "dpdkvhostuser",
NETDEV_DPDK_CLASS_COMMON,
@@ -5473,7 +5387,6 @@ void
netdev_dpdk_register(void)
{
netdev_register_provider(&dpdk_class);
- netdev_register_provider(&dpdk_ring_class);
netdev_register_provider(&dpdk_vhost_class);
netdev_register_provider(&dpdk_vhost_client_class);
}
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index c6f3d27409b683b32f702ff40edec80348c04e5d..6be23dbeed57f03f992c7e984074884d39ffe0da 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -119,10 +119,6 @@ COVERAGE_DEFINE(netdev_set_ethtool);
#define TC_RTAB_SIZE 1024
#endif
-#ifndef TCM_IFINDEX_MAGIC_BLOCK
-#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
-#endif
-
/* Linux 2.6.21 introduced struct tpacket_auxdata.
* Linux 2.6.27 added the tp_vlan_tci member.
* Linux 3.0 defined TP_STATUS_VLAN_VALID.
@@ -231,6 +227,14 @@ struct rtnl_link_stats64 {
uint64_t tx_compressed;
};
+/* Linux 3.19 introduced virtio_types.h. It might be missing
+ * if we are using old kernel. */
+#ifndef HAVE_VIRTIO_TYPES
+typedef __u16 __bitwise__ __virtio16;
+typedef __u32 __bitwise__ __virtio32;
+typedef __u64 __bitwise__ __virtio64;
+#endif
+
enum {
VALID_IFINDEX = 1 << 0,
VALID_ETHERADDR = 1 << 1,
@@ -253,15 +257,15 @@ enum {
IOV_AUXBUF = 1,
};
-struct linux_lag_slave {
+struct linux_lag_member {
uint32_t block_id;
struct shash_node *node;
};
-/* Protects 'lag_shash' and the mutable members of struct linux_lag_slave. */
+/* Protects 'lag_shash' and the mutable members of struct linux_lag_member. */
static struct ovs_mutex lag_mutex = OVS_MUTEX_INITIALIZER;
-/* All slaves whose LAG masters are network devices in OvS. */
+/* All members whose LAG primary interfaces are OVS network devices. */
static struct shash lag_shash OVS_GUARDED_BY(lag_mutex)
= SHASH_INITIALIZER(&lag_shash);
@@ -657,13 +661,9 @@ static void
netdev_linux_update_lag(struct rtnetlink_change *change)
OVS_REQUIRES(lag_mutex)
{
- struct linux_lag_slave *lag;
+ struct linux_lag_member *lag;
- if (!rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) {
- return;
- }
-
- if (change->slave && netdev_linux_kind_is_lag(change->slave)) {
+ if (change->sub && netdev_linux_kind_is_lag(change->sub)) {
lag = shash_find_data(&lag_shash, change->ifname);
if (!lag) {
@@ -691,12 +691,12 @@ netdev_linux_update_lag(struct rtnetlink_change *change)
/* delete ingress block in case it exists */
tc_add_del_qdisc(change->if_index, false, 0, TC_INGRESS);
- /* LAG master is linux netdev so add slave to same block. */
+ /* LAG master is linux netdev so add member to same block. */
error = tc_add_del_qdisc(change->if_index, true, block_id,
TC_INGRESS);
if (error) {
- VLOG_WARN("failed to bind LAG slave %s to master's block",
- change->ifname);
+ VLOG_WARN("failed to bind LAG member %s to "
+ "primary's block", change->ifname);
shash_delete(&lag_shash, lag->node);
free(lag);
}
@@ -705,7 +705,7 @@ netdev_linux_update_lag(struct rtnetlink_change *change)
netdev_close(master_netdev);
}
} else if (change->master_ifindex == 0) {
- /* Check if this was a lag slave that has been freed. */
+ /* Check if this was a lag member that has been removed. */
lag = shash_find_data(&lag_shash, change->ifname);
if (lag) {
@@ -760,8 +760,11 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
netdev_linux_update(netdev, nsid, &change);
ovs_mutex_unlock(&netdev->mutex);
}
- else if (!netdev_ && change.ifname) {
- /* Netdev is not present in OvS but its master could be. */
+
+ if (change.ifname &&
+ rtnetlink_type_is_rtnlgrp_link(change.nlmsg_type)) {
+
+ /* Need to try updating the LAG information. */
ovs_mutex_lock(&lag_mutex);
netdev_linux_update_lag(&change);
ovs_mutex_unlock(&lag_mutex);
@@ -857,7 +860,7 @@ netdev_linux_update__(struct netdev_linux *dev,
rtnetlink_report_link();
}
- if (change->master && netdev_linux_kind_is_lag(change->master)) {
+ if (change->primary && netdev_linux_kind_is_lag(change->primary)) {
dev->is_lag_master = true;
}
@@ -923,6 +926,8 @@ netdev_linux_common_construct(struct netdev *netdev_)
if (userspace_tso_enabled()) {
netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_TSO;
netdev_->ol_flags |= NETDEV_TX_OFFLOAD_TCP_CKSUM;
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_UDP_CKSUM;
+ netdev_->ol_flags |= NETDEV_TX_OFFLOAD_SCTP_CKSUM;
netdev_->ol_flags |= NETDEV_TX_OFFLOAD_IPV4_CKSUM;
}
@@ -1010,6 +1015,23 @@ netdev_linux_construct_tap(struct netdev *netdev_)
goto error_close;
}
+ if (userspace_tso_enabled()) {
+ /* Old kernels don't support TUNSETOFFLOAD. If TUNSETOFFLOAD is
+ * available, it will return EINVAL when a flag is unknown.
+ * Therefore, try enabling offload with no flags to check
+ * if TUNSETOFFLOAD support is available or not. */
+ if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, 0) == 0 || errno != EINVAL) {
+ unsigned long oflags = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
+
+ if (ioctl(netdev->tap_fd, TUNSETOFFLOAD, oflags) == -1) {
+ VLOG_WARN("%s: enabling tap offloading failed: %s", name,
+ ovs_strerror(errno));
+ error = errno;
+ goto error_close;
+ }
+ }
+ }
+
netdev->present = true;
return 0;
@@ -2193,18 +2215,6 @@ netdev_linux_get_stats(const struct netdev *netdev_,
/* stats not available from OVS then use netdev stats. */
*stats = dev_stats;
} else {
- /* Use kernel netdev's packet and byte counts since vport's counters
- * do not reflect packet counts on the wire when GSO, TSO or GRO are
- * enabled. */
- stats->rx_packets = dev_stats.rx_packets;
- stats->rx_bytes = dev_stats.rx_bytes;
- stats->tx_packets = dev_stats.tx_packets;
- stats->tx_bytes = dev_stats.tx_bytes;
-
- stats->rx_errors += dev_stats.rx_errors;
- stats->tx_errors += dev_stats.tx_errors;
- stats->rx_dropped += dev_stats.rx_dropped;
- stats->tx_dropped += dev_stats.tx_dropped;
stats->multicast += dev_stats.multicast;
stats->collisions += dev_stats.collisions;
stats->rx_length_errors += dev_stats.rx_length_errors;
@@ -2602,9 +2612,8 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
uint16_t eth_type = (OVS_FORCE uint16_t) htons(ETH_P_ALL);
size_t basic_offset, action_offset, inner_offset;
uint16_t prio = TC_RESERVED_PRIORITY_POLICE;
- int ifindex, index, err = 0;
+ int ifindex, err = 0;
struct tc_police pol_act;
- uint32_t block_id = 0;
struct ofpbuf request;
struct ofpbuf *reply;
struct tcmsg *tcmsg;
@@ -2615,10 +2624,9 @@ tc_add_matchall_policer(struct netdev *netdev, uint32_t kbits_rate,
return err;
}
- index = block_id ? TCM_IFINDEX_MAGIC_BLOCK : ifindex;
- tcmsg = tc_make_request(index, RTM_NEWTFILTER, NLM_F_CREATE | NLM_F_ECHO,
+ tcmsg = tc_make_request(ifindex, RTM_NEWTFILTER, NLM_F_CREATE | NLM_F_ECHO,
&request);
- tcmsg->tcm_parent = block_id ? : TC_INGRESS_PARENT;
+ tcmsg->tcm_parent = TC_INGRESS_PARENT;
tcmsg->tcm_info = tc_make_handle(prio, eth_type);
tcmsg->tcm_handle = handle;
@@ -3586,24 +3594,34 @@ const struct netdev_class netdev_internal_class = {
};
#ifdef HAVE_AF_XDP
+/* Designated initializers shared by the AF_XDP netdev classes defined below;
+ * each class adds its own .type and .is_pmd.  The last entry has no trailing
+ * comma, so every user writes "NETDEV_AFXDP_CLASS_COMMON," itself. */
+#define NETDEV_AFXDP_CLASS_COMMON \
+ .init = netdev_afxdp_init, \
+ .construct = netdev_afxdp_construct, \
+ .destruct = netdev_afxdp_destruct, \
+ .get_stats = netdev_afxdp_get_stats, \
+ .get_custom_stats = netdev_afxdp_get_custom_stats, \
+ .get_status = netdev_linux_get_status, \
+ .set_config = netdev_afxdp_set_config, \
+ .get_config = netdev_afxdp_get_config, \
+ .reconfigure = netdev_afxdp_reconfigure, \
+ .get_numa_id = netdev_linux_get_numa_id, \
+ .send = netdev_afxdp_batch_send, \
+ .rxq_construct = netdev_afxdp_rxq_construct, \
+ .rxq_destruct = netdev_afxdp_rxq_destruct, \
+ .rxq_recv = netdev_afxdp_rxq_recv
+
const struct netdev_class netdev_afxdp_class = {
NETDEV_LINUX_CLASS_COMMON,
+ NETDEV_AFXDP_CLASS_COMMON,
.type = "afxdp",
.is_pmd = true,
- .init = netdev_afxdp_init,
- .construct = netdev_afxdp_construct,
- .destruct = netdev_afxdp_destruct,
- .get_stats = netdev_afxdp_get_stats,
- .get_custom_stats = netdev_afxdp_get_custom_stats,
- .get_status = netdev_linux_get_status,
- .set_config = netdev_afxdp_set_config,
- .get_config = netdev_afxdp_get_config,
- .reconfigure = netdev_afxdp_reconfigure,
- .get_numa_id = netdev_linux_get_numa_id,
- .send = netdev_afxdp_batch_send,
- .rxq_construct = netdev_afxdp_rxq_construct,
- .rxq_destruct = netdev_afxdp_rxq_destruct,
- .rxq_recv = netdev_afxdp_rxq_recv,
+};
+
+const struct netdev_class netdev_afxdp_nonpmd_class = {
+ NETDEV_LINUX_CLASS_COMMON,
+ NETDEV_AFXDP_CLASS_COMMON,
+ .type = "afxdp-nonpmd",
+ .is_pmd = false,
};
#endif
@@ -6358,7 +6376,7 @@ netdev_linux_update_via_netlink(struct netdev_linux *netdev)
netdev->get_ifindex_error = 0;
changed = true;
}
- if (change->master && netdev_linux_kind_is_lag(change->master)) {
+ if (change->primary && netdev_linux_kind_is_lag(change->primary)) {
netdev->is_lag_master = true;
}
if (changed) {
diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index a78972888e333687050324b54f00d172611a7e9f..b89dfdd52a86526298931fe568d05e4bcb783b02 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -29,7 +29,6 @@
#include
#include
-#include
#include
#include
@@ -55,6 +54,9 @@ static struct vlog_rate_limit err_rl = VLOG_RATE_LIMIT_INIT(60, 5);
#define GENEVE_BASE_HLEN (sizeof(struct udp_header) + \
sizeof(struct genevehdr))
+#define GTPU_HLEN (sizeof(struct udp_header) + \
+ sizeof(struct gtpuhdr))
+
uint16_t tnl_udp_port_min = 32768;
uint16_t tnl_udp_port_max = 61000;
@@ -213,6 +215,27 @@ udp_extract_tnl_md(struct dp_packet *packet, struct flow_tnl *tnl,
return udp + 1;
}
+/* Computes the UDP checksum for 'udp' and stores it in udp->udp_csum.
+ *
+ * The pseudo-header sum is taken from the IPv6 or IPv4 header found at the
+ * start of 'packet' data, depending on netdev_tnl_is_header_ipv6().  The sum
+ * is then continued over 'ip_tot_size' bytes starting at 'udp', so whatever
+ * udp->udp_csum holds on entry is part of the summed data. */
+static void
+netdev_tnl_calc_udp_csum(struct udp_header *udp, struct dp_packet *packet,
+ int ip_tot_size)
+{
+ uint32_t csum;
+
+ if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
+ csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(
+ dp_packet_data(packet)));
+ } else {
+ csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(
+ dp_packet_data(packet)));
+ }
+
+ csum = csum_continue(csum, udp, ip_tot_size);
+ udp->udp_csum = csum_finish(csum);
+
+ /* A computed checksum of zero is sent as all ones, because zero in the
+ * UDP checksum field means "no checksum" (RFC 768). */
+ if (!udp->udp_csum) {
+ udp->udp_csum = htons(0xffff);
+ }
+}
void
netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
@@ -229,19 +252,7 @@ netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
udp->udp_len = htons(ip_tot_size);
if (udp->udp_csum) {
- uint32_t csum;
- if (netdev_tnl_is_header_ipv6(dp_packet_data(packet))) {
- csum = packet_csum_pseudoheader6(netdev_tnl_ipv6_hdr(dp_packet_data(packet)));
- } else {
- csum = packet_csum_pseudoheader(netdev_tnl_ip_hdr(dp_packet_data(packet)));
- }
-
- csum = csum_continue(csum, udp, ip_tot_size);
- udp->udp_csum = csum_finish(csum);
-
- if (!udp->udp_csum) {
- udp->udp_csum = htons(0xffff);
- }
+ netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
}
}
@@ -707,6 +718,133 @@ netdev_erspan_build_header(const struct netdev *netdev,
return 0;
}
+/* Decapsulates a GTP-U packet received on a native tunnel port.
+ *
+ * Fills packet->md.tunnel with the GTP-U flags, message type and TEID (as
+ * 'tun_id') read from the GTP-U header.
+ *
+ *   - G-PDU messages: packet_type is set to PT_IPV4 or PT_IPV6 according to
+ *     the version nibble of the inner IP header, then
+ *     dp_packet_reset_packet() is called with the outer header length
+ *     ('hlen' from udp_extract_tnl_md() plus UDP + GTP-U, and the optional
+ *     fields when the S flag is set).
+ *
+ *   - Any other message type: the packet is left as it is and marked
+ *     PT_ETH.
+ *
+ * Returns 'packet' on success.  If the L4 data is shorter than GTPU_HLEN or
+ * udp_extract_tnl_md() fails, 'packet' is deleted and NULL is returned. */
+struct dp_packet *
+netdev_gtpu_pop_header(struct dp_packet *packet)
+{
+    struct pkt_metadata *md = &packet->md;
+    struct flow_tnl *tnl = &md->tunnel;
+    struct gtpuhdr *gtph;
+    unsigned int gtpu_hlen;
+    unsigned int hlen;
+
+    ovs_assert(packet->l3_ofs > 0);
+    ovs_assert(packet->l4_ofs > 0);
+
+    pkt_metadata_init_tnl(md);
+    if (GTPU_HLEN > dp_packet_l4_size(packet)) {
+        goto err;
+    }
+
+    gtph = udp_extract_tnl_md(packet, tnl, &hlen);
+    if (!gtph) {
+        goto err;
+    }
+
+    tnl->gtpu_flags = gtph->md.flags;
+    tnl->gtpu_msgtype = gtph->md.msgtype;
+    tnl->tun_id = be32_to_be64(get_16aligned_be32(&gtph->teid));
+
+    if (tnl->gtpu_msgtype == GTPU_MSGTYPE_GPDU) {
+        struct ip_header *ip;
+
+        if (gtph->md.flags & GTPU_S_MASK) {
+            gtpu_hlen = GTPU_HLEN + sizeof(struct gtpuhdr_opt);
+        } else {
+            gtpu_hlen = GTPU_HLEN;
+        }
+
+        /* 'gtph' already points just past the UDP header
+         * (udp_extract_tnl_md() returns 'udp + 1'), whereas GTPU_HLEN also
+         * counts the UDP header.  Skip only the GTP-U part of 'gtpu_hlen'
+         * to reach the inner IP header.
+         *
+         * NOTE(review): only GTPU_HLEN bytes of L4 data were checked above;
+         * confirm the inner version byte is always within the packet. */
+        ip = ALIGNED_CAST(struct ip_header *,
+                          (char *) gtph + gtpu_hlen
+                          - sizeof(struct udp_header));
+
+        if (IP_VER(ip->ip_ihl_ver) == 4) {
+            packet->packet_type = htonl(PT_IPV4);
+        } else if (IP_VER(ip->ip_ihl_ver) == 6) {
+            packet->packet_type = htonl(PT_IPV6);
+        } else {
+            VLOG_WARN_RL(&err_rl, "GTP-U: Receive non-IP packet.");
+        }
+        dp_packet_reset_packet(packet, hlen + gtpu_hlen);
+    } else {
+        /* non-GPDU GTP-U messages, ex: echo request, end marker.
+         * Users should redirect these packets to controller, or
+         * any application that handles GTP-U messages, so keep
+         * the original packet.
+         */
+        packet->packet_type = htonl(PT_ETH);
+        VLOG_WARN_ONCE("Receive non-GPDU msgtype: %"PRIu8,
+                       gtph->md.msgtype);
+    }
+
+    return packet;
+
+err:
+    dp_packet_delete(packet);
+    return NULL;
+}
+
+/* Pushes the prebuilt outer header in 'data' onto 'packet' and fills in the
+ * per-packet fields: UDP source port and length, the GTP-U sequence number
+ * (only when the tunnel of 'netdev' has 'set_seq' configured), the GTP-U
+ * length field and, last, the UDP checksum. */
+void
+netdev_gtpu_push_header(const struct netdev *netdev,
+                        struct dp_packet *packet,
+                        const struct ovs_action_push_tnl *data)
+{
+    struct netdev_vport *dev = netdev_vport_cast(netdev);
+    struct netdev_tunnel_config *tnl_cfg;
+    struct udp_header *udp;
+    struct gtpuhdr *gtpuh;
+    int ip_tot_size;
+    unsigned int payload_len;
+
+    /* Packet size before the outer header is pushed. */
+    payload_len = dp_packet_size(packet);
+    udp = netdev_tnl_push_ip_header(packet, data->header,
+                                    data->header_len, &ip_tot_size);
+    udp->udp_src = netdev_tnl_get_src_port(packet);
+    udp->udp_len = htons(ip_tot_size);
+
+    gtpuh = ALIGNED_CAST(struct gtpuhdr *, udp + 1);
+
+    tnl_cfg = &dev->tnl_cfg;
+    if (tnl_cfg->set_seq) {
+        ovs_be16 *seqno = ALIGNED_CAST(ovs_be16 *, gtpuh + 1);
+
+        /* NOTE(review): 'seqno' is incremented without holding dev->mutex,
+         * unlike the tnl_cfg accesses in netdev_gtpu_build_header() --
+         * confirm this is safe when several threads send on the port. */
+        *seqno = htons(tnl_cfg->seqno++);
+        payload_len += sizeof(struct gtpuhdr_opt);
+    }
+    gtpuh->len = htons(payload_len);
+
+    /* The checksum is summed over 'ip_tot_size' bytes starting at 'udp',
+     * which includes the GTP-U header, so it has to be computed after the
+     * sequence number and length above have been written. */
+    netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
+}
+
+/* Builds the GTP-U part of the tunnel header template in 'data'.
+ *
+ * udp_build_header() supplies the location of the GTP-U header.  Flags and
+ * message type come from the flow's tunnel metadata in 'params', falling
+ * back to GTPU_FLAGS_DEFAULT and GTPU_MSGTYPE_GPDU when they are zero; the
+ * TEID is the low 32 bits of the tunnel id.  When the tunnel has 'set_seq'
+ * configured, the S flag is set and room for struct gtpuhdr_opt is added to
+ * data->header_len.
+ *
+ * The tunnel configuration is read under dev->mutex.  Always returns 0. */
+int
+netdev_gtpu_build_header(const struct netdev *netdev,
+                         struct ovs_action_push_tnl *data,
+                         const struct netdev_tnl_build_header_params *params)
+{
+    struct netdev_vport *dev = netdev_vport_cast(netdev);
+    struct netdev_tunnel_config *tnl_cfg;
+    struct gtpuhdr *gtph;
+    unsigned int gtpu_hlen;
+
+    ovs_mutex_lock(&dev->mutex);
+    tnl_cfg = &dev->tnl_cfg;
+    gtph = udp_build_header(tnl_cfg, data, params);
+
+    /* Set to default if not set in flow. */
+    gtph->md.flags = params->flow->tunnel.gtpu_flags ?
+                     params->flow->tunnel.gtpu_flags : GTPU_FLAGS_DEFAULT;
+    gtph->md.msgtype = params->flow->tunnel.gtpu_msgtype ?
+                       params->flow->tunnel.gtpu_msgtype : GTPU_MSGTYPE_GPDU;
+    put_16aligned_be32(&gtph->teid,
+                       be64_to_be32(params->flow->tunnel.tun_id));
+
+    gtpu_hlen = sizeof *gtph;
+    if (tnl_cfg->set_seq) {
+        gtph->md.flags |= GTPU_S_MASK;
+        gtpu_hlen += sizeof(struct gtpuhdr_opt);
+    }
+    ovs_mutex_unlock(&dev->mutex);
+
+    data->header_len += gtpu_hlen;
+    data->tnl_type = OVS_VPORT_TYPE_GTPU;
+
+    return 0;
+}
+
struct dp_packet *
netdev_vxlan_pop_header(struct dp_packet *packet)
{
diff --git a/lib/netdev-native-tnl.h b/lib/netdev-native-tnl.h
index 5dc00122d93eee8bfd956bc5ea7941da053990b0..22ae2ce5369b193268a2ec3ca7e126d7761aa8d9 100644
--- a/lib/netdev-native-tnl.h
+++ b/lib/netdev-native-tnl.h
@@ -52,6 +52,19 @@ netdev_erspan_push_header(const struct netdev *netdev,
struct dp_packet *
netdev_erspan_pop_header(struct dp_packet *packet);
+struct dp_packet *
+netdev_gtpu_pop_header(struct dp_packet *packet);
+
+void
+netdev_gtpu_push_header(const struct netdev *netdev,
+ struct dp_packet *packet,
+ const struct ovs_action_push_tnl *data);
+
+int
+netdev_gtpu_build_header(const struct netdev *netdev,
+ struct ovs_action_push_tnl *data,
+ const struct netdev_tnl_build_header_params *p);
+
void
netdev_tnl_push_udp_header(const struct netdev *netdev,
struct dp_packet *packet,
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index f8c46bbaada096433c6b573e9fc84d148634c3f3..01c52e1de6ef1c3426f7331d8c61b0f0f303fbd3 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -16,6 +16,8 @@
*/
#include
+#include
+#include
#include
#include "cmap.h"
@@ -76,7 +78,7 @@ ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
return NULL;
}
-static inline void
+static inline struct ufid_to_rte_flow_data *
ufid_to_rte_flow_associate(const ovs_u128 *ufid,
struct rte_flow *rte_flow, bool actions_offloaded)
{
@@ -101,6 +103,7 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid,
cmap_insert(&ufid_to_rte_flow,
CONST_CAST(struct cmap_node *, &data->node), hash);
+ return data;
}
static inline void
@@ -118,7 +121,7 @@ ufid_to_rte_flow_disassociate(const ovs_u128 *ufid)
}
}
- VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n",
+ VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow",
UUID_ARGS((struct uuid *) ufid));
}
@@ -142,13 +145,22 @@ struct flow_actions {
static void
dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
{
- ds_put_format(s,
- " Attributes: "
- "ingress=%d, egress=%d, prio=%d, group=%d, transfer=%d\n",
- attr->ingress, attr->egress, attr->priority, attr->group,
- attr->transfer);
+ ds_put_format(s, "%s%spriority %"PRIu32" group %"PRIu32" %s",
+ attr->ingress ? "ingress " : "",
+ attr->egress ? "egress " : "", attr->priority, attr->group,
+ attr->transfer ? "transfer " : "");
}
+/* Adds one pattern item 'field' with the 'mask' to dynamic string 's' using
+ * 'testpmd command'-like format.
+ *
+ * An all-ones mask is printed as "<field> is <spec>", a partial mask as
+ * "<field> spec <spec> <field> mask <mask>", and an all-zeros mask prints
+ * nothing.  'field' and 'fmt' must be string literals.  'spec_pri' and
+ * 'mask_pri' may expand to several printf arguments, so they are not
+ * parenthesized.  The caller must have a 'struct ds *s' in scope.
+ *
+ * Wrapped in do { } while (0) so the macro acts as a single statement and
+ * cannot capture a following 'else'. */
+#define DUMP_PATTERN_ITEM(mask, field, fmt, spec_pri, mask_pri) \
+    do { \
+        if (is_all_ones(&(mask), sizeof (mask))) { \
+            ds_put_format(s, field " is " fmt " ", spec_pri); \
+        } else if (!is_all_zeros(&(mask), sizeof (mask))) { \
+            ds_put_format(s, field " spec " fmt " " field " mask " fmt " ", \
+                          spec_pri, mask_pri); \
+        } \
+    } while (0)
+
static void
dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
{
@@ -156,219 +168,258 @@ dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
const struct rte_flow_item_eth *eth_spec = item->spec;
const struct rte_flow_item_eth *eth_mask = item->mask;
- ds_put_cstr(s, "rte flow eth pattern:\n");
+ ds_put_cstr(s, "eth ");
if (eth_spec) {
- ds_put_format(s,
- " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", "
- "type=0x%04" PRIx16"\n",
- ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
- ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
- ntohs(eth_spec->type));
- } else {
- ds_put_cstr(s, " Spec = null\n");
- }
- if (eth_mask) {
- ds_put_format(s,
- " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", "
- "type=0x%04"PRIx16"\n",
- ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes),
- ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes),
- ntohs(eth_mask->type));
- } else {
- ds_put_cstr(s, " Mask = null\n");
+ if (!eth_mask) {
+ eth_mask = &rte_flow_item_eth_mask;
+ }
+ DUMP_PATTERN_ITEM(eth_mask->src, "src", ETH_ADDR_FMT,
+ ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
+ ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes));
+ DUMP_PATTERN_ITEM(eth_mask->dst, "dst", ETH_ADDR_FMT,
+ ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
+ ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes));
+ DUMP_PATTERN_ITEM(eth_mask->type, "type", "0x%04"PRIx16,
+ ntohs(eth_spec->type),
+ ntohs(eth_mask->type));
}
+ ds_put_cstr(s, "/ ");
} else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
const struct rte_flow_item_vlan *vlan_spec = item->spec;
const struct rte_flow_item_vlan *vlan_mask = item->mask;
- ds_put_cstr(s, "rte flow vlan pattern:\n");
+ ds_put_cstr(s, "vlan ");
if (vlan_spec) {
- ds_put_format(s,
- " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
- ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci));
- } else {
- ds_put_cstr(s, " Spec = null\n");
- }
-
- if (vlan_mask) {
- ds_put_format(s,
- " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
- ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci));
- } else {
- ds_put_cstr(s, " Mask = null\n");
+ if (!vlan_mask) {
+ vlan_mask = &rte_flow_item_vlan_mask;
+ }
+ DUMP_PATTERN_ITEM(vlan_mask->inner_type, "inner_type", "0x%"PRIx16,
+ ntohs(vlan_spec->inner_type),
+ ntohs(vlan_mask->inner_type));
+ DUMP_PATTERN_ITEM(vlan_mask->tci, "tci", "0x%"PRIx16,
+ ntohs(vlan_spec->tci), ntohs(vlan_mask->tci));
}
+ ds_put_cstr(s, "/ ");
} else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
- ds_put_cstr(s, "rte flow ipv4 pattern:\n");
+ ds_put_cstr(s, "ipv4 ");
if (ipv4_spec) {
- ds_put_format(s,
- " Spec: tos=0x%"PRIx8", ttl=%"PRIx8
- ", proto=0x%"PRIx8
- ", src="IP_FMT", dst="IP_FMT"\n",
- ipv4_spec->hdr.type_of_service,
- ipv4_spec->hdr.time_to_live,
- ipv4_spec->hdr.next_proto_id,
- IP_ARGS(ipv4_spec->hdr.src_addr),
- IP_ARGS(ipv4_spec->hdr.dst_addr));
- } else {
- ds_put_cstr(s, " Spec = null\n");
- }
- if (ipv4_mask) {
- ds_put_format(s,
- " Mask: tos=0x%"PRIx8", ttl=%"PRIx8
- ", proto=0x%"PRIx8
- ", src="IP_FMT", dst="IP_FMT"\n",
- ipv4_mask->hdr.type_of_service,
- ipv4_mask->hdr.time_to_live,
- ipv4_mask->hdr.next_proto_id,
- IP_ARGS(ipv4_mask->hdr.src_addr),
- IP_ARGS(ipv4_mask->hdr.dst_addr));
- } else {
- ds_put_cstr(s, " Mask = null\n");
+ if (!ipv4_mask) {
+ ipv4_mask = &rte_flow_item_ipv4_mask;
+ }
+ DUMP_PATTERN_ITEM(ipv4_mask->hdr.src_addr, "src", IP_FMT,
+ IP_ARGS(ipv4_spec->hdr.src_addr),
+ IP_ARGS(ipv4_mask->hdr.src_addr));
+ DUMP_PATTERN_ITEM(ipv4_mask->hdr.dst_addr, "dst", IP_FMT,
+ IP_ARGS(ipv4_spec->hdr.dst_addr),
+ IP_ARGS(ipv4_mask->hdr.dst_addr));
+ DUMP_PATTERN_ITEM(ipv4_mask->hdr.next_proto_id, "proto",
+ "0x%"PRIx8, ipv4_spec->hdr.next_proto_id,
+ ipv4_mask->hdr.next_proto_id);
+ DUMP_PATTERN_ITEM(ipv4_mask->hdr.type_of_service, "tos",
+ "0x%"PRIx8, ipv4_spec->hdr.type_of_service,
+ ipv4_mask->hdr.type_of_service);
+ DUMP_PATTERN_ITEM(ipv4_mask->hdr.time_to_live, "ttl",
+ "0x%"PRIx8, ipv4_spec->hdr.time_to_live,
+ ipv4_mask->hdr.time_to_live);
}
+ ds_put_cstr(s, "/ ");
} else if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
const struct rte_flow_item_udp *udp_spec = item->spec;
const struct rte_flow_item_udp *udp_mask = item->mask;
- ds_put_cstr(s, "rte flow udp pattern:\n");
+ ds_put_cstr(s, "udp ");
if (udp_spec) {
- ds_put_format(s,
- " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n",
- ntohs(udp_spec->hdr.src_port),
- ntohs(udp_spec->hdr.dst_port));
- } else {
- ds_put_cstr(s, " Spec = null\n");
- }
- if (udp_mask) {
- ds_put_format(s,
- " Mask: src_port=0x%"PRIx16
- ", dst_port=0x%"PRIx16"\n",
- ntohs(udp_mask->hdr.src_port),
- ntohs(udp_mask->hdr.dst_port));
- } else {
- ds_put_cstr(s, " Mask = null\n");
+ if (!udp_mask) {
+ udp_mask = &rte_flow_item_udp_mask;
+ }
+ DUMP_PATTERN_ITEM(udp_mask->hdr.src_port, "src", "%"PRIu16,
+ ntohs(udp_spec->hdr.src_port),
+ ntohs(udp_mask->hdr.src_port));
+ DUMP_PATTERN_ITEM(udp_mask->hdr.dst_port, "dst", "%"PRIu16,
+ ntohs(udp_spec->hdr.dst_port),
+ ntohs(udp_mask->hdr.dst_port));
}
+ ds_put_cstr(s, "/ ");
} else if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) {
const struct rte_flow_item_sctp *sctp_spec = item->spec;
const struct rte_flow_item_sctp *sctp_mask = item->mask;
- ds_put_cstr(s, "rte flow sctp pattern:\n");
+ ds_put_cstr(s, "sctp ");
if (sctp_spec) {
- ds_put_format(s,
- " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n",
- ntohs(sctp_spec->hdr.src_port),
- ntohs(sctp_spec->hdr.dst_port));
- } else {
- ds_put_cstr(s, " Spec = null\n");
- }
- if (sctp_mask) {
- ds_put_format(s,
- " Mask: src_port=0x%"PRIx16
- ", dst_port=0x%"PRIx16"\n",
- ntohs(sctp_mask->hdr.src_port),
- ntohs(sctp_mask->hdr.dst_port));
- } else {
- ds_put_cstr(s, " Mask = null\n");
+ if (!sctp_mask) {
+ sctp_mask = &rte_flow_item_sctp_mask;
+ }
+ DUMP_PATTERN_ITEM(sctp_mask->hdr.src_port, "src", "%"PRIu16,
+ ntohs(sctp_spec->hdr.src_port),
+ ntohs(sctp_mask->hdr.src_port));
+ DUMP_PATTERN_ITEM(sctp_mask->hdr.dst_port, "dst", "%"PRIu16,
+ ntohs(sctp_spec->hdr.dst_port),
+ ntohs(sctp_mask->hdr.dst_port));
}
+ ds_put_cstr(s, "/ ");
} else if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) {
const struct rte_flow_item_icmp *icmp_spec = item->spec;
const struct rte_flow_item_icmp *icmp_mask = item->mask;
- ds_put_cstr(s, "rte flow icmp pattern:\n");
+ ds_put_cstr(s, "icmp ");
if (icmp_spec) {
- ds_put_format(s,
- " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n",
- icmp_spec->hdr.icmp_type,
- icmp_spec->hdr.icmp_code);
- } else {
- ds_put_cstr(s, " Spec = null\n");
- }
- if (icmp_mask) {
- ds_put_format(s,
- " Mask: icmp_type=0x%"PRIx8
- ", icmp_code=0x%"PRIx8"\n",
- icmp_spec->hdr.icmp_type,
- icmp_spec->hdr.icmp_code);
- } else {
- ds_put_cstr(s, " Mask = null\n");
+ if (!icmp_mask) {
+ icmp_mask = &rte_flow_item_icmp_mask;
+ }
+ DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_type, "icmp_type", "%"PRIu8,
+ icmp_spec->hdr.icmp_type,
+ icmp_mask->hdr.icmp_type);
+ DUMP_PATTERN_ITEM(icmp_mask->hdr.icmp_code, "icmp_code", "%"PRIu8,
+ icmp_spec->hdr.icmp_code,
+ icmp_mask->hdr.icmp_code);
}
+ ds_put_cstr(s, "/ ");
} else if (item->type == RTE_FLOW_ITEM_TYPE_TCP) {
const struct rte_flow_item_tcp *tcp_spec = item->spec;
const struct rte_flow_item_tcp *tcp_mask = item->mask;
- ds_put_cstr(s, "rte flow tcp pattern:\n");
+ ds_put_cstr(s, "tcp ");
if (tcp_spec) {
- ds_put_format(s,
- " Spec: src_port=%"PRIu16", dst_port=%"PRIu16
- ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n",
- ntohs(tcp_spec->hdr.src_port),
- ntohs(tcp_spec->hdr.dst_port),
- tcp_spec->hdr.data_off,
- tcp_spec->hdr.tcp_flags);
- } else {
- ds_put_cstr(s, " Spec = null\n");
+ if (!tcp_mask) {
+ tcp_mask = &rte_flow_item_tcp_mask;
+ }
+ DUMP_PATTERN_ITEM(tcp_mask->hdr.src_port, "src", "%"PRIu16,
+ ntohs(tcp_spec->hdr.src_port),
+ ntohs(tcp_mask->hdr.src_port));
+ DUMP_PATTERN_ITEM(tcp_mask->hdr.dst_port, "dst", "%"PRIu16,
+ ntohs(tcp_spec->hdr.dst_port),
+ ntohs(tcp_mask->hdr.dst_port));
+ DUMP_PATTERN_ITEM(tcp_mask->hdr.tcp_flags, "flags", "0x%"PRIx8,
+ tcp_spec->hdr.tcp_flags,
+ tcp_mask->hdr.tcp_flags);
}
- if (tcp_mask) {
- ds_put_format(s,
- " Mask: src_port=%"PRIx16", dst_port=%"PRIx16
- ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n",
- ntohs(tcp_mask->hdr.src_port),
- ntohs(tcp_mask->hdr.dst_port),
- tcp_mask->hdr.data_off,
- tcp_mask->hdr.tcp_flags);
- } else {
- ds_put_cstr(s, " Mask = null\n");
+ ds_put_cstr(s, "/ ");
+ } else if (item->type == RTE_FLOW_ITEM_TYPE_IPV6) {
+ const struct rte_flow_item_ipv6 *ipv6_spec = item->spec;
+ const struct rte_flow_item_ipv6 *ipv6_mask = item->mask;
+
+ char addr_str[INET6_ADDRSTRLEN];
+ char mask_str[INET6_ADDRSTRLEN];
+ struct in6_addr addr, mask;
+
+ ds_put_cstr(s, "ipv6 ");
+ if (ipv6_spec) {
+ if (!ipv6_mask) {
+ ipv6_mask = &rte_flow_item_ipv6_mask;
+ }
+ memcpy(&addr, ipv6_spec->hdr.src_addr, sizeof addr);
+ memcpy(&mask, ipv6_mask->hdr.src_addr, sizeof mask);
+ ipv6_string_mapped(addr_str, &addr);
+ ipv6_string_mapped(mask_str, &mask);
+ DUMP_PATTERN_ITEM(mask, "src", "%s", addr_str, mask_str);
+
+ memcpy(&addr, ipv6_spec->hdr.dst_addr, sizeof addr);
+ memcpy(&mask, ipv6_mask->hdr.dst_addr, sizeof mask);
+ ipv6_string_mapped(addr_str, &addr);
+ ipv6_string_mapped(mask_str, &mask);
+ DUMP_PATTERN_ITEM(mask, "dst", "%s", addr_str, mask_str);
+
+ DUMP_PATTERN_ITEM(ipv6_mask->hdr.proto, "proto", "%"PRIu8,
+ ipv6_spec->hdr.proto, ipv6_mask->hdr.proto);
+ DUMP_PATTERN_ITEM(ipv6_mask->hdr.vtc_flow, "tc", "0x%"PRIx32,
+ ntohl(ipv6_spec->hdr.vtc_flow),
+ ntohl(ipv6_mask->hdr.vtc_flow));
+ DUMP_PATTERN_ITEM(ipv6_mask->hdr.hop_limits, "hop", "%"PRIu8,
+ ipv6_spec->hdr.hop_limits,
+ ipv6_mask->hdr.hop_limits);
}
+ ds_put_cstr(s, "/ ");
} else {
ds_put_format(s, "unknown rte flow pattern (%d)\n", item->type);
}
}
+/* Appends to 's' a testpmd-like "set vxlan ..." command describing the
+ * encapsulation headers listed in 'items'.
+ *
+ * 'items' is scanned up to RTE_FLOW_ITEM_TYPE_END (a null 'items' is treated
+ * as an empty list).  Only the parts whose item was found are printed; the
+ * ip-version is reported as "ERR" when neither an IPv4 nor an IPv6 item is
+ * present. */
static void
-dump_flow_action(struct ds *s, const struct rte_flow_action *actions)
+dump_vxlan_encap(struct ds *s, const struct rte_flow_item *items)
+{
+ const struct rte_flow_item_eth *eth = NULL;
+ const struct rte_flow_item_ipv4 *ipv4 = NULL;
+ const struct rte_flow_item_ipv6 *ipv6 = NULL;
+ const struct rte_flow_item_udp *udp = NULL;
+ const struct rte_flow_item_vxlan *vxlan = NULL;
+
+ /* Remember the spec of each header type; a later item of the same type
+ * overrides an earlier one. */
+ for (; items && items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+ if (items->type == RTE_FLOW_ITEM_TYPE_ETH) {
+ eth = items->spec;
+ } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV4) {
+ ipv4 = items->spec;
+ } else if (items->type == RTE_FLOW_ITEM_TYPE_IPV6) {
+ ipv6 = items->spec;
+ } else if (items->type == RTE_FLOW_ITEM_TYPE_UDP) {
+ udp = items->spec;
+ } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+ vxlan = items->spec;
+ }
+ }
+
+ ds_put_format(s, "set vxlan ip-version %s ",
+ ipv4 ? "ipv4" : ipv6 ? "ipv6" : "ERR");
+ if (vxlan) {
+ /* NOTE(review): loads 32 bits starting at 'vni' and drops the low
+ * byte after conversion to host order -- presumably 'vni' is a 24-bit
+ * field; confirm the extra byte read and the cast are safe. */
+ ds_put_format(s, "vni %"PRIu32" ",
+ ntohl(*(ovs_be32 *) vxlan->vni) >> 8);
+ }
+ if (udp) {
+ ds_put_format(s, "udp-src %"PRIu16" udp-dst %"PRIu16" ",
+ ntohs(udp->hdr.src_port), ntohs(udp->hdr.dst_port));
+ }
+ if (ipv4) {
+ ds_put_format(s, "ip-src "IP_FMT" ip-dst "IP_FMT" ",
+ IP_ARGS(ipv4->hdr.src_addr),
+ IP_ARGS(ipv4->hdr.dst_addr));
+ }
+ if (ipv6) {
+ struct in6_addr addr;
+
+ /* The addresses are copied into a local in6_addr before formatting. */
+ ds_put_cstr(s, "ip-src ");
+ memcpy(&addr, ipv6->hdr.src_addr, sizeof addr);
+ ipv6_format_mapped(&addr, s);
+ ds_put_cstr(s, " ip-dst ");
+ memcpy(&addr, ipv6->hdr.dst_addr, sizeof addr);
+ ipv6_format_mapped(&addr, s);
+ ds_put_cstr(s, " ");
+ }
+ if (eth) {
+ ds_put_format(s, "eth-src "ETH_ADDR_FMT" eth-dst "ETH_ADDR_FMT,
+ ETH_ADDR_BYTES_ARGS(eth->src.addr_bytes),
+ ETH_ADDR_BYTES_ARGS(eth->dst.addr_bytes));
+ }
+}
+
+static void
+dump_flow_action(struct ds *s, struct ds *s_extra,
+ const struct rte_flow_action *actions)
{
if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
const struct rte_flow_action_mark *mark = actions->conf;
- ds_put_cstr(s, "rte flow mark action:\n");
+ ds_put_cstr(s, "mark ");
if (mark) {
- ds_put_format(s, " Mark: id=%d\n", mark->id);
- } else {
- ds_put_cstr(s, " Mark = null\n");
+ ds_put_format(s, "id %d ", mark->id);
}
+ ds_put_cstr(s, "/ ");
} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
- const struct rte_flow_action_rss *rss = actions->conf;
-
- ds_put_cstr(s, "rte flow RSS action:\n");
- if (rss) {
- ds_put_format(s, " RSS: queue_num=%d\n", rss->queue_num);
- } else {
- ds_put_cstr(s, " RSS = null\n");
- }
+ ds_put_cstr(s, "rss / ");
} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
- const struct rte_flow_action_count *count = actions->conf;
-
- ds_put_cstr(s, "rte flow count action:\n");
- if (count) {
- ds_put_format(s, " Count: shared=%d, id=%d\n", count->shared,
- count->id);
- } else {
- ds_put_cstr(s, " Count = null\n");
- }
+ ds_put_cstr(s, "count / ");
} else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
const struct rte_flow_action_port_id *port_id = actions->conf;
- ds_put_cstr(s, "rte flow port-id action:\n");
+ ds_put_cstr(s, "port_id ");
if (port_id) {
- ds_put_format(s, " Port-id: original=%d, id=%d\n",
+ ds_put_format(s, "original %d id %d ",
port_id->original, port_id->id);
- } else {
- ds_put_cstr(s, " Port-id = null\n");
}
+ ds_put_cstr(s, "/ ");
} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
- ds_put_cstr(s, "rte flow drop action\n");
+ ds_put_cstr(s, "drop / ");
} else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ||
actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST) {
const struct rte_flow_action_set_mac *set_mac = actions->conf;
@@ -376,57 +427,114 @@ dump_flow_action(struct ds *s, const struct rte_flow_action *actions)
char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST
? "dst" : "src";
- ds_put_format(s, "rte flow set-mac-%s action:\n", dirstr);
+ ds_put_format(s, "set_mac_%s ", dirstr);
if (set_mac) {
- ds_put_format(s,
- " Set-mac-%s: "ETH_ADDR_FMT"\n", dirstr,
+ ds_put_format(s, "mac_addr "ETH_ADDR_FMT" ",
ETH_ADDR_BYTES_ARGS(set_mac->mac_addr));
- } else {
- ds_put_format(s, " Set-mac-%s = null\n", dirstr);
}
+ ds_put_cstr(s, "/ ");
} else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ||
actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) {
const struct rte_flow_action_set_ipv4 *set_ipv4 = actions->conf;
char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
? "dst" : "src";
- ds_put_format(s, "rte flow set-ipv4-%s action:\n", dirstr);
+ ds_put_format(s, "set_ipv4_%s ", dirstr);
if (set_ipv4) {
- ds_put_format(s,
- " Set-ipv4-%s: "IP_FMT"\n", dirstr,
+ ds_put_format(s, "ipv4_addr "IP_FMT" ",
IP_ARGS(set_ipv4->ipv4_addr));
- } else {
- ds_put_format(s, " Set-ipv4-%s = null\n", dirstr);
}
+ ds_put_cstr(s, "/ ");
} else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) {
const struct rte_flow_action_set_ttl *set_ttl = actions->conf;
- ds_put_cstr(s, "rte flow set-ttl action:\n");
+ ds_put_cstr(s, "set_ttl ");
if (set_ttl) {
- ds_put_format(s, " Set-ttl: %d\n", set_ttl->ttl_value);
- } else {
- ds_put_cstr(s, " Set-ttl = null\n");
+ ds_put_format(s, "ttl_value %d ", set_ttl->ttl_value);
}
+ ds_put_cstr(s, "/ ");
} else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ||
actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST) {
const struct rte_flow_action_set_tp *set_tp = actions->conf;
char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST
? "dst" : "src";
- ds_put_format(s, "rte flow set-tcp/udp-port-%s action:\n", dirstr);
+ ds_put_format(s, "set_tp_%s ", dirstr);
if (set_tp) {
- ds_put_format(s, " Set-%s-tcp/udp-port: %"PRIu16"\n", dirstr,
- ntohs(set_tp->port));
- } else {
- ds_put_format(s, " Set-%s-tcp/udp-port = null\n", dirstr);
+ ds_put_format(s, "port %"PRIu16" ", ntohs(set_tp->port));
+ }
+ ds_put_cstr(s, "/ ");
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN) {
+ const struct rte_flow_action_of_push_vlan *of_push_vlan =
+ actions->conf;
+
+ ds_put_cstr(s, "of_push_vlan ");
+ if (of_push_vlan) {
+ ds_put_format(s, "ethertype 0x%"PRIx16" ",
+ ntohs(of_push_vlan->ethertype));
+ }
+ ds_put_cstr(s, "/ ");
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
+ const struct rte_flow_action_of_set_vlan_pcp *of_set_vlan_pcp =
+ actions->conf;
+
+ ds_put_cstr(s, "of_set_vlan_pcp ");
+ if (of_set_vlan_pcp) {
+ ds_put_format(s, "vlan_pcp %"PRIu8" ", of_set_vlan_pcp->vlan_pcp);
+ }
+ ds_put_cstr(s, "/ ");
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
+ const struct rte_flow_action_of_set_vlan_vid *of_set_vlan_vid =
+ actions->conf;
+
+ ds_put_cstr(s, "of_set_vlan_vid ");
+ if (of_set_vlan_vid) {
+ ds_put_format(s, "vlan_vid %"PRIu16" ",
+ ntohs(of_set_vlan_vid->vlan_vid));
+ }
+ ds_put_cstr(s, "/ ");
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_OF_POP_VLAN) {
+ ds_put_cstr(s, "of_pop_vlan / ");
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ||
+ actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST) {
+ const struct rte_flow_action_set_ipv6 *set_ipv6 = actions->conf;
+
+ char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
+ ? "dst" : "src";
+
+ ds_put_format(s, "set_ipv6_%s ", dirstr);
+ if (set_ipv6) {
+ ds_put_cstr(s, "ipv6_addr ");
+ ipv6_format_addr((struct in6_addr *) &set_ipv6->ipv6_addr, s);
+ ds_put_cstr(s, " ");
}
+ ds_put_cstr(s, "/ ");
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
+ const struct rte_flow_action_raw_encap *raw_encap = actions->conf;
+
+ ds_put_cstr(s, "raw_encap index 0 / ");
+ if (raw_encap) {
+ ds_put_format(s_extra, "Raw-encap size=%ld set raw_encap 0 raw "
+ "pattern is ", raw_encap->size);
+ for (int i = 0; i < raw_encap->size; i++) {
+ ds_put_format(s_extra, "%02x", raw_encap->data[i]);
+ }
+ ds_put_cstr(s_extra, " / end_set;");
+ }
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP) {
+ const struct rte_flow_action_vxlan_encap *vxlan_encap = actions->conf;
+ const struct rte_flow_item *items = vxlan_encap->definition;
+
+ ds_put_cstr(s, "vxlan_encap / ");
+ dump_vxlan_encap(s_extra, items);
+ ds_put_cstr(s_extra, ";");
} else {
ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
}
}
static struct ds *
-dump_flow(struct ds *s,
+dump_flow(struct ds *s, struct ds *s_extra,
const struct rte_flow_attr *attr,
const struct rte_flow_item *items,
const struct rte_flow_action *actions)
@@ -434,12 +542,15 @@ dump_flow(struct ds *s,
if (attr) {
dump_flow_attr(s, attr);
}
+ ds_put_cstr(s, "pattern ");
while (items && items->type != RTE_FLOW_ITEM_TYPE_END) {
dump_flow_pattern(s, items++);
}
+ ds_put_cstr(s, "end actions ");
while (actions && actions->type != RTE_FLOW_ACTION_TYPE_END) {
- dump_flow_action(s, actions++);
+ dump_flow_action(s, s_extra, actions++);
}
+ ds_put_cstr(s, "end");
return s;
}
@@ -450,17 +561,19 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
const struct rte_flow_action *actions,
struct rte_flow_error *error)
{
+ struct ds s_extra = DS_EMPTY_INITIALIZER;
+ struct ds s = DS_EMPTY_INITIALIZER;
struct rte_flow *flow;
- struct ds s;
+ char *extra_str;
flow = netdev_dpdk_rte_flow_create(netdev, attr, items, actions, error);
if (flow) {
if (!VLOG_DROP_DBG(&rl)) {
- ds_init(&s);
- dump_flow(&s, attr, items, actions);
- VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" created:\n%s",
- netdev_get_name(netdev), (intptr_t) flow, ds_cstr(&s));
- ds_destroy(&s);
+ dump_flow(&s, &s_extra, attr, items, actions);
+ extra_str = ds_cstr(&s_extra);
+ VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s",
+ netdev_get_name(netdev), (intptr_t) flow, extra_str,
+ netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
}
} else {
enum vlog_level level = VLL_WARN;
@@ -471,12 +584,15 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
VLOG_RL(&rl, level, "%s: rte_flow creation failed: %d (%s).",
netdev_get_name(netdev), error->type, error->message);
if (!vlog_should_drop(&this_module, level, &rl)) {
- ds_init(&s);
- dump_flow(&s, attr, items, actions);
- VLOG_RL(&rl, level, "Failed flow:\n%s", ds_cstr(&s));
- ds_destroy(&s);
+ dump_flow(&s, &s_extra, attr, items, actions);
+ extra_str = ds_cstr(&s_extra);
+ VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s",
+ netdev_get_name(netdev), extra_str,
+ netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
}
}
+ ds_destroy(&s);
+ ds_destroy(&s_extra);
return flow;
}
@@ -559,14 +675,42 @@ free_flow_actions(struct flow_actions *actions)
static int
parse_flow_match(struct flow_patterns *patterns,
- const struct match *match)
+ struct match *match)
{
- uint8_t *next_proto_mask = NULL;
+ struct flow *consumed_masks;
uint8_t proto = 0;
+ consumed_masks = &match->wc.masks;
+
+ if (!flow_tnl_dst_is_set(&match->flow.tunnel)) {
+ memset(&consumed_masks->tunnel, 0, sizeof consumed_masks->tunnel);
+ }
+
+ memset(&consumed_masks->in_port, 0, sizeof consumed_masks->in_port);
+ /* recirc id must be zero. */
+ if (match->wc.masks.recirc_id & match->flow.recirc_id) {
+ return -1;
+ }
+ consumed_masks->recirc_id = 0;
+ consumed_masks->packet_type = 0;
+
/* Eth */
- if (!eth_addr_is_zero(match->wc.masks.dl_src) ||
- !eth_addr_is_zero(match->wc.masks.dl_dst)) {
+ if (match->wc.masks.dl_type == OVS_BE16_MAX && is_ip_any(&match->flow)
+ && eth_addr_is_zero(match->wc.masks.dl_dst)
+ && eth_addr_is_zero(match->wc.masks.dl_src)) {
+ /*
+ * This is a temporary work around to fix ethernet pattern for partial
+ * hardware offload for X710 devices. This fix will be reverted once
+ * the issue is fixed within the i40e PMD driver.
+ */
+ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
+
+ memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
+ memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
+ consumed_masks->dl_type = 0;
+ } else if (match->wc.masks.dl_type ||
+ !eth_addr_is_zero(match->wc.masks.dl_src) ||
+ !eth_addr_is_zero(match->wc.masks.dl_dst)) {
struct rte_flow_item_eth *spec, *mask;
spec = xzalloc(sizeof *spec);
@@ -580,16 +724,11 @@ parse_flow_match(struct flow_patterns *patterns,
memcpy(&mask->src, &match->wc.masks.dl_src, sizeof mask->src);
mask->type = match->wc.masks.dl_type;
+ memset(&consumed_masks->dl_dst, 0, sizeof consumed_masks->dl_dst);
+ memset(&consumed_masks->dl_src, 0, sizeof consumed_masks->dl_src);
+ consumed_masks->dl_type = 0;
+
add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
- } else {
- /*
- * If user specifies a flow (like UDP flow) without L2 patterns,
- * OVS will at least set the dl_type. Normally, it's enough to
- * create an eth pattern just with it. Unluckily, some Intel's
- * NIC (such as XL710) doesn't support that. Below is a workaround,
- * which simply matches any L2 pkts.
- */
- add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
}
/* VLAN */
@@ -607,6 +746,11 @@ parse_flow_match(struct flow_patterns *patterns,
add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
}
+ /* For untagged matching match->wc.masks.vlans[0].tci is 0xFFFF and
+ * match->flow.vlans[0].tci is 0. Consuming is needed outside of the if
+ * scope to handle that.
+ */
+ memset(&consumed_masks->vlans[0], 0, sizeof consumed_masks->vlans[0]);
/* IP v4 */
if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
@@ -627,12 +771,59 @@ parse_flow_match(struct flow_patterns *patterns,
mask->hdr.src_addr = match->wc.masks.nw_src;
mask->hdr.dst_addr = match->wc.masks.nw_dst;
+ consumed_masks->nw_tos = 0;
+ consumed_masks->nw_ttl = 0;
+ consumed_masks->nw_proto = 0;
+ consumed_masks->nw_src = 0;
+ consumed_masks->nw_dst = 0;
+
add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV4, spec, mask);
/* Save proto for L4 protocol setup. */
proto = spec->hdr.next_proto_id &
mask->hdr.next_proto_id;
- next_proto_mask = &mask->hdr.next_proto_id;
+ }
+ /* If fragmented, then don't HW accelerate - for now. */
+ if (match->wc.masks.nw_frag & match->flow.nw_frag) {
+ return -1;
+ }
+ consumed_masks->nw_frag = 0;
+
+ /* IP v6 */
+ if (match->flow.dl_type == htons(ETH_TYPE_IPV6)) {
+ struct rte_flow_item_ipv6 *spec, *mask;
+
+ spec = xzalloc(sizeof *spec);
+ mask = xzalloc(sizeof *mask);
+
+ spec->hdr.proto = match->flow.nw_proto;
+ spec->hdr.hop_limits = match->flow.nw_ttl;
+ spec->hdr.vtc_flow =
+ htonl((uint32_t) match->flow.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
+ memcpy(spec->hdr.src_addr, &match->flow.ipv6_src,
+ sizeof spec->hdr.src_addr);
+ memcpy(spec->hdr.dst_addr, &match->flow.ipv6_dst,
+ sizeof spec->hdr.dst_addr);
+
+ mask->hdr.proto = match->wc.masks.nw_proto;
+ mask->hdr.hop_limits = match->wc.masks.nw_ttl;
+ mask->hdr.vtc_flow =
+ htonl((uint32_t) match->wc.masks.nw_tos << RTE_IPV6_HDR_TC_SHIFT);
+ memcpy(mask->hdr.src_addr, &match->wc.masks.ipv6_src,
+ sizeof mask->hdr.src_addr);
+ memcpy(mask->hdr.dst_addr, &match->wc.masks.ipv6_dst,
+ sizeof mask->hdr.dst_addr);
+
+ consumed_masks->nw_proto = 0;
+ consumed_masks->nw_ttl = 0;
+ consumed_masks->nw_tos = 0;
+ memset(&consumed_masks->ipv6_src, 0, sizeof consumed_masks->ipv6_src);
+ memset(&consumed_masks->ipv6_dst, 0, sizeof consumed_masks->ipv6_dst);
+
+ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_IPV6, spec, mask);
+
+ /* Save proto for L4 protocol setup. */
+ proto = spec->hdr.proto & mask->hdr.proto;
}
if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP &&
@@ -644,11 +835,6 @@ parse_flow_match(struct flow_patterns *patterns,
return -1;
}
- if ((match->wc.masks.tp_src && match->wc.masks.tp_src != OVS_BE16_MAX) ||
- (match->wc.masks.tp_dst && match->wc.masks.tp_dst != OVS_BE16_MAX)) {
- return -1;
- }
-
if (proto == IPPROTO_TCP) {
struct rte_flow_item_tcp *spec, *mask;
@@ -665,12 +851,11 @@ parse_flow_match(struct flow_patterns *patterns,
mask->hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8;
mask->hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff;
- add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask);
+ consumed_masks->tp_src = 0;
+ consumed_masks->tp_dst = 0;
+ consumed_masks->tcp_flags = 0;
- /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match. */
- if (next_proto_mask) {
- *next_proto_mask = 0;
- }
+ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_TCP, spec, mask);
} else if (proto == IPPROTO_UDP) {
struct rte_flow_item_udp *spec, *mask;
@@ -683,12 +868,10 @@ parse_flow_match(struct flow_patterns *patterns,
mask->hdr.src_port = match->wc.masks.tp_src;
mask->hdr.dst_port = match->wc.masks.tp_dst;
- add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask);
+ consumed_masks->tp_src = 0;
+ consumed_masks->tp_dst = 0;
- /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match. */
- if (next_proto_mask) {
- *next_proto_mask = 0;
- }
+ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_UDP, spec, mask);
} else if (proto == IPPROTO_SCTP) {
struct rte_flow_item_sctp *spec, *mask;
@@ -701,12 +884,10 @@ parse_flow_match(struct flow_patterns *patterns,
mask->hdr.src_port = match->wc.masks.tp_src;
mask->hdr.dst_port = match->wc.masks.tp_dst;
- add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask);
+ consumed_masks->tp_src = 0;
+ consumed_masks->tp_dst = 0;
- /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match. */
- if (next_proto_mask) {
- *next_proto_mask = 0;
- }
+ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_SCTP, spec, mask);
} else if (proto == IPPROTO_ICMP) {
struct rte_flow_item_icmp *spec, *mask;
@@ -719,16 +900,17 @@ parse_flow_match(struct flow_patterns *patterns,
mask->hdr.icmp_type = (uint8_t) ntohs(match->wc.masks.tp_src);
mask->hdr.icmp_code = (uint8_t) ntohs(match->wc.masks.tp_dst);
- add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask);
+ consumed_masks->tp_src = 0;
+ consumed_masks->tp_dst = 0;
- /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match. */
- if (next_proto_mask) {
- *next_proto_mask = 0;
- }
+ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ICMP, spec, mask);
}
add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL);
+ if (!is_all_zeros(consumed_masks, sizeof *consumed_masks)) {
+ return -1;
+ }
return 0;
}
@@ -825,15 +1007,14 @@ add_port_id_action(struct flow_actions *actions,
static int
add_output_action(struct netdev *netdev,
struct flow_actions *actions,
- const struct nlattr *nla,
- struct offload_info *info)
+ const struct nlattr *nla)
{
struct netdev *outdev;
odp_port_t port;
int ret = 0;
port = nl_attr_get_odp_port(nla);
- outdev = netdev_ports_get(port, info->dpif_class);
+ outdev = netdev_ports_get(port, netdev->dpif_type);
if (outdev == NULL) {
VLOG_DBG_RL(&rl, "Cannot find netdev for odp port %"PRIu32, port);
return -1;
@@ -889,6 +1070,12 @@ BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
+ MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_src));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv6) ==
+ MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_dst));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
+ MEMBER_SIZEOF(struct ovs_key_ipv6, ipv6_hlimit));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
MEMBER_SIZEOF(struct ovs_key_tcp, tcp_src));
BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
@@ -938,6 +1125,18 @@ parse_set_actions(struct flow_actions *actions,
VLOG_DBG_RL(&rl, "Unsupported IPv4 set action");
return -1;
}
+ } else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV6) {
+ const struct ovs_key_ipv6 *key = nl_attr_get(sa);
+ const struct ovs_key_ipv6 *mask = masked ? key + 1 : NULL;
+
+ add_set_flow_action(ipv6_src, RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC);
+ add_set_flow_action(ipv6_dst, RTE_FLOW_ACTION_TYPE_SET_IPV6_DST);
+ add_set_flow_action(ipv6_hlimit, RTE_FLOW_ACTION_TYPE_SET_TTL);
+
+ if (mask && !is_all_zeros(mask, sizeof *mask)) {
+ VLOG_DBG_RL(&rl, "Unsupported IPv6 set action");
+ return -1;
+ }
} else if (nl_attr_type(sa) == OVS_KEY_ATTR_TCP) {
const struct ovs_key_tcp *key = nl_attr_get(sa);
const struct ovs_key_tcp *mask = masked ? key + 1 : NULL;
@@ -970,12 +1169,162 @@ parse_set_actions(struct flow_actions *actions,
return 0;
}
+/* Maximum number of items in struct rte_flow_action_vxlan_encap.
+ * ETH / IPv4(6) / UDP / VXLAN / END
+ */
+#define ACTION_VXLAN_ENCAP_ITEMS_NUM 5
+
+/* Appends an RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP action to 'actions' whose
+ * definition is the item list ETH / IPv4 or IPv6 / UDP / VXLAN / END built
+ * from the encapsulation header that starts at 'header'.  The item specs
+ * point straight into 'header' (no copy is made) and the masks are the
+ * rte_flow_item_*_mask objects.
+ *
+ * Returns 0 on success.  Returns -1 without adding an action if the
+ * ethertype is neither IPv4 nor IPv6, or if the L3 next protocol is not
+ * UDP. */
+static int
+add_vxlan_encap_action(struct flow_actions *actions,
+                       const void *header)
+{
+    /* 'conf' has to stay the first member: the whole allocation is handed
+     * to add_flow_action() as the action's configuration. */
+    struct vxlan_data {
+        struct rte_flow_action_vxlan_encap conf;
+        struct rte_flow_item items[ACTION_VXLAN_ENCAP_ITEMS_NUM];
+    } *encap;
+    BUILD_ASSERT_DECL(offsetof(struct vxlan_data, conf) == 0);
+    const struct eth_header *eth_hdr = header;
+    const struct udp_header *udp_hdr;
+    struct rte_flow_item *item;
+    const void *l3_hdr;
+    const void *l4_hdr;
+
+    encap = xzalloc(sizeof *encap);
+    item = encap->items;
+
+    /* Ethernet */
+    item->type = RTE_FLOW_ITEM_TYPE_ETH;
+    item->spec = eth_hdr;
+    item->mask = &rte_flow_item_eth_mask;
+    item++;
+
+    /* IP: the L3 header is taken to start right after one eth_header. */
+    l3_hdr = eth_hdr + 1;
+    if (eth_hdr->eth_type == htons(ETH_TYPE_IP)) {
+        /* IPv4 */
+        const struct ip_header *ip4 = l3_hdr;
+
+        if (ip4->ip_proto != IPPROTO_UDP) {
+            goto unsupported;
+        }
+        item->type = RTE_FLOW_ITEM_TYPE_IPV4;
+        item->spec = ip4;
+        item->mask = &rte_flow_item_ipv4_mask;
+        /* L4 is taken to follow exactly one struct ip_header. */
+        l4_hdr = ip4 + 1;
+    } else if (eth_hdr->eth_type == htons(ETH_TYPE_IPV6)) {
+        /* IPv6 */
+        const struct ovs_16aligned_ip6_hdr *ip6 = l3_hdr;
+
+        if (ip6->ip6_nxt != IPPROTO_UDP) {
+            goto unsupported;
+        }
+        item->type = RTE_FLOW_ITEM_TYPE_IPV6;
+        item->spec = ip6;
+        item->mask = &rte_flow_item_ipv6_mask;
+        l4_hdr = ip6 + 1;
+    } else {
+        goto unsupported;
+    }
+    item++;
+
+    /* UDP */
+    udp_hdr = l4_hdr;
+    item->type = RTE_FLOW_ITEM_TYPE_UDP;
+    item->spec = udp_hdr;
+    item->mask = &rte_flow_item_udp_mask;
+    item++;
+
+    /* VXLAN: whatever follows the UDP header. */
+    item->type = RTE_FLOW_ITEM_TYPE_VXLAN;
+    item->spec = udp_hdr + 1;
+    item->mask = &rte_flow_item_vxlan_mask;
+    item++;
+
+    /* Terminator. */
+    item->type = RTE_FLOW_ITEM_TYPE_END;
+
+    encap->conf.definition = encap->items;
+    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, encap);
+
+    return 0;
+
+unsupported:
+    free(encap);
+    return -1;
+}
+
+/* Translates the OVS push-VLAN action 'vlan_push' into three rte_flow
+ * actions appended to 'actions' in this order: OF_PUSH_VLAN carrying the
+ * TPID, OF_SET_VLAN_PCP and OF_SET_VLAN_VID, the last two derived from
+ * 'vlan_push->vlan_tci'.
+ *
+ * Always returns 0. */
+static int
+parse_vlan_push_action(struct flow_actions *actions,
+                       const struct ovs_action_push_vlan *vlan_push)
+{
+    struct rte_flow_action_of_push_vlan *push = xzalloc(sizeof *push);
+    struct rte_flow_action_of_set_vlan_pcp *pcp = xzalloc(sizeof *pcp);
+    struct rte_flow_action_of_set_vlan_vid *vid = xzalloc(sizeof *vid);
+
+    push->ethertype = vlan_push->vlan_tpid;
+    pcp->vlan_pcp = vlan_tci_to_pcp(vlan_push->vlan_tci);
+    vid->vlan_vid = htons(vlan_tci_to_vid(vlan_push->vlan_tci));
+
+    /* The append order below is the order the actions end up in. */
+    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN, push);
+    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP, pcp);
+    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID, vid);
+
+    return 0;
+}
+
+/* Translates the actions nested inside a clone() ('clone_actions', of
+ * 'clone_actions_len' bytes) into rte_flow actions appended to 'actions'.
+ *
+ * Only two nested action types are accepted:
+ *   - tunnel push: becomes a VXLAN_ENCAP action when the tunnel type is
+ *     VXLAN and add_vxlan_encap_action() accepts the header; otherwise a
+ *     RAW_ENCAP action that references the pushed header bytes in place.
+ *   - output: delegated to add_output_action() for 'netdev'.
+ *
+ * Returns 0 on success, -1 if add_output_action() fails or any other nested
+ * action type is encountered. */
+static int
+parse_clone_actions(struct netdev *netdev,
+                    struct flow_actions *actions,
+                    const struct nlattr *clone_actions,
+                    const size_t clone_actions_len)
+{
+    const struct nlattr *nla;
+    unsigned int left;
+
+    NL_ATTR_FOR_EACH_UNSAFE (nla, left, clone_actions, clone_actions_len) {
+        const struct ovs_action_push_tnl *tnl;
+        struct rte_flow_action_raw_encap *raw;
+        int type = nl_attr_type(nla);
+
+        if (type == OVS_ACTION_ATTR_OUTPUT) {
+            if (add_output_action(netdev, actions, nla)) {
+                return -1;
+            }
+            continue;
+        }
+
+        if (type != OVS_ACTION_ATTR_TUNNEL_PUSH) {
+            VLOG_DBG_RL(&rl,
+                        "Unsupported nested action inside clone(), "
+                        "action type: %d", type);
+            return -1;
+        }
+
+        tnl = nl_attr_get(nla);
+        if (tnl->tnl_type == OVS_VPORT_TYPE_VXLAN &&
+            !add_vxlan_encap_action(actions, tnl->header)) {
+            /* Expressed as a VXLAN_ENCAP action; nothing more to add. */
+            continue;
+        }
+
+        /* Fall back to a raw encap of the pushed header. */
+        raw = xzalloc(sizeof *raw);
+        raw->data = (uint8_t *) tnl->header;
+        raw->preserve = NULL;
+        raw->size = tnl->header_len;
+
+        add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RAW_ENCAP, raw);
+    }
+    return 0;
+}
+
static int
parse_flow_actions(struct netdev *netdev,
struct flow_actions *actions,
struct nlattr *nl_actions,
- size_t nl_actions_len,
- struct offload_info *info)
+ size_t nl_actions_len)
{
struct nlattr *nla;
size_t left;
@@ -983,7 +1332,7 @@ parse_flow_actions(struct netdev *netdev,
add_count_action(actions);
NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
- if (add_output_action(netdev, actions, nla, info)) {
+ if (add_output_action(netdev, actions, nla)) {
return -1;
}
} else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
@@ -998,6 +1347,23 @@ parse_flow_actions(struct netdev *netdev,
masked)) {
return -1;
}
+ } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_PUSH_VLAN) {
+ const struct ovs_action_push_vlan *vlan = nl_attr_get(nla);
+
+ if (parse_vlan_push_action(actions, vlan)) {
+ return -1;
+ }
+ } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_POP_VLAN) {
+ add_flow_action(actions, RTE_FLOW_ACTION_TYPE_OF_POP_VLAN, NULL);
+ } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_CLONE &&
+ left <= NLA_ALIGN(nla->nla_len)) {
+ const struct nlattr *clone_actions = nl_attr_get(nla);
+ size_t clone_actions_len = nl_attr_get_size(nla);
+
+ if (parse_clone_actions(netdev, actions, clone_actions,
+ clone_actions_len)) {
+ return -1;
+ }
} else {
VLOG_DBG_RL(&rl, "Unsupported action type %d", nl_attr_type(nla));
return -1;
@@ -1017,8 +1383,7 @@ static struct rte_flow *
netdev_offload_dpdk_actions(struct netdev *netdev,
struct flow_patterns *patterns,
struct nlattr *nl_actions,
- size_t actions_len,
- struct offload_info *info)
+ size_t actions_len)
{
const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
struct flow_actions actions = { .actions = NULL, .cnt = 0 };
@@ -1026,7 +1391,7 @@ netdev_offload_dpdk_actions(struct netdev *netdev,
struct rte_flow_error error;
int ret;
- ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len, info);
+ ret = parse_flow_actions(netdev, &actions, nl_actions, actions_len);
if (ret) {
goto out;
}
@@ -1037,26 +1402,27 @@ out:
return flow;
}
-static int
+static struct ufid_to_rte_flow_data *
netdev_offload_dpdk_add_flow(struct netdev *netdev,
- const struct match *match,
+ struct match *match,
struct nlattr *nl_actions,
size_t actions_len,
const ovs_u128 *ufid,
struct offload_info *info)
{
struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
+ struct ufid_to_rte_flow_data *flows_data = NULL;
bool actions_offloaded = true;
struct rte_flow *flow;
- int ret = 0;
- ret = parse_flow_match(&patterns, match);
- if (ret) {
+ if (parse_flow_match(&patterns, match)) {
+ VLOG_DBG_RL(&rl, "%s: matches of ufid "UUID_FMT" are not supported",
+ netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid));
goto out;
}
flow = netdev_offload_dpdk_actions(netdev, &patterns, nl_actions,
- actions_len, info);
+ actions_len);
if (!flow) {
/* If we failed to offload the rule actions fallback to MARK+RSS
* actions.
@@ -1067,88 +1433,15 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
}
if (!flow) {
- ret = -1;
goto out;
}
- ufid_to_rte_flow_associate(ufid, flow, actions_offloaded);
- VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n",
+ flows_data = ufid_to_rte_flow_associate(ufid, flow, actions_offloaded);
+ VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT,
netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid));
out:
free_flow_patterns(&patterns);
- return ret;
-}
-
-/*
- * Check if any unsupported flow patterns are specified.
- */
-static int
-netdev_offload_dpdk_validate_flow(const struct match *match)
-{
- struct match match_zero_wc;
- const struct flow *masks = &match->wc.masks;
-
- /* Create a wc-zeroed version of flow. */
- match_init(&match_zero_wc, &match->flow, &match->wc);
-
- if (!is_all_zeros(&match_zero_wc.flow.tunnel,
- sizeof match_zero_wc.flow.tunnel)) {
- goto err;
- }
-
- if (masks->metadata || masks->skb_priority ||
- masks->pkt_mark || masks->dp_hash) {
- goto err;
- }
-
- /* recirc id must be zero. */
- if (match_zero_wc.flow.recirc_id) {
- goto err;
- }
-
- if (masks->ct_state || masks->ct_nw_proto ||
- masks->ct_zone || masks->ct_mark ||
- !ovs_u128_is_zero(masks->ct_label)) {
- goto err;
- }
-
- if (masks->conj_id || masks->actset_output) {
- goto err;
- }
-
- /* Unsupported L2. */
- if (!is_all_zeros(masks->mpls_lse, sizeof masks->mpls_lse)) {
- goto err;
- }
-
- /* Unsupported L3. */
- if (masks->ipv6_label || masks->ct_nw_src || masks->ct_nw_dst ||
- !is_all_zeros(&masks->ipv6_src, sizeof masks->ipv6_src) ||
- !is_all_zeros(&masks->ipv6_dst, sizeof masks->ipv6_dst) ||
- !is_all_zeros(&masks->ct_ipv6_src, sizeof masks->ct_ipv6_src) ||
- !is_all_zeros(&masks->ct_ipv6_dst, sizeof masks->ct_ipv6_dst) ||
- !is_all_zeros(&masks->nd_target, sizeof masks->nd_target) ||
- !is_all_zeros(&masks->nsh, sizeof masks->nsh) ||
- !is_all_zeros(&masks->arp_sha, sizeof masks->arp_sha) ||
- !is_all_zeros(&masks->arp_tha, sizeof masks->arp_tha)) {
- goto err;
- }
-
- /* If fragmented, then don't HW accelerate - for now. */
- if (match_zero_wc.flow.nw_frag) {
- goto err;
- }
-
- /* Unsupported L4. */
- if (masks->igmp_group_ip4 || masks->ct_tp_src || masks->ct_tp_dst) {
- goto err;
- }
-
- return 0;
-
-err:
- VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols");
- return -1;
+ return flows_data;
}
static int
@@ -1161,12 +1454,15 @@ netdev_offload_dpdk_destroy_flow(struct netdev *netdev,
if (ret == 0) {
ufid_to_rte_flow_disassociate(ufid);
- VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n",
- netdev_get_name(netdev), rte_flow,
- UUID_ARGS((struct uuid *)ufid));
+ VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR
+ " flow destroy %d ufid " UUID_FMT,
+ netdev_get_name(netdev), (intptr_t) rte_flow,
+ netdev_dpdk_get_port_id(netdev),
+ UUID_ARGS((struct uuid *) ufid));
} else {
- VLOG_ERR("%s: Failed to destroy flow: %s (%u)\n",
- netdev_get_name(netdev), error.message, error.type);
+ VLOG_ERR("Failed flow: %s: flow destroy %d ufid " UUID_FMT,
+ netdev_get_name(netdev), netdev_dpdk_get_port_id(netdev),
+ UUID_ARGS((struct uuid *) ufid));
}
return ret;
@@ -1179,14 +1475,19 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match,
struct dpif_flow_stats *stats)
{
struct ufid_to_rte_flow_data *rte_flow_data;
+ struct dpif_flow_stats old_stats;
+ bool modification = false;
int ret;
/*
* If an old rte_flow exists, it means it's a flow modification.
* Here destroy the old rte flow first before adding a new one.
+ * Keep the stats for the newly created rule.
*/
rte_flow_data = ufid_to_rte_flow_data_find(ufid);
if (rte_flow_data && rte_flow_data->rte_flow) {
+ old_stats = rte_flow_data->stats;
+ modification = true;
ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
rte_flow_data->rte_flow);
if (ret < 0) {
@@ -1194,16 +1495,18 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, struct match *match,
}
}
- ret = netdev_offload_dpdk_validate_flow(match);
- if (ret < 0) {
- return ret;
+ rte_flow_data = netdev_offload_dpdk_add_flow(netdev, match, actions,
+ actions_len, ufid, info);
+ if (!rte_flow_data) {
+ return -1;
+ }
+ if (modification) {
+ rte_flow_data->stats = old_stats;
}
-
if (stats) {
- memset(stats, 0, sizeof *stats);
+ *stats = rte_flow_data->stats;
}
- return netdev_offload_dpdk_add_flow(netdev, match, actions,
- actions_len, ufid, info);
+ return 0;
}
static int
@@ -1260,7 +1563,7 @@ netdev_offload_dpdk_flow_get(struct netdev *netdev,
ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
&query, &error);
if (ret) {
- VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p\n",
+ VLOG_DBG_RL(&rl, "%s: Failed to query ufid "UUID_FMT" flow: %p",
netdev_get_name(netdev), UUID_ARGS((struct uuid *) ufid),
rte_flow_data->rte_flow);
goto out;
diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
index 5a809c0cdf1fbe8197763fc3aeb265942a5db24b..cf859d1b439261b811334fd11c494c886856a998 100644
--- a/lib/netdev-offload-provider.h
+++ b/lib/netdev-offload-provider.h
@@ -42,7 +42,8 @@ struct netdev_flow_api {
*
* On success returns 0 and allocates data, on failure returns
* positive errno. */
- int (*flow_dump_create)(struct netdev *, struct netdev_flow_dump **dump);
+ int (*flow_dump_create)(struct netdev *, struct netdev_flow_dump **dump,
+ bool terse);
int (*flow_dump_destroy)(struct netdev_flow_dump *);
/* Returns true if there are more flows to dump.
@@ -82,6 +83,10 @@ struct netdev_flow_api {
int (*flow_del)(struct netdev *, const ovs_u128 *ufid,
struct dpif_flow_stats *);
+ /* Get the number of flows offloaded to netdev.
+ * Return 0 if successful, otherwise returns a positive errno value. */
+ int (*flow_get_n_flows)(struct netdev *, uint64_t *n_flows);
+
/* Initializies the netdev flow api.
* Return 0 if successful, otherwise returns a positive errno value. */
int (*init_flow_api)(struct netdev *);
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 550e440b3a45a5291150c72e4639c40ad96688fe..717a987d14d87d25fb20cd3a9dae481c72b16f8c 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -198,7 +198,9 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid)
int err;
err = tc_del_filter(id);
- del_ufid_tc_mapping(ufid);
+ if (!err) {
+ del_ufid_tc_mapping(ufid);
+ }
return err;
}
@@ -366,7 +368,8 @@ netdev_tc_flow_flush(struct netdev *netdev)
static int
netdev_tc_flow_dump_create(struct netdev *netdev,
- struct netdev_flow_dump **dump_out)
+ struct netdev_flow_dump **dump_out,
+ bool terse)
{
enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev);
struct netdev_flow_dump *dump;
@@ -386,9 +389,10 @@ netdev_tc_flow_dump_create(struct netdev *netdev,
dump = xzalloc(sizeof *dump);
dump->nl_dump = xzalloc(sizeof *dump->nl_dump);
dump->netdev = netdev_ref(netdev);
+ dump->terse = terse;
id = tc_make_tcf_id(ifindex, block_id, prio, hook);
- tc_dump_flower_start(&id, dump->nl_dump);
+ tc_dump_flower_start(&id, dump->nl_dump, terse);
*dump_out = dump;
@@ -502,13 +506,53 @@ flower_tun_opt_to_match(struct match *match, struct tc_flower *flower)
match->wc.masks.tunnel.flags |= FLOW_TNL_F_UDPIF;
}
+/* Fills 'stats' from 'flower': the structure is zeroed, then the packet
+ * count, byte count and last-used time are copied over.  Does nothing when
+ * 'stats' is NULL. */
+static void
+parse_tc_flower_to_stats(struct tc_flower *flower,
+                         struct dpif_flow_stats *stats)
+{
+    if (stats) {
+        memset(stats, 0, sizeof *stats);
+        stats->used = flower->lastused;
+        stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes);
+        stats->n_packets = get_32aligned_u64(&flower->stats.n_packets);
+    }
+}
+
+/* Fills 'attrs' from 'flower'.  The flow is reported as offloaded when its
+ * offloaded state is either "in hardware" or "undefined".  The datapath
+ * layer is always "tc" and no extra info is provided. */
+static void
+parse_tc_flower_to_attrs(struct tc_flower *flower,
+                         struct dpif_flow_attrs *attrs)
+{
+    bool in_hw = flower->offloaded_state == TC_OFFLOADED_STATE_IN_HW;
+    bool undefined = flower->offloaded_state == TC_OFFLOADED_STATE_UNDEFINED;
+
+    attrs->dp_extra_info = NULL;
+    attrs->dp_layer = "tc";
+    attrs->offloaded = in_hw || undefined;
+}
+
+/* Terse variant of parse_tc_flower_to_match(): 'match' is reset to a
+ * catch-all and only 'stats' and 'attrs' are filled in from 'flower'.
+ *
+ * Always returns 0. */
+static int
+parse_tc_flower_terse_to_match(struct tc_flower *flower,
+                               struct match *match,
+                               struct dpif_flow_stats *stats,
+                               struct dpif_flow_attrs *attrs)
+{
+    /* The two helpers write to distinct outputs. */
+    parse_tc_flower_to_attrs(flower, attrs);
+    parse_tc_flower_to_stats(flower, stats);
+
+    match_init_catchall(match);
+
+    return 0;
+}
+
static int
parse_tc_flower_to_match(struct tc_flower *flower,
struct match *match,
struct nlattr **actions,
struct dpif_flow_stats *stats,
struct dpif_flow_attrs *attrs,
- struct ofpbuf *buf)
+ struct ofpbuf *buf,
+ bool terse)
{
size_t act_off;
struct tc_flower_key *key = &flower->key;
@@ -517,6 +561,10 @@ parse_tc_flower_to_match(struct tc_flower *flower,
struct tc_action *action;
int i;
+ if (terse) {
+ return parse_tc_flower_terse_to_match(flower, match, stats, attrs);
+ }
+
ofpbuf_clear(buf);
match_init_catchall(match);
@@ -543,6 +591,14 @@ parse_tc_flower_to_match(struct tc_flower *flower,
match->flow.mpls_lse[0] = key->mpls_lse & mask->mpls_lse;
match->wc.masks.mpls_lse[0] = mask->mpls_lse;
match_set_dl_type(match, key->encap_eth_type[0]);
+ } else if (key->eth_type == htons(ETH_TYPE_ARP)) {
+ match_set_arp_sha_masked(match, key->arp.sha, mask->arp.sha);
+ match_set_arp_tha_masked(match, key->arp.tha, mask->arp.tha);
+ match_set_arp_spa_masked(match, key->arp.spa, mask->arp.spa);
+ match_set_arp_tpa_masked(match, key->arp.tpa, mask->arp.tpa);
+ match_set_arp_opcode_masked(match, key->arp.opcode,
+ mask->arp.opcode);
+ match_set_dl_type(match, key->eth_type);
} else {
match_set_dl_type(match, key->eth_type);
}
@@ -633,13 +689,22 @@ parse_tc_flower_to_match(struct tc_flower *flower,
match_set_tun_id(match, flower->key.tunnel.id);
match->flow.tunnel.flags |= FLOW_TNL_F_KEY;
}
- if (flower->key.tunnel.ipv4.ipv4_dst) {
- match_set_tun_src(match, flower->key.tunnel.ipv4.ipv4_src);
- match_set_tun_dst(match, flower->key.tunnel.ipv4.ipv4_dst);
- } else if (!is_all_zeros(&flower->key.tunnel.ipv6.ipv6_dst,
- sizeof flower->key.tunnel.ipv6.ipv6_dst)) {
- match_set_tun_ipv6_src(match, &flower->key.tunnel.ipv6.ipv6_src);
- match_set_tun_ipv6_dst(match, &flower->key.tunnel.ipv6.ipv6_dst);
+ if (flower->mask.tunnel.ipv4.ipv4_dst ||
+ flower->mask.tunnel.ipv4.ipv4_src) {
+ match_set_tun_dst_masked(match,
+ flower->key.tunnel.ipv4.ipv4_dst,
+ flower->mask.tunnel.ipv4.ipv4_dst);
+ match_set_tun_src_masked(match,
+ flower->key.tunnel.ipv4.ipv4_src,
+ flower->mask.tunnel.ipv4.ipv4_src);
+ } else if (ipv6_addr_is_set(&flower->mask.tunnel.ipv6.ipv6_dst) ||
+ ipv6_addr_is_set(&flower->mask.tunnel.ipv6.ipv6_src)) {
+ match_set_tun_ipv6_dst_masked(match,
+ &flower->key.tunnel.ipv6.ipv6_dst,
+ &flower->mask.tunnel.ipv6.ipv6_dst);
+ match_set_tun_ipv6_src_masked(match,
+ &flower->key.tunnel.ipv6.ipv6_src,
+ &flower->mask.tunnel.ipv6.ipv6_src);
}
if (flower->key.tunnel.tos) {
match_set_tun_tos_masked(match, flower->key.tunnel.tos,
@@ -734,13 +799,11 @@ parse_tc_flower_to_match(struct tc_flower *flower,
nl_msg_put_be32(buf, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
action->encap.ipv4.ipv4_dst);
}
- if (!is_all_zeros(&action->encap.ipv6.ipv6_src,
- sizeof action->encap.ipv6.ipv6_src)) {
+ if (ipv6_addr_is_set(&action->encap.ipv6.ipv6_src)) {
nl_msg_put_in6_addr(buf, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
&action->encap.ipv6.ipv6_src);
}
- if (!is_all_zeros(&action->encap.ipv6.ipv6_dst,
- sizeof action->encap.ipv6.ipv6_dst)) {
+ if (ipv6_addr_is_set(&action->encap.ipv6.ipv6_dst)) {
nl_msg_put_in6_addr(buf, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
&action->encap.ipv6.ipv6_dst);
}
@@ -863,17 +926,8 @@ parse_tc_flower_to_match(struct tc_flower *flower,
*actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr));
- if (stats) {
- memset(stats, 0, sizeof *stats);
- stats->n_packets = get_32aligned_u64(&flower->stats.n_packets);
- stats->n_bytes = get_32aligned_u64(&flower->stats.n_bytes);
- stats->used = flower->lastused;
- }
-
- attrs->offloaded = (flower->offloaded_state == TC_OFFLOADED_STATE_IN_HW)
- || (flower->offloaded_state == TC_OFFLOADED_STATE_UNDEFINED);
- attrs->dp_layer = "tc";
- attrs->dp_extra_info = NULL;
+ parse_tc_flower_to_stats(flower, stats);
+ parse_tc_flower_to_attrs(flower, attrs);
return 0;
}
@@ -900,12 +954,12 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
while (nl_dump_next(dump->nl_dump, &nl_flow, rbuffer)) {
struct tc_flower flower;
- if (parse_netlink_to_tc_flower(&nl_flow, &id, &flower)) {
+ if (parse_netlink_to_tc_flower(&nl_flow, &id, &flower, dump->terse)) {
continue;
}
if (parse_tc_flower_to_match(&flower, match, actions, stats, attrs,
- wbuffer)) {
+ wbuffer, dump->terse)) {
continue;
}
@@ -1388,7 +1442,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
chain = key->recirc_id;
mask->recirc_id = 0;
- if (flow_tnl_dst_is_set(&key->tunnel)) {
+ if (flow_tnl_dst_is_set(&key->tunnel) ||
+ flow_tnl_src_is_set(&key->tunnel)) {
VLOG_DBG_RL(&rl,
"tunnel: id %#" PRIx64 " src " IP_FMT
" dst " IP_FMT " tp_src %d tp_dst %d",
@@ -1404,6 +1459,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
flower.key.tunnel.ttl = tnl->ip_ttl;
flower.key.tunnel.tp_src = tnl->tp_src;
flower.key.tunnel.tp_dst = tnl->tp_dst;
+ flower.mask.tunnel.ipv4.ipv4_src = tnl_mask->ip_src;
+ flower.mask.tunnel.ipv4.ipv4_dst = tnl_mask->ip_dst;
+ flower.mask.tunnel.ipv6.ipv6_src = tnl_mask->ipv6_src;
+ flower.mask.tunnel.ipv6.ipv6_dst = tnl_mask->ipv6_dst;
flower.mask.tunnel.tos = tnl_mask->ip_tos;
flower.mask.tunnel.ttl = tnl_mask->ip_ttl;
flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY) ? tnl_mask->tun_id : 0;
@@ -1500,6 +1559,25 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
mask->dl_type = 0;
mask->in_port.odp_port = 0;
+ if (key->dl_type == htons(ETH_P_ARP)) {
+ flower.key.arp.spa = key->nw_src;
+ flower.key.arp.tpa = key->nw_dst;
+ flower.key.arp.sha = key->arp_sha;
+ flower.key.arp.tha = key->arp_tha;
+ flower.key.arp.opcode = key->nw_proto;
+ flower.mask.arp.spa = mask->nw_src;
+ flower.mask.arp.tpa = mask->nw_dst;
+ flower.mask.arp.sha = mask->arp_sha;
+ flower.mask.arp.tha = mask->arp_tha;
+ flower.mask.arp.opcode = mask->nw_proto;
+
+ mask->nw_src = 0;
+ mask->nw_dst = 0;
+ mask->nw_proto = 0;
+ memset(&mask->arp_sha, 0, sizeof mask->arp_sha);
+ memset(&mask->arp_tha, 0, sizeof mask->arp_tha);
+ }
+
if (is_ip_any(key)) {
flower.key.ip_proto = key->nw_proto;
flower.mask.ip_proto = mask->nw_proto;
@@ -1638,7 +1716,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
action = &flower.actions[flower.action_count];
if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
odp_port_t port = nl_attr_get_odp_port(nla);
- struct netdev *outdev = netdev_ports_get(port, info->dpif_class);
+ struct netdev *outdev = netdev_ports_get(
+ port, netdev_get_dpif_type(netdev));
if (!outdev) {
VLOG_DBG_RL(&rl, "Can't find netdev for output port %d", port);
@@ -1712,6 +1791,10 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
action->chain = nl_attr_get_u32(nla);
flower.action_count++;
recirc_act = true;
+ } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
+ action->type = TC_ACT_GOTO;
+ action->chain = 0; /* 0 is reserved and not used by recirc. */
+ flower.action_count++;
} else {
VLOG_DBG_RL(&rl, "unsupported put action type: %d",
nl_attr_type(nla));
@@ -1727,7 +1810,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match,
if (get_ufid_tc_mapping(ufid, &id) == 0) {
VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",
id.handle, id.prio);
- del_filter_and_ufid_mapping(&id, ufid);
+ info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id, ufid);
}
prio = get_prio_for_tc_flower(&flower);
@@ -1784,7 +1867,7 @@ netdev_tc_flow_get(struct netdev *netdev,
}
in_port = netdev_ifindex_to_odp_port(id.ifindex);
- parse_tc_flower_to_match(&flower, match, actions, stats, attrs, buf);
+ parse_tc_flower_to_match(&flower, match, actions, stats, attrs, buf, false);
match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
match->flow.in_port.odp_port = in_port;
@@ -1821,6 +1904,24 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
return error;
}
+/* Counts the entries of 'tc_to_ufid' whose 'netdev' member is the same
+ * pointer as 'netdev' and stores that count in '*n_flows'.
+ *
+ * The map is walked with 'ufid_lock' held.  Always returns 0. */
+static int
+netdev_tc_get_n_flows(struct netdev *netdev, uint64_t *n_flows)
+{
+ struct ufid_tc_data *data;
+ uint64_t total = 0;
+
+ ovs_mutex_lock(&ufid_lock);
+ HMAP_FOR_EACH (data, tc_to_ufid_node, &tc_to_ufid) {
+ /* Entries are matched by netdev pointer identity. */
+ if (data->netdev == netdev) {
+ total++;
+ }
+ }
+ ovs_mutex_unlock(&ufid_lock);
+
+ /* '*n_flows' is written only once, after the lock has been released. */
+ *n_flows = total;
+ return 0;
+}
+
static void
probe_multi_mask_per_prio(int ifindex)
{
@@ -1837,6 +1938,7 @@ probe_multi_mask_per_prio(int ifindex)
memset(&flower, 0, sizeof flower);
+ flower.tc_policy = TC_POLICY_SKIP_HW;
flower.key.eth_type = htons(ETH_P_IP);
flower.mask.eth_type = OVS_BE16_MAX;
memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac);
@@ -1884,6 +1986,7 @@ probe_tc_block_support(int ifindex)
memset(&flower, 0, sizeof flower);
+ flower.tc_policy = TC_POLICY_SKIP_HW;
flower.key.eth_type = htons(ETH_P_IP);
flower.mask.eth_type = OVS_BE16_MAX;
memset(&flower.key.dst_mac, 0x11, sizeof flower.key.dst_mac);
@@ -1903,10 +2006,10 @@ probe_tc_block_support(int ifindex)
static int
netdev_tc_init_flow_api(struct netdev *netdev)
{
- static struct ovsthread_once multi_mask_once = OVSTHREAD_ONCE_INITIALIZER;
- static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER;
+ static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
enum tc_qdisc_hook hook = get_tc_qdisc_hook(netdev);
uint32_t block_id = 0;
+ struct tcf_id id;
int ifindex;
int error;
@@ -1917,20 +2020,26 @@ netdev_tc_init_flow_api(struct netdev *netdev)
return -ifindex;
}
+ block_id = get_block_id_from_netdev(netdev);
+
+ /* Explicitly flush existing rules.  This is needed when working with an
+ * ingress_block, so that reattaching the block (e.g. to a bond iface)
+ * does not fail. */
+ id = tc_make_tcf_id(ifindex, block_id, 0, hook);
+ tc_del_filter(&id);
+
/* make sure there is no ingress/egress qdisc */
tc_add_del_qdisc(ifindex, false, 0, hook);
- if (ovsthread_once_start(&block_once)) {
+ if (ovsthread_once_start(&once)) {
probe_tc_block_support(ifindex);
- ovsthread_once_done(&block_once);
- }
+ /* Need to re-fetch block id as it depends on feature availability. */
+ block_id = get_block_id_from_netdev(netdev);
- if (ovsthread_once_start(&multi_mask_once)) {
probe_multi_mask_per_prio(ifindex);
- ovsthread_once_done(&multi_mask_once);
+ ovsthread_once_done(&once);
}
- block_id = get_block_id_from_netdev(netdev);
error = tc_add_del_qdisc(ifindex, true, block_id, hook);
if (error && error != EEXIST) {
@@ -1953,5 +2062,6 @@ const struct netdev_flow_api netdev_offload_tc = {
.flow_put = netdev_tc_flow_put,
.flow_get = netdev_tc_flow_get,
.flow_del = netdev_tc_flow_del,
+ .flow_get_n_flows = netdev_tc_get_n_flows,
.init_flow_api = netdev_tc_init_flow_api,
};
diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index 32eab5910760f1123d8809c0ac3d370f4499c436..6237667c307ba7b30640d68e35da8b58960fb90c 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -201,13 +201,14 @@ netdev_flow_flush(struct netdev *netdev)
}
int
-netdev_flow_dump_create(struct netdev *netdev, struct netdev_flow_dump **dump)
+netdev_flow_dump_create(struct netdev *netdev, struct netdev_flow_dump **dump,
+ bool terse)
{
const struct netdev_flow_api *flow_api =
ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);
return (flow_api && flow_api->flow_dump_create)
- ? flow_api->flow_dump_create(netdev, dump)
+ ? flow_api->flow_dump_create(netdev, dump, terse)
: EOPNOTSUPP;
}
@@ -279,6 +280,17 @@ netdev_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
: EOPNOTSUPP;
}
+/* Forwards to the 'flow_get_n_flows' callback of the flow API currently
+ * attached to 'netdev', passing 'netdev' and 'n_flows' through unchanged.
+ *
+ * Returns the callback's result, or EOPNOTSUPP if 'netdev' has no flow API
+ * or that flow API does not provide 'flow_get_n_flows'. */
+int
+netdev_flow_get_n_flows(struct netdev *netdev, uint64_t *n_flows)
+{
+ const struct netdev_flow_api *flow_api =
+ ovsrcu_get(const struct netdev_flow_api *, &netdev->flow_api);
+
+ return (flow_api && flow_api->flow_get_n_flows)
+ ? flow_api->flow_get_n_flows(netdev, n_flows)
+ : EOPNOTSUPP;
+}
+
int
netdev_init_flow_api(struct netdev *netdev)
{
@@ -382,11 +394,10 @@ static struct hmap ifindex_to_port OVS_GUARDED_BY(netdev_hmap_rwlock)
= HMAP_INITIALIZER(&ifindex_to_port);
struct port_to_netdev_data {
- struct hmap_node portno_node; /* By (dpif_class, dpif_port.port_no). */
- struct hmap_node ifindex_node; /* By (dpif_class, ifindex). */
+ struct hmap_node portno_node; /* By (dpif_type, dpif_port.port_no). */
+ struct hmap_node ifindex_node; /* By (dpif_type, ifindex). */
struct netdev *netdev;
struct dpif_port dpif_port;
- const struct dpif_class *dpif_class;
int ifindex;
};
@@ -422,13 +433,13 @@ netdev_is_flow_api_enabled(void)
}
void
-netdev_ports_flow_flush(const struct dpif_class *dpif_class)
+netdev_ports_flow_flush(const char *dpif_type)
{
struct port_to_netdev_data *data;
ovs_rwlock_rdlock(&netdev_hmap_rwlock);
HMAP_FOR_EACH (data, portno_node, &port_to_netdev) {
- if (data->dpif_class == dpif_class) {
+ if (netdev_get_dpif_type(data->netdev) == dpif_type) {
netdev_flow_flush(data->netdev);
}
}
@@ -436,7 +447,7 @@ netdev_ports_flow_flush(const struct dpif_class *dpif_class)
}
struct netdev_flow_dump **
-netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports)
+netdev_ports_flow_dump_create(const char *dpif_type, int *ports, bool terse)
{
struct port_to_netdev_data *data;
struct netdev_flow_dump **dumps;
@@ -445,7 +456,7 @@ netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports)
ovs_rwlock_rdlock(&netdev_hmap_rwlock);
HMAP_FOR_EACH (data, portno_node, &port_to_netdev) {
- if (data->dpif_class == dpif_class) {
+ if (netdev_get_dpif_type(data->netdev) == dpif_type) {
count++;
}
}
@@ -453,8 +464,8 @@ netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports)
dumps = count ? xzalloc(sizeof *dumps * count) : NULL;
HMAP_FOR_EACH (data, portno_node, &port_to_netdev) {
- if (data->dpif_class == dpif_class) {
- if (netdev_flow_dump_create(data->netdev, &dumps[i])) {
+ if (netdev_get_dpif_type(data->netdev) == dpif_type) {
+ if (netdev_flow_dump_create(data->netdev, &dumps[i], terse)) {
continue;
}
@@ -469,15 +480,14 @@ netdev_ports_flow_dump_create(const struct dpif_class *dpif_class, int *ports)
}
int
-netdev_ports_flow_del(const struct dpif_class *dpif_class,
- const ovs_u128 *ufid,
+netdev_ports_flow_del(const char *dpif_type, const ovs_u128 *ufid,
struct dpif_flow_stats *stats)
{
struct port_to_netdev_data *data;
ovs_rwlock_rdlock(&netdev_hmap_rwlock);
HMAP_FOR_EACH (data, portno_node, &port_to_netdev) {
- if (data->dpif_class == dpif_class
+ if (netdev_get_dpif_type(data->netdev) == dpif_type
&& !netdev_flow_del(data->netdev, ufid, stats)) {
ovs_rwlock_unlock(&netdev_hmap_rwlock);
return 0;
@@ -489,7 +499,7 @@ netdev_ports_flow_del(const struct dpif_class *dpif_class,
}
int
-netdev_ports_flow_get(const struct dpif_class *dpif_class, struct match *match,
+netdev_ports_flow_get(const char *dpif_type, struct match *match,
struct nlattr **actions, const ovs_u128 *ufid,
struct dpif_flow_stats *stats,
struct dpif_flow_attrs *attrs, struct ofpbuf *buf)
@@ -498,7 +508,7 @@ netdev_ports_flow_get(const struct dpif_class *dpif_class, struct match *match,
ovs_rwlock_rdlock(&netdev_hmap_rwlock);
HMAP_FOR_EACH (data, portno_node, &port_to_netdev) {
- if (data->dpif_class == dpif_class
+ if (netdev_get_dpif_type(data->netdev) == dpif_type
&& !netdev_flow_get(data->netdev, match, actions,
ufid, stats, attrs, buf)) {
ovs_rwlock_unlock(&netdev_hmap_rwlock);
@@ -510,21 +520,21 @@ netdev_ports_flow_get(const struct dpif_class *dpif_class, struct match *match,
}
static uint32_t
-netdev_ports_hash(odp_port_t port, const struct dpif_class *dpif_class)
+netdev_ports_hash(odp_port_t port, const char *dpif_type)
{
- return hash_int(odp_to_u32(port), hash_pointer(dpif_class, 0));
+ return hash_int(odp_to_u32(port), hash_pointer(dpif_type, 0));
}
static struct port_to_netdev_data *
-netdev_ports_lookup(odp_port_t port_no, const struct dpif_class *dpif_class)
+netdev_ports_lookup(odp_port_t port_no, const char *dpif_type)
OVS_REQ_RDLOCK(netdev_hmap_rwlock)
{
struct port_to_netdev_data *data;
HMAP_FOR_EACH_WITH_HASH (data, portno_node,
- netdev_ports_hash(port_no, dpif_class),
+ netdev_ports_hash(port_no, dpif_type),
&port_to_netdev) {
- if (data->dpif_class == dpif_class
+ if (netdev_get_dpif_type(data->netdev) == dpif_type
&& data->dpif_port.port_no == port_no) {
return data;
}
@@ -533,7 +543,7 @@ netdev_ports_lookup(odp_port_t port_no, const struct dpif_class *dpif_class)
}
int
-netdev_ports_insert(struct netdev *netdev, const struct dpif_class *dpif_class,
+netdev_ports_insert(struct netdev *netdev, const char *dpif_type,
struct dpif_port *dpif_port)
{
struct port_to_netdev_data *data;
@@ -544,19 +554,20 @@ netdev_ports_insert(struct netdev *netdev, const struct dpif_class *dpif_class,
}
ovs_rwlock_wrlock(&netdev_hmap_rwlock);
- if (netdev_ports_lookup(dpif_port->port_no, dpif_class)) {
+ if (netdev_ports_lookup(dpif_port->port_no, dpif_type)) {
ovs_rwlock_unlock(&netdev_hmap_rwlock);
return EEXIST;
}
data = xzalloc(sizeof *data);
data->netdev = netdev_ref(netdev);
- data->dpif_class = dpif_class;
dpif_port_clone(&data->dpif_port, dpif_port);
data->ifindex = ifindex;
+ netdev_set_dpif_type(netdev, dpif_type);
+
hmap_insert(&port_to_netdev, &data->portno_node,
- netdev_ports_hash(dpif_port->port_no, dpif_class));
+ netdev_ports_hash(dpif_port->port_no, dpif_type));
hmap_insert(&ifindex_to_port, &data->ifindex_node, ifindex);
ovs_rwlock_unlock(&netdev_hmap_rwlock);
@@ -566,13 +577,13 @@ netdev_ports_insert(struct netdev *netdev, const struct dpif_class *dpif_class,
}
struct netdev *
-netdev_ports_get(odp_port_t port_no, const struct dpif_class *dpif_class)
+netdev_ports_get(odp_port_t port_no, const char *dpif_type)
{
struct port_to_netdev_data *data;
struct netdev *ret = NULL;
ovs_rwlock_rdlock(&netdev_hmap_rwlock);
- data = netdev_ports_lookup(port_no, dpif_class);
+ data = netdev_ports_lookup(port_no, dpif_type);
if (data) {
ret = netdev_ref(data->netdev);
}
@@ -582,13 +593,13 @@ netdev_ports_get(odp_port_t port_no, const struct dpif_class *dpif_class)
}
int
-netdev_ports_remove(odp_port_t port_no, const struct dpif_class *dpif_class)
+netdev_ports_remove(odp_port_t port_no, const char *dpif_type)
{
struct port_to_netdev_data *data;
int ret = ENOENT;
ovs_rwlock_wrlock(&netdev_hmap_rwlock);
- data = netdev_ports_lookup(port_no, dpif_class);
+ data = netdev_ports_lookup(port_no, dpif_type);
if (data) {
dpif_port_destroy(&data->dpif_port);
netdev_close(data->netdev); /* unref and possibly close */
@@ -602,6 +613,22 @@ netdev_ports_remove(odp_port_t port_no, const struct dpif_class *dpif_class)
return ret;
}
+/* Looks up the port registered under ('dpif_type', 'port_no') and asks its
+ * netdev for a flow count via netdev_flow_get_n_flows(), which receives
+ * 'n_flows' as its output argument.
+ *
+ * Returns the result of netdev_flow_get_n_flows(), or EOPNOTSUPP if no such
+ * port is found; in the latter case '*n_flows' is not touched here.  The
+ * lookup and the query both run with 'netdev_hmap_rwlock' held for
+ * reading. */
+int
+netdev_ports_get_n_flows(const char *dpif_type, odp_port_t port_no,
+ uint64_t *n_flows)
+{
+ struct port_to_netdev_data *data;
+ int ret = EOPNOTSUPP;
+
+ ovs_rwlock_rdlock(&netdev_hmap_rwlock);
+ data = netdev_ports_lookup(port_no, dpif_type);
+ if (data) {
+ ret = netdev_flow_get_n_flows(data->netdev, n_flows);
+ }
+ ovs_rwlock_unlock(&netdev_hmap_rwlock);
+ return ret;
+}
+
odp_port_t
netdev_ifindex_to_odp_port(int ifindex)
{
diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
index cd6dfdfff4ca008a44cd7a37301528f981b60477..18b48790f5f67c7e4391f703a011ae31c0dcf463 100644
--- a/lib/netdev-offload.h
+++ b/lib/netdev-offload.h
@@ -62,7 +62,6 @@ struct netdev_flow_dump {
/* Flow offloading. */
struct offload_info {
- const struct dpif_class *dpif_class;
ovs_be16 tp_dst_port; /* Destination port for tunnel in SET action */
uint8_t tunnel_csum_on; /* Tunnel header with checksum */
@@ -74,10 +73,14 @@ struct offload_info {
* it will be in the pkt meta data.
*/
uint32_t flow_mark;
+
+ bool tc_modify_flow_deleted; /* Set by a tc flow modify (put): true if the
+ * original flow was deleted successfully. */
};
int netdev_flow_flush(struct netdev *);
-int netdev_flow_dump_create(struct netdev *, struct netdev_flow_dump **dump);
+int netdev_flow_dump_create(struct netdev *, struct netdev_flow_dump **dump,
+ bool terse);
int netdev_flow_dump_destroy(struct netdev_flow_dump *);
bool netdev_flow_dump_next(struct netdev_flow_dump *, struct match *,
struct nlattr **actions, struct dpif_flow_stats *,
@@ -100,27 +103,30 @@ bool netdev_any_oor(void);
bool netdev_is_flow_api_enabled(void);
void netdev_set_flow_api_enabled(const struct smap *ovs_other_config);
bool netdev_is_offload_rebalance_policy_enabled(void);
+int netdev_flow_get_n_flows(struct netdev *netdev, uint64_t *n_flows);
-struct dpif_class;
struct dpif_port;
-int netdev_ports_insert(struct netdev *, const struct dpif_class *,
+int netdev_ports_insert(struct netdev *, const char *dpif_type,
struct dpif_port *);
-struct netdev *netdev_ports_get(odp_port_t port, const struct dpif_class *);
-int netdev_ports_remove(odp_port_t port, const struct dpif_class *);
+struct netdev *netdev_ports_get(odp_port_t port, const char *dpif_type);
+int netdev_ports_remove(odp_port_t port, const char *dpif_type);
odp_port_t netdev_ifindex_to_odp_port(int ifindex);
struct netdev_flow_dump **netdev_ports_flow_dump_create(
- const struct dpif_class *,
- int *ports);
-void netdev_ports_flow_flush(const struct dpif_class *);
-int netdev_ports_flow_del(const struct dpif_class *, const ovs_u128 *ufid,
+ const char *dpif_type,
+ int *ports,
+ bool terse);
+void netdev_ports_flow_flush(const char *dpif_type);
+int netdev_ports_flow_del(const char *dpif_type, const ovs_u128 *ufid,
struct dpif_flow_stats *stats);
-int netdev_ports_flow_get(const struct dpif_class *, struct match *match,
+int netdev_ports_flow_get(const char *dpif_type, struct match *match,
struct nlattr **actions,
const ovs_u128 *ufid,
struct dpif_flow_stats *stats,
struct dpif_flow_attrs *attrs,
struct ofpbuf *buf);
+int netdev_ports_get_n_flows(const char *dpif_type,
+ odp_port_t port_no, uint64_t *n_flows);
#ifdef __cplusplus
}
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 22f4cde3337a81eb9fbc8a1bd06eed01caf26256..73dce2fcab8d93c9776fe50ee7d5d5d483c1121c 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -40,7 +40,9 @@ struct netdev_tnl_build_header_params;
enum netdev_ol_flags {
NETDEV_TX_OFFLOAD_IPV4_CKSUM = 1 << 0,
NETDEV_TX_OFFLOAD_TCP_CKSUM = 1 << 1,
- NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 2,
+ NETDEV_TX_OFFLOAD_UDP_CKSUM = 1 << 2,
+ NETDEV_TX_OFFLOAD_SCTP_CKSUM = 1 << 3,
+ NETDEV_TX_OFFLOAD_TCP_TSO = 1 << 4,
};
/* A network device (e.g. an Ethernet device).
@@ -94,7 +96,8 @@ struct netdev {
/* Functions to control flow offloading. */
OVSRCU_TYPE(const struct netdev_flow_api *) flow_api;
- struct netdev_hw_info hw_info; /* offload-capable netdev info */
+ const char *dpif_type; /* Type of dpif this netdev belongs to. */
+ struct netdev_hw_info hw_info; /* Offload-capable netdev info. */
};
static inline void
@@ -848,6 +851,7 @@ extern const struct netdev_class netdev_tap_class;
#ifdef HAVE_AF_XDP
extern const struct netdev_class netdev_afxdp_class;
+extern const struct netdev_class netdev_afxdp_nonpmd_class;
#endif
#ifdef __cplusplus
}
diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
index b57d21ff8d411e6d2dd0321eecc798718a2f00e5..15567e52445308c920efc417367012ead0c99ad2 100644
--- a/lib/netdev-vport.c
+++ b/lib/netdev-vport.c
@@ -47,6 +47,7 @@
#include "unaligned.h"
#include "unixctl.h"
#include "openvswitch/vlog.h"
+#include "openvswitch/ofp-parse.h"
#ifdef __linux__
#include "netdev-linux.h"
#endif
@@ -111,7 +112,8 @@ netdev_vport_needs_dst_port(const struct netdev *dev)
return (class->get_config == get_tunnel_config &&
(!strcmp("geneve", type) || !strcmp("vxlan", type) ||
- !strcmp("lisp", type) || !strcmp("stt", type)) );
+ !strcmp("lisp", type) || !strcmp("stt", type) ||
+ !strcmp("gtpu", type) || !strcmp("bareudp",type)));
}
const char *
@@ -216,6 +218,10 @@ netdev_vport_construct(struct netdev *netdev_)
dev->tnl_cfg.dst_port = port ? htons(port) : htons(LISP_DST_PORT);
} else if (!strcmp(type, "stt")) {
dev->tnl_cfg.dst_port = port ? htons(port) : htons(STT_DST_PORT);
+ } else if (!strcmp(type, "gtpu")) {
+ dev->tnl_cfg.dst_port = port ? htons(port) : htons(GTPU_DST_PORT);
+ } else if (!strcmp(type, "bareudp")) {
+ dev->tnl_cfg.dst_port = htons(port);
}
dev->tnl_cfg.dont_fragment = true;
@@ -433,6 +439,10 @@ tunnel_supported_layers(const char *type,
} else if (!strcmp(type, "vxlan")
&& tnl_cfg->exts & (1 << OVS_VXLAN_EXT_GPE)) {
return TNL_L2 | TNL_L3;
+ } else if (!strcmp(type, "gtpu")) {
+ return TNL_L3;
+ } else if (!strcmp(type, "bareudp")) {
+ return TNL_L3;
} else {
return TNL_L2;
}
@@ -589,6 +599,10 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp)
tnl_cfg.dst_port = htons(STT_DST_PORT);
}
+ if (!strcmp(type, "gtpu")) {
+ tnl_cfg.dst_port = htons(GTPU_DST_PORT);
+ }
+
needs_dst_port = netdev_vport_needs_dst_port(dev_);
tnl_cfg.dont_fragment = true;
@@ -736,6 +750,23 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp)
goto out;
}
}
+ } else if (!strcmp(node->key, "payload_type")) {
+ if (!strcmp(node->value, "mpls")) {
+ tnl_cfg.payload_ethertype = htons(ETH_TYPE_MPLS);
+ tnl_cfg.exts |= (1 << OVS_BAREUDP_EXT_MULTIPROTO_MODE);
+ } else if (!strcmp(node->value, "ip")) {
+ tnl_cfg.payload_ethertype = htons(ETH_TYPE_IP);
+ tnl_cfg.exts |= (1 << OVS_BAREUDP_EXT_MULTIPROTO_MODE);
+ } else {
+ uint16_t payload_ethertype;
+
+ if (str_to_u16(node->value, "payload_type",
+ &payload_ethertype)) {
+ err = EINVAL;
+ goto out;
+ }
+ tnl_cfg.payload_ethertype = htons(payload_ethertype);
+ }
} else {
ds_put_format(&errors, "%s: unknown %s argument '%s'\n", name,
type, node->key);
@@ -745,7 +776,7 @@ set_tunnel_config(struct netdev *dev_, const struct smap *args, char **errp)
enum tunnel_layers layers = tunnel_supported_layers(type, &tnl_cfg);
const char *full_type = (strcmp(type, "vxlan") ? type
: (tnl_cfg.exts & (1 << OVS_VXLAN_EXT_GPE)
- ? "VXLAN-GPE" : "VXLAN (without GPE"));
+ ? "VXLAN-GPE" : "VXLAN (without GPE)"));
const char *packet_type = smap_get(args, "packet_type");
if (!packet_type) {
tnl_cfg.pt_mode = default_pt_mode(layers);
@@ -907,7 +938,9 @@ get_tunnel_config(const struct netdev *dev, struct smap *args)
if ((!strcmp("geneve", type) && dst_port != GENEVE_DST_PORT) ||
(!strcmp("vxlan", type) && dst_port != VXLAN_DST_PORT) ||
(!strcmp("lisp", type) && dst_port != LISP_DST_PORT) ||
- (!strcmp("stt", type) && dst_port != STT_DST_PORT)) {
+ (!strcmp("stt", type) && dst_port != STT_DST_PORT) ||
+ (!strcmp("gtpu", type) && dst_port != GTPU_DST_PORT) ||
+ !strcmp("bareudp", type)) {
smap_add_format(args, "dst_port", "%d", dst_port);
}
}
@@ -1223,6 +1256,25 @@ netdev_vport_tunnel_register(void)
},
{{NULL, NULL, 0, 0}}
},
+ { "gtpu_sys",
+ {
+ TUNNEL_FUNCTIONS_COMMON,
+ .type = "gtpu",
+ .build_header = netdev_gtpu_build_header,
+ .push_header = netdev_gtpu_push_header,
+ .pop_header = netdev_gtpu_pop_header,
+ },
+ {{NULL, NULL, 0, 0}}
+ },
+ { "udp_sys",
+ {
+ TUNNEL_FUNCTIONS_COMMON,
+ .type = "bareudp",
+ .get_ifindex = NETDEV_VPORT_GET_IFINDEX,
+ },
+ {{NULL, NULL, 0, 0}}
+ },
+
};
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
diff --git a/lib/netdev.c b/lib/netdev.c
index f95b19af4da0f3685cc2dec918653d633fb3c8b3..91e91955c09be022ba4d56e6a0d4aeab77d8bf44 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -154,6 +154,7 @@ netdev_initialize(void)
netdev_register_flow_api_provider(&netdev_offload_tc);
#ifdef HAVE_AF_XDP
netdev_register_provider(&netdev_afxdp_class);
+ netdev_register_provider(&netdev_afxdp_nonpmd_class);
#endif
#endif
#if defined(__FreeBSD__) || defined(__NetBSD__)
@@ -791,6 +792,8 @@ static bool
netdev_send_prepare_packet(const uint64_t netdev_flags,
struct dp_packet *packet, char **errormsg)
{
+ uint64_t l4_mask;
+
if (dp_packet_hwol_is_tso(packet)
&& !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_TSO)) {
/* Fall back to GSO in software. */
@@ -798,11 +801,31 @@ netdev_send_prepare_packet(const uint64_t netdev_flags,
return false;
}
- if (dp_packet_hwol_l4_mask(packet)
- && !(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
- /* Fall back to L4 csum in software. */
- VLOG_ERR_BUF(errormsg, "No L4 checksum support");
+ l4_mask = dp_packet_hwol_l4_mask(packet);
+ if (l4_mask) {
+ if (dp_packet_hwol_l4_is_tcp(packet)) {
+ if (!(netdev_flags & NETDEV_TX_OFFLOAD_TCP_CKSUM)) {
+ /* Fall back to TCP csum in software. */
+ VLOG_ERR_BUF(errormsg, "No TCP checksum support");
+ return false;
+ }
+ } else if (dp_packet_hwol_l4_is_udp(packet)) {
+ if (!(netdev_flags & NETDEV_TX_OFFLOAD_UDP_CKSUM)) {
+ /* Fall back to UDP csum in software. */
+ VLOG_ERR_BUF(errormsg, "No UDP checksum support");
+ return false;
+ }
+ } else if (dp_packet_hwol_l4_is_sctp(packet)) {
+ if (!(netdev_flags & NETDEV_TX_OFFLOAD_SCTP_CKSUM)) {
+ /* Fall back to SCTP csum in software. */
+ VLOG_ERR_BUF(errormsg, "No SCTP checksum support");
+ return false;
+ }
+ } else {
+ VLOG_ERR_BUF(errormsg, "No L4 checksum support: mask: %"PRIu64,
+ l4_mask);
return false;
+ }
}
return true;
@@ -1961,6 +1984,22 @@ netdev_get_class(const struct netdev *netdev)
return netdev->netdev_class;
}
+/* Sets the type of 'dpif' this 'netdev' belongs to.
+ *
+ * Only the pointer 'type' is stored; the string is not copied.
+ * NOTE(review): the caller presumably has to keep 'type' valid for as long
+ * as 'netdev' may return it -- confirm against the callers. */
+void
+netdev_set_dpif_type(struct netdev *netdev, const char *type)
+{
+ netdev->dpif_type = type;
+}
+
+/* Returns the type of 'dpif' this 'netdev' belongs to, i.e. the pointer
+ * currently stored in its 'dpif_type' member.
+ *
+ * The caller must not free the returned value. */
+const char *
+netdev_get_dpif_type(const struct netdev *netdev)
+{
+ return netdev->dpif_type;
+}
+
/* Returns the netdev with 'name' or NULL if there is none.
*
* The caller must free the returned netdev with netdev_close(). */
diff --git a/lib/netdev.h b/lib/netdev.h
index fdbe0e1f58c52c355054a5c8060a910d412e6a9c..b705a9e56ddbeeb0ec2df676013cba7f18dcc70b 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -107,6 +107,7 @@ struct netdev_tunnel_config {
bool out_key_flow;
ovs_be64 out_key;
+ ovs_be16 payload_ethertype;
ovs_be16 dst_port;
bool ip_src_flow;
@@ -179,6 +180,8 @@ bool netdev_mtu_is_user_config(struct netdev *);
int netdev_get_ifindex(const struct netdev *);
int netdev_set_tx_multiq(struct netdev *, unsigned int n_txq);
enum netdev_pt_mode netdev_get_pt_mode(const struct netdev *);
+void netdev_set_dpif_type(struct netdev *, const char *);
+const char *netdev_get_dpif_type(const struct netdev *);
/* Packet reception. */
int netdev_rxq_open(struct netdev *, struct netdev_rxq **, int id);
diff --git a/lib/netlink-conntrack.c b/lib/netlink-conntrack.c
index 86ab866cf591d3bde6a05d79fa71600f23e3845c..78f1bf60bc29bda6cd8cceccf9c5cbea79f2943f 100644
--- a/lib/netlink-conntrack.c
+++ b/lib/netlink-conntrack.c
@@ -237,7 +237,7 @@ nl_ct_flush(void)
ofpbuf_uninit(&buf);
/* Expectations are flushed automatically, because they do not
- * have a master connection anymore */
+ * have a parent connection anymore */
return err;
}
@@ -344,7 +344,7 @@ nl_ct_flush_zone(uint16_t flush_zone)
ofpbuf_uninit(&buf);
/* Expectations are flushed automatically, because they do not
- * have a master connection anymore */
+ * have a parent connection anymore */
return 0;
}
#endif
@@ -1263,7 +1263,7 @@ nl_ct_attrs_to_ct_dpif_entry(struct ct_dpif_entry *entry,
return false;
}
if (attrs[CTA_TUPLE_MASTER] &&
- !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_master,
+ !nl_ct_parse_tuple(attrs[CTA_TUPLE_MASTER], &entry->tuple_parent,
nfgen_family)) {
return false;
}
diff --git a/lib/netlink.c b/lib/netlink.c
index de3ebcd0e7925d38addd3e5470453918edc9113d..26ab20bb4b4db1b6b6d29b9d4a99dd7560680ea7 100644
--- a/lib/netlink.c
+++ b/lib/netlink.c
@@ -498,6 +498,7 @@ void
nl_msg_end_nested(struct ofpbuf *msg, size_t offset)
{
struct nlattr *attr = ofpbuf_at_assert(msg, offset, sizeof *attr);
+ ovs_assert(!nl_attr_oversized(msg->size - offset - NLA_HDRLEN));
attr->nla_len = msg->size - offset;
}
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 0432ad4de6a7b82334b04a7f08c42c6c9b1fe567..440f5f7630c9557a227985b24401ce39c879b3c5 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Nicira, Inc.
+ * Copyright (c) 2010-2017, 2020 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1051,7 +1051,7 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
ovs_be32 spi_mask;
int match_len;
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
struct nxm_put_ctx ctx = { .output = b, .implied_ethernet = false };
@@ -1133,6 +1133,11 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
mpls_lse_to_bos(flow->mpls_lse[0]));
}
+ if (match->wc.masks.mpls_lse[0] & htonl(MPLS_TTL_MASK)) {
+ nxm_put_8(&ctx, MFF_MPLS_TTL, oxm,
+ mpls_lse_to_ttl(flow->mpls_lse[0]));
+ }
+
if (match->wc.masks.mpls_lse[0] & htonl(MPLS_LABEL_MASK)) {
nxm_put_32(&ctx, MFF_MPLS_LABEL, oxm,
htonl(mpls_lse_to_label(flow->mpls_lse[0])));
@@ -1191,6 +1196,12 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match,
nxm_put_8m(&ctx, MFF_TUN_ERSPAN_HWID, oxm,
flow->tunnel.erspan_hwid, match->wc.masks.tunnel.erspan_hwid);
+ /* GTP-U */
+ nxm_put_8m(&ctx, MFF_TUN_GTPU_FLAGS, oxm, flow->tunnel.gtpu_flags,
+ match->wc.masks.tunnel.gtpu_flags);
+ nxm_put_8m(&ctx, MFF_TUN_GTPU_MSGTYPE, oxm, flow->tunnel.gtpu_msgtype,
+ match->wc.masks.tunnel.gtpu_msgtype);
+
/* Network Service Header */
nxm_put_8m(&ctx, MFF_NSH_FLAGS, oxm, flow->nsh.flags,
match->wc.masks.nsh.flags);
@@ -1988,6 +1999,24 @@ nxm_execute_stack_pop(const struct ofpact_stack *pop,
}
}
+/* Looks up the field named by the whole string 's'.  The name is first
+ * tried with nxm_field_by_name(); if that finds nothing, mf_from_name_len()
+ * is used instead.
+ *
+ * If successful, stores the reference to the field in '*field' and returns
+ * NULL.  On failure, '*field' is left null and the return value is an error
+ * message allocated by xasprintf().
+ */
+char * OVS_WARN_UNUSED_RESULT
+mf_parse_field(const struct mf_field **field, const char *s)
+{
+ const struct nxm_field *f;
+ int s_len = strlen(s);
+
+ f = nxm_field_by_name(s, s_len);
+ (*field) = f ? mf_from_id(f->id) : mf_from_name_len(s, s_len);
+ if (!*field) {
+ return xasprintf("unknown field `%s'", s);
+ }
+ return NULL;
+}
+
/* Formats 'sf' into 's' in a format normally acceptable to
* mf_parse_subfield(). (It won't be acceptable if sf->field is NULL or if
* sf->field has no NXM name.) */
diff --git a/lib/nx-match.h b/lib/nx-match.h
index 9be40a98150e511312af41ce108f523dcc255869..3120ac0a0d5fa04578a22e1d8411d95cb18d7ef2 100644
--- a/lib/nx-match.h
+++ b/lib/nx-match.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2017 Nicira, Inc.
+ * Copyright (c) 2010-2017, 2020 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -42,6 +42,8 @@ struct vl_mff_map;
* See include/openflow/nicira-ext.h for NXM specification.
*/
+char * mf_parse_field(const struct mf_field **field, const char *s)
+ OVS_WARN_UNUSED_RESULT;
void mf_format_subfield(const struct mf_subfield *, struct ds *);
char *mf_parse_subfield__(struct mf_subfield *sf, const char **s)
OVS_WARN_UNUSED_RESULT;
diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 42d3335f0fb9e1c95dadca431e23fd16061fed3f..6eeda2a6170fbbf55045ebff46b15b2e9dea4e11 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -761,10 +761,11 @@ odp_execute_check_pkt_len(void *dp, struct dp_packet *packet, bool steal,
const struct nlattr *a;
struct dp_packet_batch pb;
+ uint32_t size = dp_packet_get_send_len(packet)
+ - dp_packet_l2_pad_size(packet);
a = attrs[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN];
- bool is_greater = dp_packet_size(packet) > nl_attr_get_u16(a);
- if (is_greater) {
+ if (size > nl_attr_get_u16(a)) {
a = attrs[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];
} else {
a = attrs[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
@@ -793,6 +794,7 @@ requires_datapath_assistance(const struct nlattr *a)
switch (type) {
/* These only make sense in the context of a datapath. */
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
@@ -1068,6 +1070,7 @@ odp_execute_actions(void *dp, struct dp_packet_batch *batch, bool steal,
return;
}
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
case OVS_ACTION_ATTR_TUNNEL_PUSH:
case OVS_ACTION_ATTR_TUNNEL_POP:
case OVS_ACTION_ATTR_USERSPACE:
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 746d1e97d474fe4f2fd0d43b4282164193bbef05..d65ebb541680f24695ab9c67668986739b4b78be 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -119,6 +119,7 @@ odp_action_len(uint16_t type)
switch ((enum ovs_action_attr) type) {
case OVS_ACTION_ATTR_OUTPUT: return sizeof(uint32_t);
+ case OVS_ACTION_ATTR_LB_OUTPUT: return sizeof(uint32_t);
case OVS_ACTION_ATTR_TRUNC: return sizeof(struct ovs_action_trunc);
case OVS_ACTION_ATTR_TUNNEL_PUSH: return ATTR_LEN_VARIABLE;
case OVS_ACTION_ATTR_TUNNEL_POP: return sizeof(uint32_t);
@@ -756,7 +757,17 @@ format_odp_tnl_push_header(struct ds *ds, struct ovs_action_push_tnl *data)
} else {
VLOG_WARN("%s Invalid ERSPAN version %d\n", __func__, ersh->ver);
}
+ } else if (data->tnl_type == OVS_VPORT_TYPE_GTPU) {
+ const struct gtpuhdr *gtph;
+
+ gtph = format_udp_tnl_push_header(ds, udp);
+
+ ds_put_format(ds, "gtpu(flags=0x%"PRIx8
+ ",msgtype=%"PRIu8",teid=0x%"PRIx32")",
+ gtph->md.flags, gtph->md.msgtype,
+ ntohl(get_16aligned_be32(>ph->teid)));
}
+
ds_put_format(ds, ")");
}
@@ -1122,6 +1133,9 @@ format_odp_action(struct ds *ds, const struct nlattr *a,
case OVS_ACTION_ATTR_OUTPUT:
odp_portno_name_format(portno_names, nl_attr_get_odp_port(a), ds);
break;
+ case OVS_ACTION_ATTR_LB_OUTPUT:
+ ds_put_format(ds, "lb_output(%"PRIu32")", nl_attr_get_u32(a));
+ break;
case OVS_ACTION_ATTR_TRUNC: {
const struct ovs_action_trunc *trunc =
nl_attr_get_unspec(a, sizeof *trunc);
@@ -1441,14 +1455,20 @@ parse_odp_userspace_action(const char *s, struct ofpbuf *actions)
int n1 = -1;
if (ovs_scan(&s[n], ",tunnel_out_port=%"SCNi32")%n",
&tunnel_out_port, &n1)) {
- odp_put_userspace_action(pid, user_data, user_data_size,
- tunnel_out_port, include_actions, actions);
- res = n + n1;
+ res = odp_put_userspace_action(pid, user_data, user_data_size,
+ tunnel_out_port, include_actions,
+ actions, NULL);
+ if (!res) {
+ res = n + n1;
+ }
goto out;
} else if (s[n] == ')') {
- odp_put_userspace_action(pid, user_data, user_data_size,
- ODPP_NONE, include_actions, actions);
- res = n + 1;
+ res = odp_put_userspace_action(pid, user_data, user_data_size,
+ ODPP_NONE, include_actions,
+ actions, NULL);
+ if (!res) {
+ res = n + 1;
+ }
goto out;
}
}
@@ -1500,6 +1520,8 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
void *l3, *l4;
int n = 0;
uint8_t hwid, dir;
+ uint32_t teid;
+ uint8_t gtpu_flags, gtpu_msgtype;
if (!ovs_scan_len(s, &n, "tnl_push(tnl_port(%"SCNi32"),", &data->tnl_port)) {
return -EINVAL;
@@ -1729,6 +1751,18 @@ ovs_parse_tnl_push(const char *s, struct ovs_action_push_tnl *data)
header_len = sizeof *eth + ip_len + ERSPAN_GREHDR_LEN +
sizeof *ersh + ERSPAN_V2_MDSIZE;
+
+ } else if (ovs_scan_len(s, &n, "gtpu(flags=%"SCNi8",msgtype=%"
+ SCNu8",teid=0x%"SCNx32"))",
+ &gtpu_flags, &gtpu_msgtype, &teid)) {
+ /* The GTP-U header is placed immediately after the UDP header. */
+ struct gtpuhdr *gtph = (struct gtpuhdr *) (udp + 1);
+
+ gtph->md.flags = gtpu_flags;
+ gtph->md.msgtype = gtpu_msgtype;
+ put_16aligned_be32(&gtph->teid, htonl(teid));
+ tnl_type = OVS_VPORT_TYPE_GTPU;
+ header_len = sizeof *eth + ip_len +
+ sizeof *udp + sizeof *gtph;
} else {
return -EINVAL;
}
@@ -2281,6 +2315,16 @@ parse_odp_action__(struct parse_odp_context *context, const char *s,
}
}
+ {
+ uint32_t bond_id;
+ int n;
+
+ /* Parse "lb_output(<bond id>)" into an OVS_ACTION_ATTR_LB_OUTPUT
+ * attribute. SCNu32 (not PRIu32) is the scan conversion macro for a
+ * uint32_t destination. */
+ if (ovs_scan(s, "lb_output(%"SCNu32")%n", &bond_id, &n)) {
+ nl_msg_put_u32(actions, OVS_ACTION_ATTR_LB_OUTPUT, bond_id);
+ return n;
+ }
+ }
+
{
uint32_t max_len;
int n;
@@ -2630,6 +2674,7 @@ static const struct attr_len_tbl ovs_tun_key_attr_lens[OVS_TUNNEL_KEY_ATTR_MAX +
[OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = 16 },
[OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = 16 },
[OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = ATTR_LEN_VARIABLE },
+ [OVS_TUNNEL_KEY_ATTR_GTPU_OPTS] = { .len = ATTR_LEN_VARIABLE },
};
const struct attr_len_tbl ovs_flow_key_attr_lens[OVS_KEY_ATTR_MAX + 1] = {
@@ -3035,6 +3080,13 @@ odp_tun_key_from_attr__(const struct nlattr *attr, bool is_mask,
}
break;
}
+ case OVS_TUNNEL_KEY_ATTR_GTPU_OPTS: {
+ const struct gtpu_metadata *opts = nl_attr_get(a);
+
+ tun->gtpu_flags = opts->flags;
+ tun->gtpu_msgtype = opts->msgtype;
+ break;
+ }
default:
/* Allow this to show up as unexpected, if there are unknown
@@ -3149,6 +3201,15 @@ tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key,
&opts, sizeof(opts));
}
+ /* GTP-U options are emitted only when no tunnel type is given or the type
+ * is "gtpu", and only when both the flags and the message type are
+ * nonzero. */
+ if ((!tnl_type || !strcmp(tnl_type, "gtpu")) &&
+ (tun_key->gtpu_flags && tun_key->gtpu_msgtype)) {
+ struct gtpu_metadata opts;
+
+ opts.flags = tun_key->gtpu_flags;
+ opts.msgtype = tun_key->gtpu_msgtype;
+ nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GTPU_OPTS,
+ &opts, sizeof(opts));
+ }
nl_msg_end_nested(a, tun_key_ofs);
}
@@ -3645,6 +3706,22 @@ format_odp_tun_erspan_opt(const struct nlattr *attr,
ds_chomp(ds, ',');
}
+/* Formats the GTP-U tunnel options carried in 'attr' into 'ds' as a "flags"
+ * field followed by a "msgtype" field. The payloads of 'attr' and, if
+ * nonnull, 'mask_attr' are read as 'struct gtpu_metadata'. When 'mask_attr'
+ * is null, no mask is passed to the field formatters. 'verbose' is
+ * forwarded to the field formatters unchanged. */
+static void
+format_odp_tun_gtpu_opt(const struct nlattr *attr,
+ const struct nlattr *mask_attr, struct ds *ds,
+ bool verbose)
+{
+ const struct gtpu_metadata *opts, *mask;
+
+ opts = nl_attr_get(attr);
+ mask = mask_attr ? nl_attr_get(mask_attr) : NULL;
+
+ format_u8x(ds, "flags", opts->flags, mask ? &mask->flags : NULL, verbose);
+ format_u8u(ds, "msgtype", opts->msgtype, mask ? &mask->msgtype : NULL,
+ verbose);
+ /* NOTE(review): presumably strips the ',' left by the last formatter --
+ * confirm against ds_chomp(). */
+ ds_chomp(ds, ',');
+}
+
#define MASK(PTR, FIELD) PTR ? &PTR->FIELD : NULL
static void
@@ -3897,6 +3974,11 @@ format_odp_tun_attr(const struct nlattr *attr, const struct nlattr *mask_attr,
format_odp_tun_erspan_opt(a, ma, ds, verbose);
ds_put_cstr(ds, "),");
break;
+ case OVS_TUNNEL_KEY_ATTR_GTPU_OPTS:
+ ds_put_cstr(ds, "gtpu(");
+ format_odp_tun_gtpu_opt(a, ma, ds, verbose);
+ ds_put_cstr(ds, "),");
+ break;
case __OVS_TUNNEL_KEY_ATTR_MAX:
default:
format_unknown_key(ds, a, ma);
@@ -5104,6 +5186,50 @@ scan_vxlan_gbp(const char *s, uint32_t *key, uint32_t *mask)
return 0;
}
+/* Scans GTP-U tunnel metadata from 's', which may contain an optional
+ * "flags=<u8>" field, an optional ',' and an optional "msgtype=<u8>" field,
+ * followed by ")". Fields that are absent are left at 0.
+ *
+ * On reaching the closing ")", stores the scanned values in 'key' and, if
+ * 'mask' is nonnull, the scanned masks in 'mask'.
+ *
+ * Returns the number of characters consumed from 's', or 0 if scan_u8()
+ * reports 0 for either field. */
+static int
+scan_gtpu_metadata(const char *s,
+ struct gtpu_metadata *key,
+ struct gtpu_metadata *mask)
+{
+ const char *s_base = s;
+ uint8_t flags = 0, flags_ma = 0;
+ uint8_t msgtype = 0, msgtype_ma = 0;
+ int len;
+
+ if (!strncmp(s, "flags=", 6)) {
+ s += 6;
+ /* A mask destination is supplied only when the caller wants a mask. */
+ len = scan_u8(s, &flags, mask ? &flags_ma : NULL);
+ if (len == 0) {
+ return 0;
+ }
+ s += len;
+ }
+
+ /* Skip the optional separator between the two fields. */
+ if (s[0] == ',') {
+ s++;
+ }
+
+ if (!strncmp(s, "msgtype=", 8)) {
+ s += 8;
+ len = scan_u8(s, &msgtype, mask ? &msgtype_ma : NULL);
+ if (len == 0) {
+ return 0;
+ }
+ s += len;
+ }
+
+ /* 'key' and 'mask' are written only when the closing ")" is present.
+ * NOTE(review): without it, the consumed length is still returned while
+ * 'key'/'mask' stay untouched -- confirm callers expect this. */
+ if (!strncmp(s, ")", 1)) {
+ s += 1;
+ key->flags = flags;
+ key->msgtype = msgtype;
+ if (mask) {
+ mask->flags = flags_ma;
+ mask->msgtype = msgtype_ma;
+ }
+ }
+ return s - s_base;
+}
+
static int
scan_erspan_metadata(const char *s,
struct erspan_metadata *key,
@@ -5344,6 +5470,15 @@ erspan_to_attr(struct ofpbuf *a, const void *data_)
sizeof *md);
}
+/* Appends 'data_', interpreted as a 'struct gtpu_metadata', to 'a' as an
+ * OVS_TUNNEL_KEY_ATTR_GTPU_OPTS attribute of 'sizeof *md' bytes. */
+static void
+gtpu_to_attr(struct ofpbuf *a, const void *data_)
+{
+ const struct gtpu_metadata *md = data_;
+
+ nl_msg_put_unspec(a, OVS_TUNNEL_KEY_ATTR_GTPU_OPTS, md,
+ sizeof *md);
+}
+
#define SCAN_PUT_ATTR(BUF, ATTR, DATA, FUNC) \
{ \
unsigned long call_fn = (unsigned long)FUNC; \
@@ -5428,13 +5563,16 @@ erspan_to_attr(struct ofpbuf *a, const void *data_)
do { \
len = 0;
-#define SCAN_END_NESTED() \
- SCAN_FINISH(); \
- nl_msg_end_nested(key, key_offset); \
- if (mask) { \
- nl_msg_end_nested(mask, mask_offset); \
- } \
- return s - start; \
+/* Finishes a nested scan: after SCAN_FINISH(), returns -E2BIG from the
+ * enclosing function if nl_attr_oversized() rejects the nested key payload
+ * size (key->size - key_offset - NLA_HDRLEN). Otherwise ends the nested
+ * attribute in 'key' (and in 'mask', if nonnull) and returns the number of
+ * characters consumed, 's - start'. */
+#define SCAN_END_NESTED() \
+ SCAN_FINISH(); \
+ if (nl_attr_oversized(key->size - key_offset - NLA_HDRLEN)) { \
+ return -E2BIG; \
+ } \
+ nl_msg_end_nested(key, key_offset); \
+ if (mask) { \
+ nl_msg_end_nested(mask, mask_offset); \
+ } \
+ return s - start; \
}
#define SCAN_FIELD_NESTED__(NAME, TYPE, SCAN_AS, ATTR, FUNC) \
@@ -5730,6 +5868,8 @@ parse_odp_key_mask_attr__(struct parse_odp_context *context, const char *s,
SCAN_FIELD_NESTED_FUNC("vxlan(gbp(", uint32_t, vxlan_gbp, vxlan_gbp_to_attr);
SCAN_FIELD_NESTED_FUNC("geneve(", struct geneve_scan, geneve,
geneve_to_attr);
+ SCAN_FIELD_NESTED_FUNC("gtpu(", struct gtpu_metadata, gtpu_metadata,
+ gtpu_to_attr);
SCAN_FIELD_NESTED_FUNC("flags(", uint16_t, tun_flags, tun_flags_to_attr);
} SCAN_END_NESTED();
@@ -5997,7 +6137,7 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms,
/* New "struct flow" fields that are visible to the datapath (including all
* data fields) should be translated into equivalent datapath flow fields
* here (you will have to add a OVS_KEY_ATTR_* for them). */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
struct ovs_key_ethernet *eth_key;
size_t encap[FLOW_MAX_VLAN_HEADERS] = {0};
@@ -6008,7 +6148,8 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms,
nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority);
- if (flow_tnl_dst_is_set(&flow->tunnel) || export_mask) {
+ if (flow_tnl_dst_is_set(&flow->tunnel) ||
+ flow_tnl_src_is_set(&flow->tunnel) || export_mask) {
tun_key_to_attr(buf, &data->tunnel, &parms->flow->tunnel,
parms->key_buf, NULL);
}
@@ -6225,7 +6366,9 @@ odp_flow_key_from_flow__(const struct odp_flow_key_parms *parms,
struct ovs_key_nd_extensions *nd_ext_key;
if (data->igmp_group_ip4 != 0 || data->tcp_flags != 0) {
- nd_ext_key = nl_msg_put_unspec_uninit(buf,
+ /* 'struct ovs_key_nd_extensions' has padding,
+ * clear it. */
+ nd_ext_key = nl_msg_put_unspec_zero(buf,
OVS_KEY_ATTR_ND_EXTENSIONS,
sizeof *nd_ext_key);
nd_ext_key->nd_reserved = data->igmp_group_ip4;
@@ -6275,6 +6418,10 @@ odp_key_from_dp_packet(struct ofpbuf *buf, const struct dp_packet *packet)
nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, md->skb_priority);
+ if (md->dp_hash) {
+ nl_msg_put_u32(buf, OVS_KEY_ATTR_DP_HASH, md->dp_hash);
+ }
+
if (flow_tnl_dst_is_set(&md->tunnel)) {
tun_key_to_attr(buf, &md->tunnel, &md->tunnel, NULL, NULL);
}
@@ -7096,7 +7243,7 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len,
/* New "struct flow" fields that are visible to the datapath (including all
* data fields) should be translated from equivalent datapath flow fields
* here (you will have to add a OVS_KEY_ATTR_* for them). */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
enum odp_key_fitness fitness = ODP_FIT_ERROR;
if (errorp) {
@@ -7416,15 +7563,18 @@ odp_key_fitness_to_string(enum odp_key_fitness fitness)
/* Appends an OVS_ACTION_ATTR_USERSPACE action to 'odp_actions' that specifies
* Netlink PID 'pid'. If 'userdata' is nonnull, adds a userdata attribute
- * whose contents are the 'userdata_size' bytes at 'userdata' and returns the
- * offset within 'odp_actions' of the start of the cookie. (If 'userdata' is
- * null, then the return value is not meaningful.) */
-size_t
+ * whose contents are the 'userdata_size' bytes at 'userdata' and sets
+ * 'odp_actions_ofs' if nonnull with the offset within 'odp_actions' of the
+ * start of the cookie. (If 'userdata' is null, then the 'odp_actions_ofs'
+ * value is not meaningful.)
+ *
+ * Returns negative error code on failure. */
+int
odp_put_userspace_action(uint32_t pid,
const void *userdata, size_t userdata_size,
odp_port_t tunnel_out_port,
bool include_actions,
- struct ofpbuf *odp_actions)
+ struct ofpbuf *odp_actions, size_t *odp_actions_ofs)
{
size_t userdata_ofs;
size_t offset;
@@ -7432,6 +7582,9 @@ odp_put_userspace_action(uint32_t pid,
offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_USERSPACE);
nl_msg_put_u32(odp_actions, OVS_USERSPACE_ATTR_PID, pid);
if (userdata) {
+ if (nl_attr_oversized(userdata_size)) {
+ return -E2BIG;
+ }
userdata_ofs = odp_actions->size + NLA_HDRLEN;
/* The OVS kernel module before OVS 1.11 and the upstream Linux kernel
@@ -7457,9 +7610,16 @@ odp_put_userspace_action(uint32_t pid,
if (include_actions) {
nl_msg_put_flag(odp_actions, OVS_USERSPACE_ATTR_ACTIONS);
}
+ if (nl_attr_oversized(odp_actions->size - offset - NLA_HDRLEN)) {
+ return -E2BIG;
+ }
nl_msg_end_nested(odp_actions, offset);
- return userdata_ofs;
+ if (odp_actions_ofs) {
+ *odp_actions_ofs = userdata_ofs;
+ }
+
+ return 0;
}
void
@@ -7565,6 +7725,28 @@ struct offsetof_sizeof {
int size;
};
+
+/* Performs bitwise OR over the fields in 'dst_' and 'src_' specified in
+ * 'offsetof_sizeof_arr' array. Result is stored in 'dst_'.
+ *
+ * 'offsetof_sizeof_arr' must be terminated by an entry whose 'size' is 0;
+ * the loop has no other exit condition. */
+static void
+or_masks(void *dst_, const void *src_,
+ struct offsetof_sizeof *offsetof_sizeof_arr)
+{
+ int field, size, offset;
+ const uint8_t *src = src_;
+ uint8_t *dst = dst_;
+
+ for (field = 0; ; field++) {
+ size = offsetof_sizeof_arr[field].size;
+ offset = offsetof_sizeof_arr[field].offset;
+
+ /* A 0-size entry marks the end of the array. */
+ if (!size) {
+ return;
+ }
+ /* Both buffers are indexed with the same byte offset. */
+ or_bytes(dst + offset, src + offset, size);
+ }
+}
+
/* Compares each of the fields in 'key0' and 'key1'. The fields are specified
* in 'offsetof_sizeof_arr', which is an array terminated by a 0-size field.
* Returns true if all of the fields are equal, false if at least one differs.
@@ -7643,9 +7825,10 @@ commit_set_ether_action(const struct flow *flow, struct flow *base_flow,
struct flow_wildcards *wc,
bool use_masked)
{
- struct ovs_key_ethernet key, base, mask;
+ struct ovs_key_ethernet key, base, mask, orig_mask;
struct offsetof_sizeof ovs_key_ethernet_offsetof_sizeof_arr[] =
OVS_KEY_ETHERNET_OFFSETOF_SIZEOF_ARR;
+
if (flow->packet_type != htonl(PT_ETH)) {
return;
}
@@ -7653,11 +7836,13 @@ commit_set_ether_action(const struct flow *flow, struct flow *base_flow,
get_ethernet_key(flow, &key);
get_ethernet_key(base_flow, &base);
get_ethernet_key(&wc->masks, &mask);
+ memcpy(&orig_mask, &mask, sizeof mask);
if (commit(OVS_KEY_ATTR_ETHERNET, use_masked,
&key, &base, &mask, sizeof key,
ovs_key_ethernet_offsetof_sizeof_arr, odp_actions)) {
put_ethernet_key(&base, base_flow);
+ or_masks(&mask, &orig_mask, ovs_key_ethernet_offsetof_sizeof_arr);
put_ethernet_key(&mask, &wc->masks);
}
}
@@ -7781,7 +7966,7 @@ commit_set_ipv4_action(const struct flow *flow, struct flow *base_flow,
struct ofpbuf *odp_actions, struct flow_wildcards *wc,
bool use_masked)
{
- struct ovs_key_ipv4 key, mask, base;
+ struct ovs_key_ipv4 key, mask, orig_mask, base;
struct offsetof_sizeof ovs_key_ipv4_offsetof_sizeof_arr[] =
OVS_KEY_IPV4_OFFSETOF_SIZEOF_ARR;
@@ -7792,6 +7977,7 @@ commit_set_ipv4_action(const struct flow *flow, struct flow *base_flow,
get_ipv4_key(flow, &key, false);
get_ipv4_key(base_flow, &base, false);
get_ipv4_key(&wc->masks, &mask, true);
+ memcpy(&orig_mask, &mask, sizeof mask);
mask.ipv4_proto = 0; /* Not writeable. */
mask.ipv4_frag = 0; /* Not writable. */
@@ -7803,9 +7989,8 @@ commit_set_ipv4_action(const struct flow *flow, struct flow *base_flow,
if (commit(OVS_KEY_ATTR_IPV4, use_masked, &key, &base, &mask, sizeof key,
ovs_key_ipv4_offsetof_sizeof_arr, odp_actions)) {
put_ipv4_key(&base, base_flow, false);
- if (mask.ipv4_proto != 0) { /* Mask was changed by commit(). */
- put_ipv4_key(&mask, &wc->masks, true);
- }
+ or_masks(&mask, &orig_mask, ovs_key_ipv4_offsetof_sizeof_arr);
+ put_ipv4_key(&mask, &wc->masks, true);
}
}
@@ -7838,7 +8023,7 @@ commit_set_ipv6_action(const struct flow *flow, struct flow *base_flow,
struct ofpbuf *odp_actions, struct flow_wildcards *wc,
bool use_masked)
{
- struct ovs_key_ipv6 key, mask, base;
+ struct ovs_key_ipv6 key, mask, orig_mask, base;
struct offsetof_sizeof ovs_key_ipv6_offsetof_sizeof_arr[] =
OVS_KEY_IPV6_OFFSETOF_SIZEOF_ARR;
@@ -7849,6 +8034,7 @@ commit_set_ipv6_action(const struct flow *flow, struct flow *base_flow,
get_ipv6_key(flow, &key, false);
get_ipv6_key(base_flow, &base, false);
get_ipv6_key(&wc->masks, &mask, true);
+ memcpy(&orig_mask, &mask, sizeof mask);
mask.ipv6_proto = 0; /* Not writeable. */
mask.ipv6_frag = 0; /* Not writable. */
mask.ipv6_label &= htonl(IPV6_LABEL_MASK); /* Not writable. */
@@ -7861,9 +8047,8 @@ commit_set_ipv6_action(const struct flow *flow, struct flow *base_flow,
if (commit(OVS_KEY_ATTR_IPV6, use_masked, &key, &base, &mask, sizeof key,
ovs_key_ipv6_offsetof_sizeof_arr, odp_actions)) {
put_ipv6_key(&base, base_flow, false);
- if (mask.ipv6_proto != 0) { /* Mask was changed by commit(). */
- put_ipv6_key(&mask, &wc->masks, true);
- }
+ or_masks(&mask, &orig_mask, ovs_key_ipv6_offsetof_sizeof_arr);
+ put_ipv6_key(&mask, &wc->masks, true);
}
}
@@ -7875,7 +8060,8 @@ get_arp_key(const struct flow *flow, struct ovs_key_arp *arp)
arp->arp_sip = flow->nw_src;
arp->arp_tip = flow->nw_dst;
- arp->arp_op = htons(flow->nw_proto);
+ arp->arp_op = flow->nw_proto == UINT8_MAX ?
+ OVS_BE16_MAX : htons(flow->nw_proto);
arp->arp_sha = flow->arp_sha;
arp->arp_tha = flow->arp_tha;
}
@@ -7894,17 +8080,19 @@ static enum slow_path_reason
commit_set_arp_action(const struct flow *flow, struct flow *base_flow,
struct ofpbuf *odp_actions, struct flow_wildcards *wc)
{
- struct ovs_key_arp key, mask, base;
+ struct ovs_key_arp key, mask, orig_mask, base;
struct offsetof_sizeof ovs_key_arp_offsetof_sizeof_arr[] =
OVS_KEY_ARP_OFFSETOF_SIZEOF_ARR;
get_arp_key(flow, &key);
get_arp_key(base_flow, &base);
get_arp_key(&wc->masks, &mask);
+ memcpy(&orig_mask, &mask, sizeof mask);
if (commit(OVS_KEY_ATTR_ARP, true, &key, &base, &mask, sizeof key,
ovs_key_arp_offsetof_sizeof_arr, odp_actions)) {
put_arp_key(&base, base_flow);
+ or_masks(&mask, &orig_mask, ovs_key_arp_offsetof_sizeof_arr);
put_arp_key(&mask, &wc->masks);
return SLOW_ACTION;
}
@@ -7931,7 +8119,7 @@ static enum slow_path_reason
commit_set_icmp_action(const struct flow *flow, struct flow *base_flow,
struct ofpbuf *odp_actions, struct flow_wildcards *wc)
{
- struct ovs_key_icmp key, mask, base;
+ struct ovs_key_icmp key, mask, orig_mask, base;
struct offsetof_sizeof ovs_key_icmp_offsetof_sizeof_arr[] =
OVS_KEY_ICMP_OFFSETOF_SIZEOF_ARR;
enum ovs_key_attr attr;
@@ -7947,10 +8135,12 @@ commit_set_icmp_action(const struct flow *flow, struct flow *base_flow,
get_icmp_key(flow, &key);
get_icmp_key(base_flow, &base);
get_icmp_key(&wc->masks, &mask);
+ memcpy(&orig_mask, &mask, sizeof mask);
if (commit(attr, false, &key, &base, &mask, sizeof key,
ovs_key_icmp_offsetof_sizeof_arr, odp_actions)) {
put_icmp_key(&base, base_flow);
+ or_masks(&mask, &orig_mask, ovs_key_icmp_offsetof_sizeof_arr);
put_icmp_key(&mask, &wc->masks);
return SLOW_ACTION;
}
@@ -7998,17 +8188,19 @@ commit_set_nd_action(const struct flow *flow, struct flow *base_flow,
struct ofpbuf *odp_actions,
struct flow_wildcards *wc, bool use_masked)
{
- struct ovs_key_nd key, mask, base;
+ struct ovs_key_nd key, mask, orig_mask, base;
struct offsetof_sizeof ovs_key_nd_offsetof_sizeof_arr[] =
OVS_KEY_ND_OFFSETOF_SIZEOF_ARR;
get_nd_key(flow, &key);
get_nd_key(base_flow, &base);
get_nd_key(&wc->masks, &mask);
+ memcpy(&orig_mask, &mask, sizeof mask);
if (commit(OVS_KEY_ATTR_ND, use_masked, &key, &base, &mask, sizeof key,
ovs_key_nd_offsetof_sizeof_arr, odp_actions)) {
put_nd_key(&base, base_flow);
+ or_masks(&mask, &orig_mask, ovs_key_nd_offsetof_sizeof_arr);
put_nd_key(&mask, &wc->masks);
return SLOW_ACTION;
}
@@ -8022,18 +8214,20 @@ commit_set_nd_extensions_action(const struct flow *flow,
struct ofpbuf *odp_actions,
struct flow_wildcards *wc, bool use_masked)
{
- struct ovs_key_nd_extensions key, mask, base;
+ struct ovs_key_nd_extensions key, mask, orig_mask, base;
struct offsetof_sizeof ovs_key_nd_extensions_offsetof_sizeof_arr[] =
OVS_KEY_ND_EXTENSIONS_OFFSETOF_SIZEOF_ARR;
get_nd_extensions_key(flow, &key);
get_nd_extensions_key(base_flow, &base);
get_nd_extensions_key(&wc->masks, &mask);
+ memcpy(&orig_mask, &mask, sizeof mask);
if (commit(OVS_KEY_ATTR_ND_EXTENSIONS, use_masked, &key, &base, &mask,
sizeof key, ovs_key_nd_extensions_offsetof_sizeof_arr,
odp_actions)) {
put_nd_extensions_key(&base, base_flow);
+ or_masks(&mask, &orig_mask, ovs_key_nd_extensions_offsetof_sizeof_arr);
put_nd_extensions_key(&mask, &wc->masks);
return SLOW_ACTION;
}
@@ -8248,7 +8442,7 @@ commit_set_port_action(const struct flow *flow, struct flow *base_flow,
bool use_masked)
{
enum ovs_key_attr key_type;
- union ovs_key_tp key, mask, base;
+ union ovs_key_tp key, mask, orig_mask, base;
struct offsetof_sizeof ovs_key_tp_offsetof_sizeof_arr[] =
OVS_KEY_TCP_OFFSETOF_SIZEOF_ARR;
@@ -8274,10 +8468,12 @@ commit_set_port_action(const struct flow *flow, struct flow *base_flow,
get_tp_key(flow, &key);
get_tp_key(base_flow, &base);
get_tp_key(&wc->masks, &mask);
+ memcpy(&orig_mask, &mask, sizeof mask);
if (commit(key_type, use_masked, &key, &base, &mask, sizeof key,
ovs_key_tp_offsetof_sizeof_arr, odp_actions)) {
put_tp_key(&base, base_flow);
+ or_masks(&mask, &orig_mask, ovs_key_tp_offsetof_sizeof_arr);
put_tp_key(&mask, &wc->masks);
}
}
@@ -8301,7 +8497,7 @@ commit_set_priority_action(const struct flow *flow, struct flow *base_flow,
if (commit(OVS_KEY_ATTR_PRIORITY, use_masked, &key, &base, &mask,
sizeof key, ovs_key_prio_offsetof_sizeof_arr, odp_actions)) {
base_flow->skb_priority = base;
- wc->masks.skb_priority = mask;
+ wc->masks.skb_priority |= mask;
}
}
@@ -8325,7 +8521,7 @@ commit_set_pkt_mark_action(const struct flow *flow, struct flow *base_flow,
sizeof key, ovs_key_pkt_mark_offsetof_sizeof_arr,
odp_actions)) {
base_flow->pkt_mark = base;
- wc->masks.pkt_mark = mask;
+ wc->masks.pkt_mark |= mask;
}
}
@@ -8445,7 +8641,7 @@ commit_odp_actions(const struct flow *flow, struct flow *base,
/* If you add a field that OpenFlow actions can change, and that is visible
* to the datapath (including all data fields), then you should also add
* code here to commit changes to the field. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
enum slow_path_reason slow1, slow2;
bool mpls_done = false;
diff --git a/lib/odp-util.h b/lib/odp-util.h
index 4ecce1aac5d6774bfe409b03208a4b7626d2b908..a1d0d0fba5decb1c8dd2c74cf78349fc7e5d0241 100644
--- a/lib/odp-util.h
+++ b/lib/odp-util.h
@@ -147,7 +147,7 @@ void odp_portno_name_format(const struct hmap *portno_names,
* add another field and forget to adjust this value.
*/
#define ODPUTIL_FLOW_KEY_BYTES 640
-BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
/* A buffer with sufficient size and alignment to hold an nlattr-formatted flow
* key. An array of "struct nlattr" might not, in theory, be sufficiently
@@ -356,11 +356,12 @@ struct user_action_cookie {
};
BUILD_ASSERT_DECL(sizeof(struct user_action_cookie) == 48);
-size_t odp_put_userspace_action(uint32_t pid,
- const void *userdata, size_t userdata_size,
- odp_port_t tunnel_out_port,
- bool include_actions,
- struct ofpbuf *odp_actions);
+int odp_put_userspace_action(uint32_t pid,
+ const void *userdata, size_t userdata_size,
+ odp_port_t tunnel_out_port,
+ bool include_actions,
+ struct ofpbuf *odp_actions,
+ size_t *odp_actions_ofs);
void odp_put_tunnel_action(const struct flow_tnl *tunnel,
struct ofpbuf *odp_actions,
const char *tnl_type);
diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c
index ddef3b0c8780d4b76125605621b4999776e0dd2d..e2e829772a5193c82d776f0b60a34b8ef29a87a2 100644
--- a/lib/ofp-actions.c
+++ b/lib/ofp-actions.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2008-2017, 2019 Nicira, Inc.
+ * Copyright (c) 2008-2017, 2019-2020 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -361,6 +361,9 @@ enum ofp_raw_action_type {
/* NX1.0+(49): struct nx_action_check_pkt_larger, ... VLMFF */
NXAST_RAW_CHECK_PKT_LARGER,
+ /* NX1.0+(50): struct nx_action_delete_field. VLMFF */
+ NXAST_RAW_DELETE_FIELD,
+
/* ## ------------------ ## */
/* ## Debugging actions. ## */
/* ## ------------------ ## */
@@ -500,6 +503,7 @@ ofpact_next_flattened(const struct ofpact *ofpact)
case OFPACT_DECAP:
case OFPACT_DEC_NSH_TTL:
case OFPACT_CHECK_PKT_LARGER:
+ case OFPACT_DELETE_FIELD:
return ofpact_next(ofpact);
case OFPACT_CLONE:
@@ -1330,39 +1334,39 @@ check_OUTPUT_REG(const struct ofpact_output_reg *a,
/* Action structure for NXAST_BUNDLE and NXAST_BUNDLE_LOAD.
*
- * The bundle actions choose a slave from a supplied list of options.
+ * The bundle actions choose a member from a supplied list of options.
* NXAST_BUNDLE outputs to its selection. NXAST_BUNDLE_LOAD writes its
* selection to a register.
*
- * The list of possible slaves follows the nx_action_bundle structure. The size
- * of each slave is governed by its type as indicated by the 'slave_type'
- * parameter. The list of slaves should be padded at its end with zeros to make
- * the total length of the action a multiple of 8.
+ * The list of possible members follows the nx_action_bundle structure. The
+ * size of each member is governed by its type as indicated by the
+ * 'member_type' parameter. The list of members should be padded at its end
+ * with zeros to make the total length of the action a multiple of 8.
*
- * Switches infer from the 'slave_type' parameter the size of each slave. All
- * implementations must support the NXM_OF_IN_PORT 'slave_type' which indicates
- * that the slaves are OpenFlow port numbers with NXM_LENGTH(NXM_OF_IN_PORT) ==
- * 2 byte width. Switches should reject actions which indicate unknown or
- * unsupported slave types.
+ * Switches infer from the 'member_type' parameter the size of each member.
+ * All implementations must support the NXM_OF_IN_PORT 'member_type' which
+ * indicates that the members are OpenFlow port numbers with
+ * NXM_LENGTH(NXM_OF_IN_PORT) == 2 byte width. Switches should reject actions
+ * which indicate unknown or unsupported member types.
*
* Switches use a strategy dictated by the 'algorithm' parameter to choose a
- * slave. If the switch does not support the specified 'algorithm' parameter,
+ * member. If the switch does not support the specified 'algorithm' parameter,
* it should reject the action.
*
- * Several algorithms take into account liveness when selecting slaves. The
- * liveness of a slave is implementation defined (with one exception), but will
- * generally take into account things like its carrier status and the results
- * of any link monitoring protocols which happen to be running on it. In order
- * to give controllers a place-holder value, the OFPP_NONE port is always
- * considered live, that is, NXAST_BUNDLE_LOAD stores OFPP_NONE in the output
- * register if no slave is live.
- *
- * Some slave selection strategies require the use of a hash function, in which
- * case the 'fields' and 'basis' parameters should be populated. The 'fields'
- * parameter (one of NX_HASH_FIELDS_*) designates which parts of the flow to
- * hash. Refer to the definition of "enum nx_hash_fields" for details. The
- * 'basis' parameter is used as a universal hash parameter. Different values
- * of 'basis' yield different hash results.
+ * Several algorithms take into account liveness when selecting members. The
+ * liveness of a member is implementation defined (with one exception), but
+ * will generally take into account things like its carrier status and the
+ * results of any link monitoring protocols which happen to be running on it.
+ * In order to give controllers a place-holder value, the OFPP_NONE port is
+ * always considered live, that is, NXAST_BUNDLE_LOAD stores OFPP_NONE in the
+ * output register if no member is live.
+ *
+ * Some member selection strategies require the use of a hash function, in
+ * which case the 'fields' and 'basis' parameters should be populated. The
+ * 'fields' parameter (one of NX_HASH_FIELDS_*) designates which parts of the
+ * flow to hash. Refer to the definition of "enum nx_hash_fields" for details.
+ * The 'basis' parameter is used as a universal hash parameter. Different
+ * values of 'basis' yield different hash results.
*
* The 'zero' parameter at the end of the action structure is reserved for
* future use. Switches are required to reject actions which have nonzero
@@ -1371,24 +1375,24 @@ check_OUTPUT_REG(const struct ofpact_output_reg *a,
* NXAST_BUNDLE actions should have 'ofs_nbits' and 'dst' zeroed. Switches
* should reject actions which have nonzero bytes in either of these fields.
*
- * NXAST_BUNDLE_LOAD stores the OpenFlow port number of the selected slave in
+ * NXAST_BUNDLE_LOAD stores the OpenFlow port number of the selected member in
* dst[ofs:ofs+n_bits]. The format and semantics of 'dst' and 'ofs_nbits' are
* similar to those for the NXAST_REG_LOAD action. */
struct nx_action_bundle {
ovs_be16 type; /* OFPAT_VENDOR. */
- ovs_be16 len; /* Length including slaves. */
+ ovs_be16 len; /* Length including members. */
ovs_be32 vendor; /* NX_VENDOR_ID. */
ovs_be16 subtype; /* NXAST_BUNDLE or NXAST_BUNDLE_LOAD. */
- /* Slave choice algorithm to apply to hash value. */
+ /* Member choice algorithm to apply to hash value. */
ovs_be16 algorithm; /* One of NX_BD_ALG_*. */
/* What fields to hash and how. */
ovs_be16 fields; /* One of NX_HASH_FIELDS_*. */
ovs_be16 basis; /* Universal hash parameter. */
- ovs_be32 slave_type; /* NXM_OF_IN_PORT. */
- ovs_be16 n_slaves; /* Number of slaves. */
+ ovs_be32 member_type; /* NXM_OF_IN_PORT. */
+ ovs_be16 n_members; /* Number of members. */
ovs_be16 ofs_nbits; /* (ofs << 6) | (n_bits - 1). */
ovs_be32 dst; /* Destination. */
@@ -1404,29 +1408,29 @@ decode_bundle(bool load, const struct nx_action_bundle *nab,
{
static struct vlog_rate_limit rll = VLOG_RATE_LIMIT_INIT(1, 5);
struct ofpact_bundle *bundle;
- uint32_t slave_type;
- size_t slaves_size, i;
+ uint32_t member_type;
+ size_t members_size, i;
enum ofperr error;
bundle = ofpact_put_BUNDLE(ofpacts);
- bundle->n_slaves = ntohs(nab->n_slaves);
+ bundle->n_members = ntohs(nab->n_members);
bundle->basis = ntohs(nab->basis);
bundle->fields = ntohs(nab->fields);
bundle->algorithm = ntohs(nab->algorithm);
- slave_type = ntohl(nab->slave_type);
- slaves_size = ntohs(nab->len) - sizeof *nab;
+ member_type = ntohl(nab->member_type);
+ members_size = ntohs(nab->len) - sizeof *nab;
error = OFPERR_OFPBAC_BAD_ARGUMENT;
if (!flow_hash_fields_valid(bundle->fields)) {
VLOG_WARN_RL(&rll, "unsupported fields %d", (int) bundle->fields);
- } else if (bundle->n_slaves > BUNDLE_MAX_SLAVES) {
- VLOG_WARN_RL(&rll, "too many slaves");
+ } else if (bundle->n_members > BUNDLE_MAX_MEMBERS) {
+ VLOG_WARN_RL(&rll, "too many members");
} else if (bundle->algorithm != NX_BD_ALG_HRW
&& bundle->algorithm != NX_BD_ALG_ACTIVE_BACKUP) {
VLOG_WARN_RL(&rll, "unsupported algorithm %d", (int) bundle->algorithm);
- } else if (slave_type != mf_nxm_header(MFF_IN_PORT)) {
- VLOG_WARN_RL(&rll, "unsupported slave type %"PRIu32, slave_type);
+ } else if (member_type != mf_nxm_header(MFF_IN_PORT)) {
+ VLOG_WARN_RL(&rll, "unsupported member type %"PRIu32, member_type);
} else {
error = 0;
}
@@ -1457,15 +1461,15 @@ decode_bundle(bool load, const struct nx_action_bundle *nab,
}
}
- if (slaves_size < bundle->n_slaves * sizeof(ovs_be16)) {
+ if (members_size < bundle->n_members * sizeof(ovs_be16)) {
VLOG_WARN_RL(&rll, "Nicira action %s only has %"PRIuSIZE" bytes "
- "allocated for slaves. %"PRIuSIZE" bytes are required "
- "for %u slaves.",
- load ? "bundle_load" : "bundle", slaves_size,
- bundle->n_slaves * sizeof(ovs_be16), bundle->n_slaves);
+ "allocated for members. %"PRIuSIZE" bytes are "
+ "required for %u members.",
+ load ? "bundle_load" : "bundle", members_size,
+ bundle->n_members * sizeof(ovs_be16), bundle->n_members);
error = OFPERR_OFPBAC_BAD_LEN;
} else {
- for (i = 0; i < bundle->n_slaves; i++) {
+ for (i = 0; i < bundle->n_members; i++) {
ofp_port_t ofp_port
= u16_to_ofp(ntohs(((ovs_be16 *)(nab + 1))[i]));
ofpbuf_put(ofpacts, &ofp_port, sizeof ofp_port);
@@ -1502,29 +1506,29 @@ encode_BUNDLE(const struct ofpact_bundle *bundle,
enum ofp_version ofp_version OVS_UNUSED,
struct ofpbuf *out)
{
- int slaves_len = ROUND_UP(2 * bundle->n_slaves, OFP_ACTION_ALIGN);
+ int members_len = ROUND_UP(2 * bundle->n_members, OFP_ACTION_ALIGN);
struct nx_action_bundle *nab;
- ovs_be16 *slaves;
+ ovs_be16 *members;
size_t i;
nab = (bundle->dst.field
? put_NXAST_BUNDLE_LOAD(out)
: put_NXAST_BUNDLE(out));
- nab->len = htons(ntohs(nab->len) + slaves_len);
+ nab->len = htons(ntohs(nab->len) + members_len);
nab->algorithm = htons(bundle->algorithm);
nab->fields = htons(bundle->fields);
nab->basis = htons(bundle->basis);
- nab->slave_type = htonl(mf_nxm_header(MFF_IN_PORT));
- nab->n_slaves = htons(bundle->n_slaves);
+ nab->member_type = htonl(mf_nxm_header(MFF_IN_PORT));
+ nab->n_members = htons(bundle->n_members);
if (bundle->dst.field) {
nab->ofs_nbits = nxm_encode_ofs_nbits(bundle->dst.ofs,
bundle->dst.n_bits);
nab->dst = htonl(nxm_header_from_mff(bundle->dst.field));
}
- slaves = ofpbuf_put_zeros(out, slaves_len);
- for (i = 0; i < bundle->n_slaves; i++) {
- slaves[i] = htons(ofp_to_u16(bundle->slaves[i]));
+ members = ofpbuf_put_zeros(out, members_len);
+ for (i = 0; i < bundle->n_members; i++) {
+ members[i] = htons(ofp_to_u16(bundle->members[i]));
}
}
@@ -3581,7 +3585,7 @@ check_STACK_POP(const struct ofpact_stack *a,
*/
struct nx_action_cnt_ids {
ovs_be16 type; /* OFPAT_VENDOR. */
- ovs_be16 len; /* Length including slaves. */
+ ovs_be16 len; /* Length including cnt_ids. */
ovs_be32 vendor; /* NX_VENDOR_ID. */
ovs_be16 subtype; /* NXAST_DEC_TTL_CNT_IDS. */
@@ -4140,6 +4144,87 @@ check_SET_TUNNEL(const struct ofpact_tunnel *a OVS_UNUSED,
return 0;
}
+/* Delete field action. */
+
+/* Action structure for DELETE_FIELD. */
+struct nx_action_delete_field {
+ ovs_be16 type; /* OFPAT_VENDOR. */
+ ovs_be16 len; /* Length is 24. */
+ ovs_be32 vendor; /* NX_VENDOR_ID. */
+ ovs_be16 subtype; /* NXAST_DELETE_FIELD. */
+ /* Followed by:
+ * - OXM/NXM header for field to delete (4 or 8 bytes).
+ * - Enough 0-bytes to pad out the action to 24 bytes. */
+ uint8_t pad[14];
+};
+/* The fixed size asserted here matches the 'len' documented above. */
+OFP_ASSERT(sizeof(struct nx_action_delete_field ) == 24);
+
+/* Decodes the wire-format delete_field action 'nadf' by appending an
+ * OFPACT_DELETE_FIELD action to 'out' and filling in its 'field' from the
+ * OXM/NXM header stored where 'pad' begins.
+ *
+ * Returns 0 on success, otherwise the error reported by
+ * mf_vl_mff_nx_pull_header(). */
+static enum ofperr
+decode_NXAST_RAW_DELETE_FIELD(const struct nx_action_delete_field *nadf,
+ enum ofp_version ofp_version OVS_UNUSED,
+ const struct vl_mff_map *vl_mff_map,
+ uint64_t *tlv_bitmap, struct ofpbuf *out)
+{
+ struct ofpact_delete_field *delete_field;
+ enum ofperr err;
+
+ delete_field = ofpact_put_DELETE_FIELD(out);
+ delete_field->ofpact.raw = NXAST_RAW_DELETE_FIELD;
+
+ /* View the whole action ('len' bytes) as a buffer and skip the fixed
+ * members that precede 'pad', leaving the field header at the front. */
+ struct ofpbuf b = ofpbuf_const_initializer(nadf, ntohs(nadf->len));
+ ofpbuf_pull(&b, OBJECT_OFFSETOF(nadf, pad));
+
+ err = mf_vl_mff_nx_pull_header(&b, vl_mff_map, &delete_field->field,
+ NULL, tlv_bitmap);
+ if (err) {
+ return err;
+ }
+
+ return 0;
+}
+
+/* Encodes 'delete_field' into 'out' as an NXAST_DELETE_FIELD action, with
+ * the header of the field to delete written into the action's 'pad' area. */
+static void
+encode_DELETE_FIELD(const struct ofpact_delete_field *delete_field,
+ enum ofp_version ofp_version OVS_UNUSED,
+ struct ofpbuf *out)
+{
+ struct nx_action_delete_field *nadf = put_NXAST_DELETE_FIELD(out);
+ size_t size = out->size;
+
+ /* Temporarily shrink 'out' by the size of 'pad', emit the field header,
+ * then restore the original size so the action keeps its full length.
+ * NOTE(review): presumably nx_put_mff_header() writes at the current end
+ * of 'out', i.e. over the start of 'pad' -- confirm. */
+ out->size = size - sizeof nadf->pad;
+ nx_put_mff_header(out, delete_field->field, 0, false);
+ out->size = size;
+}
+
+/* Parses 'arg' as the field of a delete_field action: appends an
+ * OFPACT_DELETE_FIELD action to 'pp->ofpacts' and lets mf_parse_field() fill
+ * in its 'field'. Returns whatever mf_parse_field() returns. */
+static char * OVS_WARN_UNUSED_RESULT
+parse_DELETE_FIELD(char *arg, const struct ofpact_parse_params *pp)
+{
+ struct ofpact_delete_field *delete_field;
+
+ delete_field = ofpact_put_DELETE_FIELD(pp->ofpacts);
+ return mf_parse_field(&delete_field->field, arg);
+}
+
+/* Appends "delete_field:" to 'fp->s', wrapped in the 'colors.param' and
+ * 'colors.end' strings, followed by the name of the field in 'odf'. */
+static void
+format_DELETE_FIELD(const struct ofpact_delete_field *odf,
+ const struct ofpact_format_params *fp)
+{
+ ds_put_format(fp->s, "%sdelete_field:%s", colors.param,
+ colors.end);
+ ds_put_format(fp->s, "%s", odf->field->name);
+}
+
+/* Validates a delete_field action. Only fields whose id lies in the range
+ * MFF_TUN_METADATA0...MFF_TUN_METADATA63 are accepted.
+ *
+ * Returns 0 if the field is in that range, otherwise
+ * OFPERR_OFPBAC_BAD_ARGUMENT. */
+static enum ofperr
+check_DELETE_FIELD(const struct ofpact_delete_field *odf,
+ struct ofpact_check_params *cp OVS_UNUSED)
+{
+ if (odf->field->id < MFF_TUN_METADATA0 ||
+ odf->field->id > MFF_TUN_METADATA63) {
+ return OFPERR_OFPBAC_BAD_ARGUMENT;
+ }
+ return 0;
+}
+
/* Set queue action. */
static enum ofperr
@@ -5966,6 +6051,7 @@ parse_CLONE(char *arg, const struct ofpact_parse_params *pp)
clone = pp->ofpacts->header;
if (ofpbuf_oversized(pp->ofpacts)) {
+ free(error);
return xasprintf("input too big");
}
@@ -6657,6 +6743,7 @@ parse_CT(char *arg, const struct ofpact_parse_params *pp)
}
if (ofpbuf_oversized(pp->ofpacts)) {
+ free(error);
return xasprintf("input too big");
}
@@ -7868,6 +7955,7 @@ action_set_classify(const struct ofpact *a)
case OFPACT_DEBUG_RECIRC:
case OFPACT_DEBUG_SLOW:
case OFPACT_CHECK_PKT_LARGER:
+ case OFPACT_DELETE_FIELD:
return ACTION_SLOT_INVALID;
default:
@@ -8071,6 +8159,7 @@ ovs_instruction_type_from_ofpact_type(enum ofpact_type type,
case OFPACT_DECAP:
case OFPACT_DEC_NSH_TTL:
case OFPACT_CHECK_PKT_LARGER:
+ case OFPACT_DELETE_FIELD:
default:
return OVSINST_OFPIT11_APPLY_ACTIONS;
}
@@ -8982,6 +9071,7 @@ ofpact_outputs_to_port(const struct ofpact *ofpact, ofp_port_t port)
case OFPACT_DECAP:
case OFPACT_DEC_NSH_TTL:
case OFPACT_CHECK_PKT_LARGER:
+ case OFPACT_DELETE_FIELD:
default:
return false;
}
diff --git a/lib/ofp-connection.c b/lib/ofp-connection.c
index 23b80ff39dc42d1c8ae880410ab658f40fb3af43..3a7611b00cc93007d18890937e5e8faf7c5b9d9c 100644
--- a/lib/ofp-connection.c
+++ b/lib/ofp-connection.c
@@ -48,8 +48,8 @@ ofputil_decode_role_message(const struct ofp_header *oh,
if (orr->role != htonl(OFPCR12_ROLE_NOCHANGE) &&
orr->role != htonl(OFPCR12_ROLE_EQUAL) &&
- orr->role != htonl(OFPCR12_ROLE_MASTER) &&
- orr->role != htonl(OFPCR12_ROLE_SLAVE)) {
+ orr->role != htonl(OFPCR12_ROLE_PRIMARY) &&
+ orr->role != htonl(OFPCR12_ROLE_SECONDARY)) {
return OFPERR_OFPRRFC_BAD_ROLE;
}
@@ -68,12 +68,12 @@ ofputil_decode_role_message(const struct ofp_header *oh,
const struct nx_role_request *nrr = b.msg;
BUILD_ASSERT(NX_ROLE_OTHER + 1 == OFPCR12_ROLE_EQUAL);
- BUILD_ASSERT(NX_ROLE_MASTER + 1 == OFPCR12_ROLE_MASTER);
- BUILD_ASSERT(NX_ROLE_SLAVE + 1 == OFPCR12_ROLE_SLAVE);
+ BUILD_ASSERT(NX_ROLE_PRIMARY + 1 == OFPCR12_ROLE_PRIMARY);
+ BUILD_ASSERT(NX_ROLE_SECONDARY + 1 == OFPCR12_ROLE_SECONDARY);
if (nrr->role != htonl(NX_ROLE_OTHER) &&
- nrr->role != htonl(NX_ROLE_MASTER) &&
- nrr->role != htonl(NX_ROLE_SLAVE)) {
+ nrr->role != htonl(NX_ROLE_PRIMARY) &&
+ nrr->role != htonl(NX_ROLE_SECONDARY)) {
return OFPERR_OFPRRFC_BAD_ROLE;
}
@@ -100,11 +100,11 @@ format_role_generic(struct ds *string, enum ofp12_controller_role role,
case OFPCR12_ROLE_EQUAL:
ds_put_cstr(string, "equal"); /* OF 1.2 wording */
break;
- case OFPCR12_ROLE_MASTER:
- ds_put_cstr(string, "master");
+ case OFPCR12_ROLE_PRIMARY:
+ ds_put_cstr(string, "primary");
break;
- case OFPCR12_ROLE_SLAVE:
- ds_put_cstr(string, "slave");
+ case OFPCR12_ROLE_SECONDARY:
+ ds_put_cstr(string, "secondary");
break;
default:
OVS_NOT_REACHED();
@@ -148,8 +148,8 @@ ofputil_encode_role_reply(const struct ofp_header *request,
struct nx_role_request *nrr;
BUILD_ASSERT(NX_ROLE_OTHER == OFPCR12_ROLE_EQUAL - 1);
- BUILD_ASSERT(NX_ROLE_MASTER == OFPCR12_ROLE_MASTER - 1);
- BUILD_ASSERT(NX_ROLE_SLAVE == OFPCR12_ROLE_SLAVE - 1);
+ BUILD_ASSERT(NX_ROLE_PRIMARY == OFPCR12_ROLE_PRIMARY - 1);
+ BUILD_ASSERT(NX_ROLE_SECONDARY == OFPCR12_ROLE_SECONDARY - 1);
buf = ofpraw_alloc_reply(OFPRAW_NXT_ROLE_REPLY, request, 0);
nrr = ofpbuf_put_zeros(buf, sizeof *nrr);
@@ -197,8 +197,8 @@ ofputil_decode_role_status(const struct ofp_header *oh,
const struct ofp14_role_status *r = b.msg;
if (r->role != htonl(OFPCR12_ROLE_NOCHANGE) &&
r->role != htonl(OFPCR12_ROLE_EQUAL) &&
- r->role != htonl(OFPCR12_ROLE_MASTER) &&
- r->role != htonl(OFPCR12_ROLE_SLAVE)) {
+ r->role != htonl(OFPCR12_ROLE_PRIMARY) &&
+ r->role != htonl(OFPCR12_ROLE_SECONDARY)) {
return OFPERR_OFPRRFC_BAD_ROLE;
}
@@ -218,8 +218,8 @@ ofputil_format_role_status(struct ds *string,
ds_put_cstr(string, " reason=");
switch (rs->reason) {
- case OFPCRR_MASTER_REQUEST:
- ds_put_cstr(string, "master_request");
+ case OFPCRR_PRIMARY_REQUEST:
+ ds_put_cstr(string, "primary_request");
break;
case OFPCRR_CONFIG:
ds_put_cstr(string, "configuration_changed");
@@ -254,13 +254,13 @@ ofputil_async_msg_type_to_string(enum ofputil_async_msg_type type)
struct ofp14_async_prop {
uint64_t prop_type;
enum ofputil_async_msg_type oam;
- bool master;
+ bool primary;
uint32_t allowed10, allowed14;
};
-#define AP_PAIR(SLAVE_PROP_TYPE, OAM, A10, A14) \
- { SLAVE_PROP_TYPE, OAM, false, A10, (A14) ? (A14) : (A10) }, \
- { (SLAVE_PROP_TYPE + 1), OAM, true, A10, (A14) ? (A14) : (A10) }
+#define AP_PAIR(SECONDARY_PROP_TYPE, OAM, A10, A14) \
+ { SECONDARY_PROP_TYPE, OAM, false, A10, (A14) ? (A14) : (A10) }, \
+ { (SECONDARY_PROP_TYPE + 1), OAM, true, A10, (A14) ? (A14) : (A10) }
static const struct ofp14_async_prop async_props[] = {
AP_PAIR( 0, OAM_PACKET_IN, OFPR10_BITS, OFPR14_BITS),
@@ -288,10 +288,10 @@ get_ofp14_async_config_prop_by_prop_type(uint64_t prop_type)
static const struct ofp14_async_prop *
get_ofp14_async_config_prop_by_oam(enum ofputil_async_msg_type oam,
- bool master)
+ bool primary)
{
FOR_EACH_ASYNC_PROP (ap) {
- if (ap->oam == oam && ap->master == master) {
+ if (ap->oam == oam && ap->primary == primary) {
return ap;
}
}
@@ -310,7 +310,9 @@ encode_async_mask(const struct ofputil_async_cfg *src,
const struct ofp14_async_prop *ap,
enum ofp_version version)
{
- uint32_t mask = ap->master ? src->master[ap->oam] : src->slave[ap->oam];
+ uint32_t mask = (ap->primary
+ ? src->primary[ap->oam]
+ : src->secondary[ap->oam]);
return htonl(mask & ofp14_async_prop_allowed(ap, version));
}
@@ -342,7 +344,7 @@ decode_async_mask(ovs_be32 src,
}
}
- uint32_t *array = ap->master ? dst->master : dst->slave;
+ uint32_t *array = ap->primary ? dst->primary : dst->secondary;
array[ap->oam] = mask;
return 0;
}
@@ -362,20 +364,20 @@ parse_async_tlv(const struct ofpbuf *property,
}
if (ofpprop_is_experimenter(ap->prop_type)) {
- /* For experimenter properties, whether a property is for the master or
- * slave role is indicated by both 'type' and 'exp_type' in struct
+ /* For experimenter properties, whether a property is for the primary or
+ * secondary role is indicated by both 'type' and 'exp_type' in struct
* ofp_prop_experimenter. Check that these are consistent. */
const struct ofp_prop_experimenter *ope = property->data;
- bool should_be_master = ope->type == htons(0xffff);
- if (should_be_master != ap->master) {
+ bool should_be_primary = ope->type == htons(0xffff);
+ if (should_be_primary != ap->primary) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
VLOG_WARN_RL(&rl, "async property type %#"PRIx16" "
"indicates %s role but exp_type %"PRIu32" indicates "
"%s role",
ntohs(ope->type),
- should_be_master ? "master" : "slave",
+ should_be_primary ? "primary" : "secondary",
ntohl(ope->exp_type),
- ap->master ? "master" : "slave");
+ ap->primary ? "primary" : "secondary");
return OFPERR_OFPBPC_BAD_EXP_TYPE;
}
}
@@ -390,9 +392,9 @@ decode_legacy_async_masks(const ovs_be32 masks[2],
struct ofputil_async_cfg *dst)
{
for (int i = 0; i < 2; i++) {
- bool master = i == 0;
+ bool primary = i == 0;
const struct ofp14_async_prop *ap
- = get_ofp14_async_config_prop_by_oam(oam, master);
+ = get_ofp14_async_config_prop_by_oam(oam, primary);
decode_async_mask(masks[i], ap, version, true, dst);
}
}
@@ -479,9 +481,9 @@ encode_legacy_async_masks(const struct ofputil_async_cfg *ac,
ovs_be32 masks[2])
{
for (int i = 0; i < 2; i++) {
- bool master = i == 0;
+ bool primary = i == 0;
const struct ofp14_async_prop *ap
- = get_ofp14_async_config_prop_by_oam(oam, master);
+ = get_ofp14_async_config_prop_by_oam(oam, primary);
masks[i] = encode_async_mask(ac, ap, version);
}
}
@@ -507,11 +509,11 @@ ofputil_put_async_config__(const struct ofputil_async_cfg *ac,
encode_async_mask(ac, ap, version));
/* For experimenter properties, we need to use type 0xfffe for
- * master and 0xffff for slaves. */
+ * secondaries and 0xffff for primary. */
if (ofpprop_is_experimenter(ap->prop_type)) {
struct ofp_prop_experimenter *ope
= ofpbuf_at_assert(buf, ofs, sizeof *ope);
- ope->type = ap->master ? htons(0xffff) : htons(0xfffe);
+ ope->type = ap->primary ? htons(0xffff) : htons(0xfffe);
}
}
}
@@ -592,8 +594,8 @@ ofp_role_reason_to_string(enum ofp14_controller_role_reason reason,
char *reasonbuf, size_t bufsize)
{
switch (reason) {
- case OFPCRR_MASTER_REQUEST:
- return "master_request";
+ case OFPCRR_PRIMARY_REQUEST:
+ return "primary_request";
case OFPCRR_CONFIG:
return "configuration_changed";
@@ -664,12 +666,12 @@ ofputil_format_set_async_config(struct ds *string,
const struct ofputil_async_cfg *ac)
{
for (int i = 0; i < 2; i++) {
- ds_put_format(string, "\n %s:\n", i == 0 ? "master" : "slave");
+ ds_put_format(string, "\n %s:\n", i == 0 ? "primary" : "secondary");
for (uint32_t type = 0; type < OAM_N_TYPES; type++) {
ds_put_format(string, "%16s:",
ofputil_async_msg_type_to_string(type));
- uint32_t role = i == 0 ? ac->master[type] : ac->slave[type];
+ uint32_t role = i == 0 ? ac->primary[type] : ac->secondary[type];
for (int j = 0; j < 32; j++) {
if (role & (1u << j)) {
char reasonbuf[INT_STRLEN(int) + 1];
@@ -705,17 +707,17 @@ ofputil_async_cfg_default(enum ofp_version version)
}
struct ofputil_async_cfg oac = {
- .master[OAM_PACKET_IN] = pin,
- .master[OAM_PORT_STATUS] = OFPPR_BITS,
- .slave[OAM_PORT_STATUS] = OFPPR_BITS
+ .primary[OAM_PACKET_IN] = pin,
+ .primary[OAM_PORT_STATUS] = OFPPR_BITS,
+ .secondary[OAM_PORT_STATUS] = OFPPR_BITS
};
if (version >= OFP14_VERSION) {
- oac.master[OAM_FLOW_REMOVED] = OFPRR14_BITS;
+ oac.primary[OAM_FLOW_REMOVED] = OFPRR14_BITS;
} else if (version == OFP13_VERSION) {
- oac.master[OAM_FLOW_REMOVED] = OFPRR13_BITS;
+ oac.primary[OAM_FLOW_REMOVED] = OFPRR13_BITS;
} else {
- oac.master[OAM_FLOW_REMOVED] = OFPRR10_BITS;
+ oac.primary[OAM_FLOW_REMOVED] = OFPRR10_BITS;
}
return oac;
diff --git a/lib/ofp-ed-props.c b/lib/ofp-ed-props.c
index 28382e01235cd81c4fcfc31de2d8cc05da61c2e8..02a9235d510c97325fb0a8edb427ab1543e37e22 100644
--- a/lib/ofp-ed-props.c
+++ b/lib/ofp-ed-props.c
@@ -49,7 +49,7 @@ decode_ed_prop(const struct ofp_ed_prop_header **ofp_prop,
return OFPERR_NXBAC_BAD_ED_PROP;
}
struct ofpact_ed_prop_nsh_md_type *pnmt =
- ofpbuf_put_uninit(out, sizeof(*pnmt));
+ ofpbuf_put_zeros(out, sizeof *pnmt);
pnmt->header.prop_class = prop_class;
pnmt->header.type = prop_type;
pnmt->header.len = len;
@@ -108,6 +108,7 @@ encode_ed_prop(const struct ofpact_ed_prop **prop,
opnmt->header.len =
offsetof(struct ofp_ed_prop_nsh_md_type, pad);
opnmt->md_type = pnmt->md_type;
+ memset(opnmt->pad, 0, sizeof opnmt->pad);
prop_len = sizeof(*pnmt);
break;
}
diff --git a/lib/ofp-group.c b/lib/ofp-group.c
index b675e802c3171af3196babe90a2459fd584e5999..bf0f8af544c9a18c3a368253a63a045eb365aec3 100644
--- a/lib/ofp-group.c
+++ b/lib/ofp-group.c
@@ -660,7 +660,8 @@ parse_bucket_str(struct ofputil_bucket *bucket, char *str_,
} else if (!strcasecmp(key, "watch_port")) {
if (!ofputil_port_from_string(value, port_map, &bucket->watch_port)
|| (ofp_to_u16(bucket->watch_port) >= ofp_to_u16(OFPP_MAX)
- && bucket->watch_port != OFPP_ANY)) {
+ && bucket->watch_port != OFPP_ANY
+ && bucket->watch_port != OFPP_CONTROLLER)) {
error = xasprintf("%s: invalid watch_port", value);
}
} else if (!strcasecmp(key, "watch_group")) {
diff --git a/lib/ofp-match.c b/lib/ofp-match.c
index 2ec28f8036c028fff477d96b69e41a8c7959422c..86a082dde14161811465fd3f7b495749d7e9533f 100644
--- a/lib/ofp-match.c
+++ b/lib/ofp-match.c
@@ -65,7 +65,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask)
void
ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc)
{
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
/* Initialize most of wc. */
flow_wildcards_init_catchall(wc);
diff --git a/lib/ovs-actions.xml b/lib/ovs-actions.xml
index ab8e08b84d8b558f7c481e0cc0ade4f94d29a76b..a2778de4bcd6a51dd44b182f06202eff19794cc1 100644
--- a/lib/ovs-actions.xml
+++ b/lib/ovs-actions.xml
@@ -789,15 +789,16 @@ $ ovs-ofctl -O OpenFlow10 add-flow br0 actions=mod_nw_src:1.2.3.4
The bundle and bundle_load actions
- bundle(fields, basis, algorithm, ofport, slaves:port...)
- bundle_load(fields, basis, algorithm, ofport, dst, slaves:port...)
+ bundle(fields, basis, algorithm, ofport, members:port...)
+ bundle_load(fields, basis, algorithm, ofport, dst, members:port...)
- These actions choose a port (``slave'') from a comma-separated OpenFlow
- port list. After selecting the port, bundle
- outputs to it, whereas bundle_load writes its port number
- to dst, which must be a 16-bit or wider field or subfield in
- the syntax described under ``Field Specifications'' above.
+ These actions choose a port (a ``member'') from a
+ comma-separated OpenFlow port list. After selecting the
+ port, bundle outputs to it, whereas
+ bundle_load writes its port number to dst,
+ which must be a 16-bit or wider field or subfield in the syntax
+ described under ``Field Specifications'' above.
@@ -854,20 +855,20 @@ $ ovs-ofctl -O OpenFlow10 add-flow br0 actions=mod_nw_src:1.2.3.4
active_backup
-
- Chooses the first live port listed in slaves.
+ Chooses the first live port listed in members.
hrw (Highest Random Weight)
-
Computes the following, considering only the live ports in
- slaves:
+ members:
-for i in [1,n_slaves]:
+for i in [1,n_members]:
weights[i] = hash(flow, i)
-slave = { i such that weights[i] >= weights[j] for all j != i }
+member = { i such that weights[i] >= weights[j] for all j != i }
@@ -877,17 +878,17 @@ for i in [1,n_slaves]:
- The algorithms take port liveness into account when selecting slaves.
- The definition of whether a port is live is subject to change. It
- currently takes into account carrier status and link monitoring
- protocols such as BFD and CFM. If none of the slaves is live,
- bundle does not output the packet and
+ The algorithms take port liveness into account when selecting
+ members. The definition of whether a port is live is subject to
+ change. It currently takes into account carrier status and link
+ monitoring protocols such as BFD and CFM. If none of the members is
+ live, bundle does not output the packet and
bundle_load stores OFPP_NONE (65535) in the
output field.
- Example: bundle(eth_src,0,hrw,ofport,slaves:4,8) uses an
+ Example: bundle(eth_src,0,hrw,ofport,members:4,8) uses an
Ethernet source hash with basis 0, to select between OpenFlow ports 4
and 8 using the Highest Random Weight algorithm.
@@ -1529,7 +1530,7 @@ for i in [1,n_slaves]:
- The packet length to check againt the argument pkt_len
+ The packet length to check against the argument pkt_len
includes the L2 header and L2 payload of the packet, but not the VLAN
tag (if present).
@@ -1552,6 +1553,22 @@ for i in [1,n_slaves]:
This action was added in Open vSwitch 2.11.90.
+
+
+ The delete_field action
+ delete_field:field
+
+
+ The delete_field action deletes a field in the syntax
+ described under ``Field Specifications'' above. Currently, only
+ the tun_metadata fields are supported.
+
+
+
+ This action was added in Open vSwitch 2.13.90.
+
+
+
diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c
index ebc8120f0fd33812c02f7c9a7e3bc6fb47d59c95..cde1e925ba9481ee281564591a23023b14f37a14 100644
--- a/lib/ovs-rcu.c
+++ b/lib/ovs-rcu.c
@@ -30,6 +30,8 @@
VLOG_DEFINE_THIS_MODULE(ovs_rcu);
+#define MIN_CBS 16
+
struct ovsrcu_cb {
void (*function)(void *aux);
void *aux;
@@ -37,7 +39,8 @@ struct ovsrcu_cb {
struct ovsrcu_cbset {
struct ovs_list list_node;
- struct ovsrcu_cb cbs[16];
+ struct ovsrcu_cb *cbs;
+ size_t n_allocated;
int n_cbs;
};
@@ -310,16 +313,19 @@ ovsrcu_postpone__(void (*function)(void *aux), void *aux)
cbset = perthread->cbset;
if (!cbset) {
cbset = perthread->cbset = xmalloc(sizeof *perthread->cbset);
+ cbset->cbs = xmalloc(MIN_CBS * sizeof *cbset->cbs);
+ cbset->n_allocated = MIN_CBS;
cbset->n_cbs = 0;
}
+ if (cbset->n_cbs == cbset->n_allocated) {
+ cbset->cbs = x2nrealloc(cbset->cbs, &cbset->n_allocated,
+ sizeof *cbset->cbs);
+ }
+
cb = &cbset->cbs[cbset->n_cbs++];
cb->function = function;
cb->aux = aux;
-
- if (cbset->n_cbs >= ARRAY_SIZE(cbset->cbs)) {
- ovsrcu_flush_cbset(perthread);
- }
}
static bool
@@ -341,6 +347,7 @@ ovsrcu_call_postponed(void)
for (cb = cbset->cbs; cb < &cbset->cbs[cbset->n_cbs]; cb++) {
cb->function(cb->aux);
}
+ free(cbset->cbs);
free(cbset);
}
diff --git a/lib/ovs-router.c b/lib/ovs-router.c
index bfb2b7071bc08c582a4f42685dfcadaa269dc914..09b81c6e5a786bca57cb87747c042e23d59e5058 100644
--- a/lib/ovs-router.c
+++ b/lib/ovs-router.c
@@ -505,7 +505,7 @@ ovs_router_flush(void)
ovs_mutex_lock(&mutex);
classifier_defer(&cls);
CLS_FOR_EACH(rt, cr, &cls) {
- if (rt->priority == rt->plen) {
+ if (rt->priority == rt->plen || rt->local) {
rt_entry_delete__(&rt->cr);
}
}
diff --git a/lib/ovsdb-data.c b/lib/ovsdb-data.c
index 4828624f658d38ccd6aaae8e42509264ddbca408..c145f5ad97221c1a3f6cfc8909dd6bc3b14d4cf6 100644
--- a/lib/ovsdb-data.c
+++ b/lib/ovsdb-data.c
@@ -1017,6 +1017,10 @@ static void
free_data(enum ovsdb_atomic_type type,
union ovsdb_atom *atoms, size_t n_atoms)
{
+ if (!atoms) {
+ return;
+ }
+
if (ovsdb_atom_needs_destruction(type)) {
unsigned int i;
for (i = 0; i < n_atoms; i++) {
diff --git a/lib/ovsdb-idl-provider.h b/lib/ovsdb-idl-provider.h
index 30d1d08ebac1b952bd6216ce53541b9e973fc650..00497d940c324864c3bd3895faaa0f11bfcaa03e 100644
--- a/lib/ovsdb-idl-provider.h
+++ b/lib/ovsdb-idl-provider.h
@@ -122,8 +122,12 @@ struct ovsdb_idl_table {
unsigned int change_seqno[OVSDB_IDL_CHANGE_MAX];
struct ovs_list indexes; /* Contains "struct ovsdb_idl_index"s */
struct ovs_list track_list; /* Tracked rows (ovsdb_idl_row.track_node). */
- struct ovsdb_idl_condition condition;
- bool cond_changed;
+ struct ovsdb_idl_condition *ack_cond; /* Last condition acked by the
+ * server. */
+ struct ovsdb_idl_condition *req_cond; /* Last condition requested to the
+ * server. */
+ struct ovsdb_idl_condition *new_cond; /* Latest condition set by the IDL
+ * client. */
};
struct ovsdb_idl_class {
diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 190143f3638426c752ee28cbac70cd680b2b27a9..ee2fbfa3f2244af07e62e3c1927ecfe8a4640182 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -221,7 +221,7 @@ struct ovsdb_idl_db {
struct uuid last_id;
};
-static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *);
+static void ovsdb_idl_db_track_clear(struct ovsdb_idl_db *, bool flush_all);
static void ovsdb_idl_db_add_column(struct ovsdb_idl_db *,
const struct ovsdb_idl_column *);
static void ovsdb_idl_db_omit(struct ovsdb_idl_db *,
@@ -240,6 +240,10 @@ static void ovsdb_idl_send_monitor_request(struct ovsdb_idl *,
struct ovsdb_idl_db *,
enum ovsdb_idl_monitor_method);
static void ovsdb_idl_db_clear(struct ovsdb_idl_db *db);
+static void ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db);
+static void ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db);
+static void ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst,
+ struct ovsdb_idl_condition **src);
struct ovsdb_idl {
struct ovsdb_idl_db server;
@@ -317,14 +321,19 @@ static bool ovsdb_idl_handle_monitor_canceled(struct ovsdb_idl *,
static void ovsdb_idl_db_parse_update(struct ovsdb_idl_db *,
const struct json *table_updates,
enum ovsdb_idl_monitor_method method);
-static bool ovsdb_idl_process_update(struct ovsdb_idl_table *,
- const struct uuid *,
- const struct json *old,
- const struct json *new);
-static bool ovsdb_idl_process_update2(struct ovsdb_idl_table *,
- const struct uuid *,
- const char *operation,
- const struct json *row);
+enum update_result {
+ OVSDB_IDL_UPDATE_DB_CHANGED,
+ OVSDB_IDL_UPDATE_NO_CHANGES,
+ OVSDB_IDL_UPDATE_INCONSISTENT,
+};
+static enum update_result ovsdb_idl_process_update(struct ovsdb_idl_table *,
+ const struct uuid *,
+ const struct json *old,
+ const struct json *new);
+static enum update_result ovsdb_idl_process_update2(struct ovsdb_idl_table *,
+ const struct uuid *,
+ const char *operation,
+ const struct json *row);
static void ovsdb_idl_insert_row(struct ovsdb_idl_row *, const struct json *);
static void ovsdb_idl_delete_row(struct ovsdb_idl_row *);
static bool ovsdb_idl_modify_row(struct ovsdb_idl_row *, const struct json *);
@@ -385,6 +394,8 @@ static void ovsdb_idl_send_cond_change(struct ovsdb_idl *idl);
static void ovsdb_idl_destroy_indexes(struct ovsdb_idl_table *);
static void ovsdb_idl_add_to_indexes(const struct ovsdb_idl_row *);
static void ovsdb_idl_remove_from_indexes(const struct ovsdb_idl_row *);
+static int ovsdb_idl_try_commit_loop_txn(struct ovsdb_idl_loop *loop,
+ bool *may_need_wakeup);
static void
ovsdb_idl_db_init(struct ovsdb_idl_db *db, const struct ovsdb_idl_class *class,
@@ -422,9 +433,11 @@ ovsdb_idl_db_init(struct ovsdb_idl_db *db, const struct ovsdb_idl_class *class,
= table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
= table->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
table->db = db;
- ovsdb_idl_condition_init(&table->condition);
- ovsdb_idl_condition_add_clause_true(&table->condition);
- table->cond_changed = false;
+ table->ack_cond = NULL;
+ table->req_cond = NULL;
+ table->new_cond = xmalloc(sizeof *table->new_cond);
+ ovsdb_idl_condition_init(table->new_cond);
+ ovsdb_idl_condition_add_clause_true(table->new_cond);
}
db->monitor_id = json_array_create_2(json_string_create("monid"),
json_string_create(class->database));
@@ -553,15 +566,28 @@ ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *idl, bool shuffle)
idl->shuffle_remotes = shuffle;
}
+/* Sets 'idl->min_index' back to 0. This prevents a situation where the
+ * client thinks all databases have stale data, when they actually have all
+ * been destroyed and rebuilt from scratch.
+ */
+void
+ovsdb_idl_reset_min_index(struct ovsdb_idl *idl)
+{
+ idl->min_index = 0;
+}
+
static void
ovsdb_idl_db_destroy(struct ovsdb_idl_db *db)
{
+ struct ovsdb_idl_condition *null_cond = NULL;
ovs_assert(!db->txn);
ovsdb_idl_db_txn_abort_all(db);
ovsdb_idl_db_clear(db);
for (size_t i = 0; i < db->class_->n_tables; i++) {
struct ovsdb_idl_table *table = &db->tables[i];
- ovsdb_idl_condition_destroy(&table->condition);
+ ovsdb_idl_condition_move(&table->ack_cond, &null_cond);
+ ovsdb_idl_condition_move(&table->req_cond, &null_cond);
+ ovsdb_idl_condition_move(&table->new_cond, &null_cond);
ovsdb_idl_destroy_indexes(table);
shash_destroy(&table->columns);
hmap_destroy(&table->rows);
@@ -591,6 +617,14 @@ ovsdb_idl_destroy(struct ovsdb_idl *idl)
}
}
+/* By default, or if 'leader_only' is true, when 'idl' connects to a clustered
+ * database, the IDL will avoid servers other than the cluster leader. This
+ * ensures that any data that it reads and reports is up-to-date. If
+ * 'leader_only' is false, the IDL will accept any server in the cluster, which
+ * means that for read-only transactions it can report and act on stale data
+ * (transactions that modify the database are always serialized even with false
+ * 'leader_only'). Refer to Understanding Cluster Consistency in ovsdb(7) for
+ * more information. */
void
ovsdb_idl_set_leader_only(struct ovsdb_idl *idl, bool leader_only)
{
@@ -610,7 +644,6 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
struct ovsdb_idl_table *table = &db->tables[i];
struct ovsdb_idl_row *row, *next_row;
- table->cond_changed = false;
if (hmap_is_empty(&table->rows)) {
continue;
}
@@ -634,9 +667,8 @@ ovsdb_idl_db_clear(struct ovsdb_idl_db *db)
}
ovsdb_idl_row_destroy_postprocess(db);
- db->cond_changed = false;
db->cond_seqno = 0;
- ovsdb_idl_db_track_clear(db);
+ ovsdb_idl_db_track_clear(db, true);
if (changed) {
db->change_seqno++;
@@ -686,12 +718,20 @@ ovsdb_idl_send_request(struct ovsdb_idl *idl, struct jsonrpc_msg *request)
idl->request_id = json_clone(request->id);
if (idl->session) {
jsonrpc_session_send(idl->session, request);
+ } else {
+ jsonrpc_msg_destroy(request);
}
}
static void
ovsdb_idl_restart_fsm(struct ovsdb_idl *idl)
{
+ /* Resync data DB table conditions to avoid missing updates due to
+ * conditions that were in flight or changed locally while the connection
+ * was down.
+ */
+ ovsdb_idl_db_sync_condition(&idl->data);
+
ovsdb_idl_send_schema_request(idl, &idl->server);
ovsdb_idl_transition(idl, IDL_S_SERVER_SCHEMA_REQUESTED);
idl->data.monitoring = OVSDB_IDL_NOT_MONITORING;
@@ -789,9 +829,6 @@ ovsdb_idl_process_response(struct ovsdb_idl *idl, struct jsonrpc_msg *msg)
ovsdb_idl_db_parse_monitor_reply(&idl->data, msg->result,
OVSDB_IDL_MM_MONITOR);
idl->data.change_seqno++;
- ovsdb_idl_clear(idl);
- ovsdb_idl_db_parse_update(&idl->data, msg->result,
- OVSDB_IDL_MM_MONITOR);
break;
case IDL_S_MONITORING:
@@ -799,7 +836,9 @@ ovsdb_idl_process_response(struct ovsdb_idl *idl, struct jsonrpc_msg *msg)
* do, it's a "monitor_cond_change", which means that the conditional
* monitor clauses were updated.
*
- * If further condition changes were pending, send them now. */
+ * Mark the last requested conditions as acked and if further
+ * condition changes were pending, send them now. */
+ ovsdb_idl_db_ack_condition(&idl->data);
ovsdb_idl_send_cond_change(idl);
idl->data.cond_seqno++;
break;
@@ -1495,17 +1534,34 @@ ovsdb_idl_condition_equals(const struct ovsdb_idl_condition *a,
}
static void
-ovsdb_idl_condition_clone(struct ovsdb_idl_condition *dst,
+ovsdb_idl_condition_clone(struct ovsdb_idl_condition **dst,
const struct ovsdb_idl_condition *src)
{
- ovsdb_idl_condition_init(dst);
+ if (*dst) {
+ ovsdb_idl_condition_destroy(*dst);
+ } else {
+ *dst = xmalloc(sizeof **dst);
+ }
+ ovsdb_idl_condition_init(*dst);
- dst->is_true = src->is_true;
+ (*dst)->is_true = src->is_true;
const struct ovsdb_idl_clause *clause;
HMAP_FOR_EACH (clause, hmap_node, &src->clauses) {
- ovsdb_idl_condition_add_clause__(dst, clause, clause->hmap_node.hash);
+ ovsdb_idl_condition_add_clause__(*dst, clause, clause->hmap_node.hash);
+ }
+}
+
+static void
+ovsdb_idl_condition_move(struct ovsdb_idl_condition **dst,
+ struct ovsdb_idl_condition **src)
+{
+ if (*dst) { /* Destroy and free whatever '*dst' holds so far. */
+ ovsdb_idl_condition_destroy(*dst);
+ free(*dst);
}
+ *dst = *src; /* '*dst' takes over the condition from '*src'... */
+ *src = NULL; /* ...and the source pointer is cleared. */
}
static unsigned int
@@ -1513,17 +1569,31 @@ ovsdb_idl_db_set_condition(struct ovsdb_idl_db *db,
const struct ovsdb_idl_table_class *tc,
const struct ovsdb_idl_condition *condition)
{
+ struct ovsdb_idl_condition *table_cond;
struct ovsdb_idl_table *table = ovsdb_idl_db_table_from_class(db, tc);
- unsigned int seqno = db->cond_seqno;
- if (!ovsdb_idl_condition_equals(condition, &table->condition)) {
- ovsdb_idl_condition_destroy(&table->condition);
- ovsdb_idl_condition_clone(&table->condition, condition);
- db->cond_changed = table->cond_changed = true;
+ unsigned int curr_seqno = db->cond_seqno;
+
+ /* Compare the new condition to the last known condition which can be
+ * either "new" (not sent yet), "requested" or "acked", in this order.
+ */
+ if (table->new_cond) {
+ table_cond = table->new_cond;
+ } else if (table->req_cond) {
+ table_cond = table->req_cond;
+ } else {
+ table_cond = table->ack_cond;
+ }
+ ovs_assert(table_cond);
+
+ if (!ovsdb_idl_condition_equals(condition, table_cond)) {
+ ovsdb_idl_condition_clone(&table->new_cond, condition);
+ db->cond_changed = true;
poll_immediate_wake();
- return seqno + 1;
}
- return seqno;
+ /* Conditions will be up to date when we receive replies for already
+ * requested and new conditions, if any. */
+ return curr_seqno + (table->new_cond ? 1 : 0) + (table->req_cond ? 1 : 0);
}
/* Sets the replication condition for 'tc' in 'idl' to 'condition' and
@@ -1563,9 +1633,8 @@ ovsdb_idl_condition_to_json(const struct ovsdb_idl_condition *cnd)
}
static struct json *
-ovsdb_idl_create_cond_change_req(struct ovsdb_idl_table *table)
+ovsdb_idl_create_cond_change_req(const struct ovsdb_idl_condition *cond)
{
- const struct ovsdb_idl_condition *cond = &table->condition;
struct json *monitor_cond_change_request = json_object_create();
struct json *cond_json = ovsdb_idl_condition_to_json(cond);
@@ -1585,8 +1654,12 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db)
for (size_t i = 0; i < db->class_->n_tables; i++) {
struct ovsdb_idl_table *table = &db->tables[i];
- if (table->cond_changed) {
- struct json *req = ovsdb_idl_create_cond_change_req(table);
+ /* Always use the most recent conditions set by the IDL client when
+ * requesting monitor_cond_change, i.e., table->new_cond.
+ */
+ if (table->new_cond) {
+ struct json *req =
+ ovsdb_idl_create_cond_change_req(table->new_cond);
if (req) {
if (!monitor_cond_change_requests) {
monitor_cond_change_requests = json_object_create();
@@ -1595,7 +1668,11 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db)
table->class_->name,
json_array_create_1(req));
}
- table->cond_changed = false;
+ /* Mark the new condition as requested by moving it to req_cond.
+ * If there is already a requested condition, that's a bug.
+ */
+ ovs_assert(table->req_cond == NULL);
+ ovsdb_idl_condition_move(&table->req_cond, &table->new_cond);
}
}
@@ -1610,6 +1687,73 @@ ovsdb_idl_db_compose_cond_change(struct ovsdb_idl_db *db)
return jsonrpc_create_request("monitor_cond_change", params, NULL);
}
+/* Marks every table condition in 'db' that was requested from the server as
+ * acked.  Meant to be called when the server replies to a
+ * monitor_cond_change request.
+ */
+static void
+ovsdb_idl_db_ack_condition(struct ovsdb_idl_db *db)
+{
+    for (size_t idx = 0; idx < db->class_->n_tables; idx++) {
+        struct ovsdb_idl_table *tbl = &db->tables[idx];
+        /* Tables with no request in flight keep their current ack_cond. */
+        if (tbl->req_cond) {
+            ovsdb_idl_condition_move(&tbl->ack_cond, &tbl->req_cond);
+        }
+    }
+}
+
+/* Should be called when the IDL fsm is restarted. Resyncs table conditions
+ * based on the state the DB is in:
+ * - if a non-zero last_id is available for the DB then upon reconnect
+ * the IDL should first request acked conditions to avoid missing updates
+ * about records that were added before the transaction with
+ * txn-id == last_id. If there were requested condition changes in flight
+ * (i.e., req_cond not NULL) and the IDL client didn't set new conditions
+ * (i.e., new_cond is NULL) then move req_cond to new_cond to trigger a
+ * follow up monitor_cond_change request.
+ * - if there's no last_id available for the DB then it's safe to use the
+ * latest conditions set by the IDL client even if they weren't acked yet.
+ */
+static void
+ovsdb_idl_db_sync_condition(struct ovsdb_idl_db *db)
+{
+ bool ack_all = uuid_is_zero(&db->last_id); /* True if no last_id is known. */
+
+ db->cond_changed = false;
+ for (size_t i = 0; i < db->class_->n_tables; i++) {
+ struct ovsdb_idl_table *table = &db->tables[i];
+
+ /* When monitor_cond_since requests will be issued, the
+ * table->ack_cond condition will be added to the "where" clause.
+ * Follow up monitor_cond_change requests will use table->new_cond.
+ */
+ if (ack_all) {
+ if (table->new_cond) {
+ ovsdb_idl_condition_move(&table->req_cond, &table->new_cond);
+ }
+
+ if (table->req_cond) {
+ ovsdb_idl_condition_move(&table->ack_cond, &table->req_cond);
+ }
+ } else {
+ /* If there was no "unsent" condition but instead a
+ * monitor_cond_change request was in flight, move table->req_cond
+ * to table->new_cond and set db->cond_changed to trigger a new
+ * monitor_cond_change request.
+ *
+ * However, if a new condition has been set by the IDL client,
+ * monitor_cond_change will be sent anyway and will use the most
+ * recent table->new_cond so there's no need to update it here.
+ */
+ if (table->req_cond && !table->new_cond) {
+ ovsdb_idl_condition_move(&table->new_cond, &table->req_cond);
+ db->cond_changed = true;
+ }
+ }
+ }
+}
+
static void
ovsdb_idl_send_cond_change(struct ovsdb_idl *idl)
{
@@ -1753,29 +1897,37 @@ ovsdb_idl_track_is_set(struct ovsdb_idl_table *table)
}
/* Returns the first tracked row in table with class 'table_class'
- * for the specified 'idl'. Returns NULL if there are no tracked rows */
+ * for the specified 'idl'. Returns NULL if there are no tracked rows.
+ * Pure orphan rows, i.e. rows that never had any datum, are skipped. */
const struct ovsdb_idl_row *
ovsdb_idl_track_get_first(const struct ovsdb_idl *idl,
const struct ovsdb_idl_table_class *table_class)
{
struct ovsdb_idl_table *table
= ovsdb_idl_db_table_from_class(&idl->data, table_class);
+ struct ovsdb_idl_row *row;
- if (!ovs_list_is_empty(&table->track_list)) {
- return CONTAINER_OF(ovs_list_front(&table->track_list), struct ovsdb_idl_row, track_node);
+ LIST_FOR_EACH (row, track_node, &table->track_list) {
+ if (!ovsdb_idl_row_is_orphan(row) || row->tracked_old_datum) {
+ return row;
+ }
}
return NULL;
}
/* Returns the next tracked row in table after the specified 'row'
- * (in no particular order). Returns NULL if there are no tracked rows */
+ * (in no particular order). Returns NULL if there are no tracked rows.
+ * Pure orphan rows, i.e. rows that never had any datum, are skipped. */
const struct ovsdb_idl_row *
ovsdb_idl_track_get_next(const struct ovsdb_idl_row *row)
{
- if (row->track_node.next != &row->table->track_list) {
- return CONTAINER_OF(row->track_node.next, struct ovsdb_idl_row, track_node);
- }
+ struct ovsdb_idl_table *table = row->table;
+ LIST_FOR_EACH_CONTINUE (row, track_node, &table->track_list) {
+ if (!ovsdb_idl_row_is_orphan(row) || row->tracked_old_datum) {
+ return row;
+ }
+ }
return NULL;
}
@@ -1808,7 +1960,7 @@ ovsdb_idl_track_is_updated(const struct ovsdb_idl_row *row,
* loop when it is ready to do ovsdb_idl_run() again.
*/
static void
-ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
+ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db, bool flush_all)
{
size_t i;
@@ -1823,19 +1975,39 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
free(row->updated);
row->updated = NULL;
}
+
+ row->change_seqno[OVSDB_IDL_CHANGE_INSERT] =
+ row->change_seqno[OVSDB_IDL_CHANGE_MODIFY] =
+ row->change_seqno[OVSDB_IDL_CHANGE_DELETE] = 0;
+
ovs_list_remove(&row->track_node);
ovs_list_init(&row->track_node);
- if (ovsdb_idl_row_is_orphan(row) && row->tracked_old_datum) {
+ if (ovsdb_idl_row_is_orphan(row)) {
ovsdb_idl_row_unparse(row);
- const struct ovsdb_idl_table_class *class =
- row->table->class_;
- for (size_t c = 0; c < class->n_columns; c++) {
- ovsdb_datum_destroy(&row->tracked_old_datum[c],
- &class->columns[c].type);
+ if (row->tracked_old_datum) {
+ const struct ovsdb_idl_table_class *class =
+ row->table->class_;
+ for (size_t c = 0; c < class->n_columns; c++) {
+ ovsdb_datum_destroy(&row->tracked_old_datum[c],
+ &class->columns[c].type);
+ }
+ free(row->tracked_old_datum);
+ row->tracked_old_datum = NULL;
+ }
+
+ /* Rows that were reused as orphan after being processed
+ * for deletion are still in the table hmap and will be
+ * cleaned up when their src arcs are removed. These rows
+ * will not be reported anymore as "deleted" to IDL
+ * clients.
+ *
+ * The exception is when 'flush_all' is explicitly set to
+ * 'true' which usually happens when the complete IDL
+ * contents are being flushed.
+ */
+ if (flush_all || ovs_list_is_empty(&row->dst_arcs)) {
+ free(row);
}
- free(row->tracked_old_datum);
- row->tracked_old_datum = NULL;
- free(row);
}
}
}
@@ -1850,7 +2022,7 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
void
ovsdb_idl_track_clear(struct ovsdb_idl *idl)
{
- ovsdb_idl_db_track_clear(&idl->data);
+ ovsdb_idl_db_track_clear(&idl->data, false);
}
static void
@@ -2064,13 +2236,15 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db,
monitor_request = json_object_create();
json_object_put(monitor_request, "columns", columns);
- const struct ovsdb_idl_condition *cond = &table->condition;
+ /* Always use acked conditions when requesting
+ * monitor_cond/monitor_cond_since.
+ */
+ const struct ovsdb_idl_condition *cond = table->ack_cond;
if ((monitor_method == OVSDB_IDL_MM_MONITOR_COND ||
monitor_method == OVSDB_IDL_MM_MONITOR_COND_SINCE) &&
- !ovsdb_idl_condition_is_true(cond)) {
+ cond && !ovsdb_idl_condition_is_true(cond)) {
json_object_put(monitor_request, "where",
ovsdb_idl_condition_to_json(cond));
- table->cond_changed = false;
}
json_object_put(monitor_requests, tc->name,
json_array_create_1(monitor_request));
@@ -2078,8 +2252,6 @@ ovsdb_idl_send_monitor_request(struct ovsdb_idl *idl, struct ovsdb_idl_db *db,
}
free_schema(schema);
- db->cond_changed = false;
-
struct json *params = json_array_create_3(
json_string_create(db->class_->database),
json_clone(db->monitor_id),
@@ -2287,6 +2459,7 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
version_suffix, table->class_->name);
}
SHASH_FOR_EACH (table_node, json_object(table_update)) {
+ enum update_result result = OVSDB_IDL_UPDATE_NO_CHANGES;
const struct json *row_update = table_node->data;
struct uuid uuid;
@@ -2319,13 +2492,13 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
operation = ops[i];
row = shash_find_data(json_object(row_update), operation);
- if (row) {
- if (ovsdb_idl_process_update2(table, &uuid, operation,
- row)) {
- db->change_seqno++;
- }
- break;
+ if (!row) {
+ continue;
}
+
+ result = ovsdb_idl_process_update2(table, &uuid,
+ operation, row);
+ break;
}
/* row_update2 should contain one of the objects */
@@ -2356,10 +2529,24 @@ ovsdb_idl_db_parse_update__(struct ovsdb_idl_db *db,
"and \"new\" members");
}
- if (ovsdb_idl_process_update(table, &uuid, old_json,
- new_json)) {
- db->change_seqno++;
- }
+ result = ovsdb_idl_process_update(table, &uuid, old_json,
+ new_json);
+ }
+
+ switch (result) {
+ case OVSDB_IDL_UPDATE_DB_CHANGED:
+ db->change_seqno++;
+ break;
+ case OVSDB_IDL_UPDATE_NO_CHANGES:
+ break;
+ case OVSDB_IDL_UPDATE_INCONSISTENT:
+ memset(&db->last_id, 0, sizeof db->last_id);
+ ovsdb_idl_retry(db->idl);
+ return ovsdb_error(NULL,
+ "<row_update%s> received for inconsistent "
+ "IDL: reconnecting IDL and resync all "
+ "data",
+ version_suffix);
}
}
}
@@ -2392,9 +2579,22 @@ ovsdb_idl_get_row(struct ovsdb_idl_table *table, const struct uuid *uuid)
return NULL;
}
-/* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
- * otherwise. */
-static bool
+/* Returns OVSDB_IDL_UPDATE_DB_CHANGED if a column with mode
+ * OVSDB_IDL_MODE_RW changed.
+ *
+ * Some IDL inconsistencies can be detected when processing updates:
+ * - trying to insert an already existing row
+ * - trying to update a missing row
+ * - trying to delete a non existent row
+ *
+ * In such cases OVSDB_IDL_UPDATE_INCONSISTENT is returned.
+ * Even though the IDL client could recover, it's best to report the
+ * inconsistent state because the state the server is in is unknown so the
+ * safest thing to do is to retry (potentially connecting to a new server).
+ *
+ * Returns OVSDB_IDL_UPDATE_NO_CHANGES otherwise.
+ */
+static enum update_result
ovsdb_idl_process_update(struct ovsdb_idl_table *table,
const struct uuid *uuid, const struct json *old,
const struct json *new)
@@ -2408,10 +2608,10 @@ ovsdb_idl_process_update(struct ovsdb_idl_table *table,
/* XXX perhaps we should check the 'old' values? */
ovsdb_idl_delete_row(row);
} else {
- VLOG_WARN_RL(&semantic_rl, "cannot delete missing row "UUID_FMT" "
- "from table %s",
- UUID_ARGS(uuid), table->class_->name);
- return false;
+ VLOG_ERR_RL(&semantic_rl, "cannot delete missing row "UUID_FMT" "
+ "from table %s",
+ UUID_ARGS(uuid), table->class_->name);
+ return OVSDB_IDL_UPDATE_INCONSISTENT;
}
} else if (!old) {
/* Insert row. */
@@ -2420,35 +2620,50 @@ ovsdb_idl_process_update(struct ovsdb_idl_table *table,
} else if (ovsdb_idl_row_is_orphan(row)) {
ovsdb_idl_insert_row(row, new);
} else {
- VLOG_WARN_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to "
- "table %s", UUID_ARGS(uuid), table->class_->name);
- return ovsdb_idl_modify_row(row, new);
+ VLOG_ERR_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to "
+ "table %s", UUID_ARGS(uuid), table->class_->name);
+ return OVSDB_IDL_UPDATE_INCONSISTENT;
}
} else {
/* Modify row. */
if (row) {
/* XXX perhaps we should check the 'old' values? */
if (!ovsdb_idl_row_is_orphan(row)) {
- return ovsdb_idl_modify_row(row, new);
+ return ovsdb_idl_modify_row(row, new)
+ ? OVSDB_IDL_UPDATE_DB_CHANGED
+ : OVSDB_IDL_UPDATE_NO_CHANGES;
} else {
- VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
- "referenced row "UUID_FMT" in table %s",
- UUID_ARGS(uuid), table->class_->name);
- ovsdb_idl_insert_row(row, new);
+ VLOG_ERR_RL(&semantic_rl, "cannot modify missing but "
+ "referenced row "UUID_FMT" in table %s",
+ UUID_ARGS(uuid), table->class_->name);
+ return OVSDB_IDL_UPDATE_INCONSISTENT;
}
} else {
- VLOG_WARN_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" "
- "in table %s", UUID_ARGS(uuid), table->class_->name);
- ovsdb_idl_insert_row(ovsdb_idl_row_create(table, uuid), new);
+ VLOG_ERR_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" "
+ "in table %s", UUID_ARGS(uuid), table->class_->name);
+ return OVSDB_IDL_UPDATE_INCONSISTENT;
}
}
- return true;
+ return OVSDB_IDL_UPDATE_DB_CHANGED;
}
-/* Returns true if a column with mode OVSDB_IDL_MODE_RW changed, false
- * otherwise. */
-static bool
+/* Returns OVSDB_IDL_UPDATE_DB_CHANGED if a column with mode
+ * OVSDB_IDL_MODE_RW changed.
+ *
+ * Some IDL inconsistencies can be detected when processing updates:
+ * - trying to insert an already existing row
+ * - trying to update a missing row
+ * - trying to delete a non existent row
+ *
+ * In such cases OVSDB_IDL_UPDATE_INCONSISTENT is returned.
+ * Even though the IDL client could recover, it's best to report the
+ * inconsistent state because the state the server is in is unknown so the
+ * safest thing to do is to retry (potentially connecting to a new server).
+ *
+ * Otherwise OVSDB_IDL_UPDATE_NO_CHANGES is returned.
+ */
+static enum update_result
ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
const struct uuid *uuid,
const char *operation,
@@ -2462,10 +2677,10 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
if (row && !ovsdb_idl_row_is_orphan(row)) {
ovsdb_idl_delete_row(row);
} else {
- VLOG_WARN_RL(&semantic_rl, "cannot delete missing row "UUID_FMT" "
- "from table %s",
- UUID_ARGS(uuid), table->class_->name);
- return false;
+ VLOG_ERR_RL(&semantic_rl, "cannot delete missing row "UUID_FMT" "
+ "from table %s",
+ UUID_ARGS(uuid), table->class_->name);
+ return OVSDB_IDL_UPDATE_INCONSISTENT;
}
} else if (!strcmp(operation, "insert") || !strcmp(operation, "initial")) {
/* Insert row. */
@@ -2474,52 +2689,56 @@ ovsdb_idl_process_update2(struct ovsdb_idl_table *table,
} else if (ovsdb_idl_row_is_orphan(row)) {
ovsdb_idl_insert_row(row, json_row);
} else {
- VLOG_WARN_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to "
- "table %s", UUID_ARGS(uuid), table->class_->name);
- ovsdb_idl_delete_row(row);
- ovsdb_idl_insert_row(row, json_row);
+ VLOG_ERR_RL(&semantic_rl, "cannot add existing row "UUID_FMT" to "
+ "table %s", UUID_ARGS(uuid), table->class_->name);
+ return OVSDB_IDL_UPDATE_INCONSISTENT;
}
} else if (!strcmp(operation, "modify")) {
/* Modify row. */
if (row) {
if (!ovsdb_idl_row_is_orphan(row)) {
- return ovsdb_idl_modify_row_by_diff(row, json_row);
+ return ovsdb_idl_modify_row_by_diff(row, json_row)
+ ? OVSDB_IDL_UPDATE_DB_CHANGED
+ : OVSDB_IDL_UPDATE_NO_CHANGES;
} else {
- VLOG_WARN_RL(&semantic_rl, "cannot modify missing but "
- "referenced row "UUID_FMT" in table %s",
- UUID_ARGS(uuid), table->class_->name);
- return false;
+ VLOG_ERR_RL(&semantic_rl, "cannot modify missing but "
+ "referenced row "UUID_FMT" in table %s",
+ UUID_ARGS(uuid), table->class_->name);
+ return OVSDB_IDL_UPDATE_INCONSISTENT;
}
} else {
- VLOG_WARN_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" "
- "in table %s", UUID_ARGS(uuid), table->class_->name);
- return false;
+ VLOG_ERR_RL(&semantic_rl, "cannot modify missing row "UUID_FMT" "
+ "in table %s", UUID_ARGS(uuid), table->class_->name);
+ return OVSDB_IDL_UPDATE_INCONSISTENT;
}
} else {
- VLOG_WARN_RL(&semantic_rl, "unknown operation %s to "
- "table %s", operation, table->class_->name);
- return false;
+ VLOG_ERR_RL(&semantic_rl, "unknown operation %s to "
+ "table %s", operation, table->class_->name);
+ return OVSDB_IDL_UPDATE_NO_CHANGES;
}
- return true;
+ return OVSDB_IDL_UPDATE_DB_CHANGED;
}
-/* Recursively add rows to tracked change lists for current row
- * and the rows that reference this row. */
+/* Recursively adds all rows that reference 'row' to their tables' tracked
+ * change lists. */
static void
add_tracked_change_for_references(struct ovsdb_idl_row *row)
{
- if (ovs_list_is_empty(&row->track_node) &&
- ovsdb_idl_track_is_set(row->table)) {
- ovs_list_push_back(&row->table->track_list,
- &row->track_node);
- row->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
- = row->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
- = row->table->db->change_seqno + 1;
-
- const struct ovsdb_idl_arc *arc;
- LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
- add_tracked_change_for_references(arc->src);
+ const struct ovsdb_idl_arc *arc;
+ LIST_FOR_EACH (arc, dst_node, &row->dst_arcs) {
+ struct ovsdb_idl_row *ref = arc->src;
+
+ if (ovs_list_is_empty(&ref->track_node) &&
+ ovsdb_idl_track_is_set(ref->table)) {
+ ovs_list_push_back(&ref->table->track_list,
+ &ref->track_node);
+
+ ref->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
+ = ref->table->change_seqno[OVSDB_IDL_CHANGE_MODIFY]
+ = ref->table->db->change_seqno + 1;
+
+ add_tracked_change_for_references(ref);
}
}
}
@@ -2587,7 +2806,14 @@ ovsdb_idl_row_change__(struct ovsdb_idl_row *row, const struct json *row_json,
row->change_seqno[change]
= row->table->change_seqno[change]
= row->table->db->change_seqno + 1;
+
if (table->modes[column_idx] & OVSDB_IDL_TRACK) {
+ if (ovs_list_is_empty(&row->track_node) &&
+ ovsdb_idl_track_is_set(row->table)) {
+ ovs_list_push_back(&row->table->track_list,
+ &row->track_node);
+ }
+
add_tracked_change_for_references(row);
if (!row->updated) {
row->updated = bitmap_allocate(class->n_columns);
@@ -3021,7 +3247,7 @@ ovsdb_idl_row_clear_old(struct ovsdb_idl_row *row)
{
ovs_assert(row->old_datum == row->new_datum);
if (!ovsdb_idl_row_is_orphan(row)) {
- if (ovsdb_idl_track_is_set(row->table)) {
+ if (ovsdb_idl_track_is_set(row->table) && !row->tracked_old_datum) {
row->tracked_old_datum = row->old_datum;
} else {
const struct ovsdb_idl_table_class *class = row->table->class_;
@@ -4273,8 +4499,10 @@ ovsdb_idl_txn_commit(struct ovsdb_idl_txn *txn)
if (!any_updates) {
txn->status = TXN_UNCHANGED;
json_destroy(operations);
- } else if (txn->db->idl->session
- && !jsonrpc_session_send(
+ } else if (!txn->db->idl->session) {
+ txn->status = TXN_TRY_AGAIN;
+ json_destroy(operations);
+ } else if (!jsonrpc_session_send(
txn->db->idl->session,
jsonrpc_create_request(
"transact", operations, &txn->request_id))) {
@@ -4663,6 +4891,7 @@ ovsdb_idl_txn_insert(struct ovsdb_idl_txn *txn,
hmap_insert(&row->table->rows, &row->hmap_node, uuid_hash(&row->uuid));
hmap_insert(&txn->txn_rows, &row->txn_node, uuid_hash(&row->uuid));
ovsdb_idl_add_to_indexes(row);
+
return row;
}
@@ -4981,6 +5210,8 @@ ovsdb_idl_set_lock(struct ovsdb_idl *idl, const char *lock_name)
}
if (idl->session) {
jsonrpc_session_send(idl->session, msg);
+ } else {
+ jsonrpc_msg_destroy(msg);
}
}
}
@@ -5331,13 +5562,103 @@ struct ovsdb_idl_txn *
ovsdb_idl_loop_run(struct ovsdb_idl_loop *loop)
{
ovsdb_idl_run(loop->idl);
+
+ /* See if we can commit the loop->committing_txn. */
+ if (loop->committing_txn) {
+ ovsdb_idl_try_commit_loop_txn(loop, NULL);
+ }
+
loop->open_txn = (loop->committing_txn
|| ovsdb_idl_get_seqno(loop->idl) == loop->skip_seqno
? NULL
: ovsdb_idl_txn_create(loop->idl));
+ if (loop->open_txn) {
+ ovsdb_idl_txn_add_comment(loop->open_txn, "%s", program_name);
+ }
return loop->open_txn;
}
+/* Attempts to commit the current transaction, if one is open.
+ *
+ * If a transaction was open, in this or a previous iteration of the main loop,
+ * and had not before finished committing (successfully or unsuccessfully), the
+ * return value is one of:
+ *
+ * 1: The transaction committed successfully (or it did not change anything in
+ * the database).
+ * 0: The transaction failed.
+ * -1: The commit is still in progress.
+ *
+ * Thus, the return value is -1 if the transaction is in progress and otherwise
+ * true for success, false for failure.
+ *
+ * (In the corner case where the IDL sends a transaction to the database and
+ * the database commits it, and the connection between the IDL and the database
+ * drops before the IDL receives the message confirming the commit, this
+ * function can return 0 even though the transaction succeeded.)
+ */
+static int
+ovsdb_idl_try_commit_loop_txn(struct ovsdb_idl_loop *loop,
+ bool *may_need_wakeup)
+{
+ if (!loop->committing_txn) {
+ /* Not a meaningful return value: no transaction was in progress. */
+ return 1;
+ }
+
+ int retval;
+ struct ovsdb_idl_txn *txn = loop->committing_txn;
+
+ enum ovsdb_idl_txn_status status = ovsdb_idl_txn_commit(txn);
+ if (status != TXN_INCOMPLETE) {
+ switch (status) {
+ case TXN_TRY_AGAIN:
+ /* We want to re-evaluate the database when it's changed from
+ * the contents that it had when we started the commit. (That
+ * might have already happened.) */
+ loop->skip_seqno = loop->precommit_seqno;
+ if (ovsdb_idl_get_seqno(loop->idl) != loop->skip_seqno
+ && may_need_wakeup) {
+ *may_need_wakeup = true;
+ }
+ retval = 0;
+ break;
+
+ case TXN_SUCCESS:
+ /* Possibly some work on the database was deferred because no
+ * further transaction could proceed. Wake up again. */
+ retval = 1;
+ loop->cur_cfg = loop->next_cfg;
+ if (may_need_wakeup) {
+ *may_need_wakeup = true;
+ }
+ break;
+
+ case TXN_UNCHANGED:
+ retval = 1;
+ loop->cur_cfg = loop->next_cfg;
+ break;
+
+ case TXN_ABORTED:
+ case TXN_NOT_LOCKED:
+ case TXN_ERROR:
+ retval = 0;
+ break;
+
+ case TXN_UNCOMMITTED:
+ case TXN_INCOMPLETE:
+ default:
+ OVS_NOT_REACHED();
+ }
+ ovsdb_idl_txn_destroy(txn);
+ loop->committing_txn = NULL;
+ } else {
+ retval = -1;
+ }
+
+ return retval;
+}
+
/* Attempts to commit the current transaction, if one is open, and sets up the
* poll loop to wake up when some more work might be needed.
*
@@ -5368,57 +5689,11 @@ ovsdb_idl_loop_commit_and_wait(struct ovsdb_idl_loop *loop)
loop->precommit_seqno = ovsdb_idl_get_seqno(loop->idl);
}
- struct ovsdb_idl_txn *txn = loop->committing_txn;
- int retval;
- if (txn) {
- enum ovsdb_idl_txn_status status = ovsdb_idl_txn_commit(txn);
- if (status != TXN_INCOMPLETE) {
- switch (status) {
- case TXN_TRY_AGAIN:
- /* We want to re-evaluate the database when it's changed from
- * the contents that it had when we started the commit. (That
- * might have already happened.) */
- loop->skip_seqno = loop->precommit_seqno;
- if (ovsdb_idl_get_seqno(loop->idl) != loop->skip_seqno) {
- poll_immediate_wake();
- }
- retval = 0;
- break;
-
- case TXN_SUCCESS:
- /* Possibly some work on the database was deferred because no
- * further transaction could proceed. Wake up again. */
- retval = 1;
- loop->cur_cfg = loop->next_cfg;
- poll_immediate_wake();
- break;
-
- case TXN_UNCHANGED:
- retval = 1;
- loop->cur_cfg = loop->next_cfg;
- break;
-
- case TXN_ABORTED:
- case TXN_NOT_LOCKED:
- case TXN_ERROR:
- retval = 0;
- break;
-
- case TXN_UNCOMMITTED:
- case TXN_INCOMPLETE:
- default:
- OVS_NOT_REACHED();
- }
- ovsdb_idl_txn_destroy(txn);
- loop->committing_txn = NULL;
- } else {
- retval = -1;
- }
- } else {
- /* Not a meaningful return value: no transaction was in progress. */
- retval = 1;
+ bool may_need_wakeup = false;
+ int retval = ovsdb_idl_try_commit_loop_txn(loop, &may_need_wakeup);
+ if (may_need_wakeup) {
+ poll_immediate_wake();
}
-
ovsdb_idl_wait(loop->idl);
return retval;
diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h
index 9f12ce3206f38bc1c6e4da03df06c8aef5cf8101..05bb48d66c3f2cdaf7237a24993080f54c5b363b 100644
--- a/lib/ovsdb-idl.h
+++ b/lib/ovsdb-idl.h
@@ -62,8 +62,9 @@ struct ovsdb_idl *ovsdb_idl_create(const char *remote,
bool retry);
struct ovsdb_idl *ovsdb_idl_create_unconnected(
const struct ovsdb_idl_class *, bool monitor_everything_by_default);
-void ovsdb_idl_set_remote(struct ovsdb_idl *, const char *, bool);
-void ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *, bool);
+void ovsdb_idl_set_remote(struct ovsdb_idl *, const char *remote, bool retry);
+void ovsdb_idl_set_shuffle_remotes(struct ovsdb_idl *, bool shuffle);
+void ovsdb_idl_reset_min_index(struct ovsdb_idl *);
void ovsdb_idl_destroy(struct ovsdb_idl *);
void ovsdb_idl_set_leader_only(struct ovsdb_idl *, bool leader_only);
@@ -75,7 +76,6 @@ void ovsdb_idl_set_lock(struct ovsdb_idl *, const char *lock_name);
bool ovsdb_idl_has_lock(const struct ovsdb_idl *);
bool ovsdb_idl_is_lock_contended(const struct ovsdb_idl *);
-const struct uuid * ovsdb_idl_get_monitor_id(const struct ovsdb_idl *);
unsigned int ovsdb_idl_get_seqno(const struct ovsdb_idl *);
bool ovsdb_idl_has_ever_connected(const struct ovsdb_idl *);
void ovsdb_idl_enable_reconnect(struct ovsdb_idl *);
@@ -99,12 +99,12 @@ const struct ovsdb_idl_table_class *ovsdb_idl_table_class_from_column(
* The client may choose any subset of the columns and tables to replicate,
* specifying it one of two ways:
*
- * - As a blacklist (adding the columns or tables to replicate). To do so,
+ * - As an allow list (adding the columns or tables to replicate). To do so,
* the client passes false as 'monitor_everything_by_default' to
* ovsdb_idl_create() and then calls ovsdb_idl_add_column() and
* ovsdb_idl_add_table() for the desired columns and, if necessary, tables.
*
- * - As a whitelist (replicating all columns and tables except those
+ * - As a deny list (replicating all columns and tables except those
* explicitly removed). To do so, the client passes true as
* 'monitor_everything_by_default' to ovsdb_idl_create() and then calls
* ovsdb_idl_omit() to remove columns.
diff --git a/lib/packets.c b/lib/packets.c
index 9d7cc502419decaed62dddb746073ca32fbbf0dc..4a7643c5dd3a860d062cda1d967a68173e0356bb 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -75,6 +75,29 @@ dpid_from_string(const char *s, uint64_t *dpidp)
return *dpidp != 0;
}
+uint64_t
+eth_addr_to_uint64(const struct eth_addr ea)
+{
+ return (((uint64_t) ntohs(ea.be16[0]) << 32)
+ | ((uint64_t) ntohs(ea.be16[1]) << 16)
+ | ntohs(ea.be16[2]));
+}
+
+void
+eth_addr_from_uint64(uint64_t x, struct eth_addr *ea)
+{
+ ea->be16[0] = htons(x >> 32);
+ ea->be16[1] = htons((x & 0xFFFF0000) >> 16);
+ ea->be16[2] = htons(x & 0xFFFF);
+}
+
+void
+eth_addr_mark_random(struct eth_addr *ea)
+{
+ ea->ea[0] &= ~1; /* Unicast. */
+ ea->ea[0] |= 2; /* Private. */
+}
+
/* Returns true if 'ea' is a reserved address, that a bridge must never
* forward, false otherwise.
*
@@ -524,6 +547,79 @@ eth_format_masked(const struct eth_addr eth,
}
}
+void
+in6_addr_solicited_node(struct in6_addr *addr, const struct in6_addr *ip6)
+{
+ union ovs_16aligned_in6_addr *taddr =
+ (union ovs_16aligned_in6_addr *) addr;
+ memset(taddr->be16, 0, sizeof(taddr->be16));
+ taddr->be16[0] = htons(0xff02);
+ taddr->be16[5] = htons(0x1);
+ taddr->be16[6] = htons(0xff00);
+ memcpy(&addr->s6_addr[13], &ip6->s6_addr[13], 3);
+}
+
+/*
+ * Generates ipv6 EUI64 address from the given eth addr
+ * and prefix and stores it in 'lla'
+ */
+void
+in6_generate_eui64(struct eth_addr ea, const struct in6_addr *prefix,
+ struct in6_addr *lla)
+{
+ union ovs_16aligned_in6_addr *taddr =
+ (union ovs_16aligned_in6_addr *) lla;
+ union ovs_16aligned_in6_addr *prefix_taddr =
+ (union ovs_16aligned_in6_addr *) prefix;
+ taddr->be16[0] = prefix_taddr->be16[0];
+ taddr->be16[1] = prefix_taddr->be16[1];
+ taddr->be16[2] = prefix_taddr->be16[2];
+ taddr->be16[3] = prefix_taddr->be16[3];
+ taddr->be16[4] = htons(((ea.ea[0] ^ 0x02) << 8) | ea.ea[1]);
+ taddr->be16[5] = htons(ea.ea[2] << 8 | 0x00ff);
+ taddr->be16[6] = htons(0xfe << 8 | ea.ea[3]);
+ taddr->be16[7] = ea.be16[2];
+}
+
+/* Generates ipv6 link local address from the given eth addr
+ * with prefix 'fe80::/64' and stores it in 'lla'. */
+void
+in6_generate_lla(struct eth_addr ea, struct in6_addr *lla)
+{
+ union ovs_16aligned_in6_addr *taddr =
+ (union ovs_16aligned_in6_addr *) lla;
+ memset(taddr->be16, 0, sizeof(taddr->be16));
+ taddr->be16[0] = htons(0xfe80);
+ taddr->be16[4] = htons(((ea.ea[0] ^ 0x02) << 8) | ea.ea[1]);
+ taddr->be16[5] = htons(ea.ea[2] << 8 | 0x00ff);
+ taddr->be16[6] = htons(0xfe << 8 | ea.ea[3]);
+ taddr->be16[7] = ea.be16[2];
+}
+
+/* Returns true if 'addr' is a link local address. Otherwise, false. */
+bool
+in6_is_lla(struct in6_addr *addr)
+{
+#ifdef s6_addr32
+ return addr->s6_addr32[0] == htonl(0xfe800000) && !(addr->s6_addr32[1]);
+#else
+ return addr->s6_addr[0] == 0xfe && addr->s6_addr[1] == 0x80 &&
+ !(addr->s6_addr[2] | addr->s6_addr[3] | addr->s6_addr[4] |
+ addr->s6_addr[5] | addr->s6_addr[6] | addr->s6_addr[7]);
+#endif
+}
+
+void
+ipv6_multicast_to_ethernet(struct eth_addr *eth, const struct in6_addr *ip6)
+{
+ eth->ea[0] = 0x33;
+ eth->ea[1] = 0x33;
+ eth->ea[2] = ip6->s6_addr[12];
+ eth->ea[3] = ip6->s6_addr[13];
+ eth->ea[4] = ip6->s6_addr[14];
+ eth->ea[5] = ip6->s6_addr[15];
+}
+
/* Given the IP netmask 'netmask', returns the number of bits of the IP address
* that it specifies, that is, the number of 1-bits in 'netmask'.
*
@@ -957,6 +1053,7 @@ eth_compose(struct dp_packet *b, const struct eth_addr eth_dst,
void *data;
struct eth_header *eth;
+
dp_packet_clear(b);
/* The magic 2 here ensures that the L3 header (when it is added later)
diff --git a/lib/packets.h b/lib/packets.h
index 5d7f82c45b6a28f527414f1300177d991902b2cf..481bc22fa1fe8630bb51a8982fe0827c52f018fd 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -52,6 +52,12 @@ flow_tnl_dst_is_set(const struct flow_tnl *tnl)
return tnl->ip_dst || ipv6_addr_is_set(&tnl->ipv6_dst);
}
+static inline bool
+flow_tnl_src_is_set(const struct flow_tnl *tnl)
+{
+ return tnl->ip_src || ipv6_addr_is_set(&tnl->ipv6_src);
+}
+
struct in6_addr flow_tnl_dst(const struct flow_tnl *tnl);
struct in6_addr flow_tnl_src(const struct flow_tnl *tnl);
@@ -275,12 +281,7 @@ static inline bool eth_addr_equal_except(const struct eth_addr a,
|| ((a.be16[2] ^ b.be16[2]) & mask.be16[2]));
}
-static inline uint64_t eth_addr_to_uint64(const struct eth_addr ea)
-{
- return (((uint64_t) ntohs(ea.be16[0]) << 32)
- | ((uint64_t) ntohs(ea.be16[1]) << 16)
- | ntohs(ea.be16[2]));
-}
+uint64_t eth_addr_to_uint64(const struct eth_addr ea);
static inline uint64_t eth_addr_vlan_to_uint64(const struct eth_addr ea,
uint16_t vlan)
@@ -288,12 +289,7 @@ static inline uint64_t eth_addr_vlan_to_uint64(const struct eth_addr ea,
return (((uint64_t)vlan << 48) | eth_addr_to_uint64(ea));
}
-static inline void eth_addr_from_uint64(uint64_t x, struct eth_addr *ea)
-{
- ea->be16[0] = htons(x >> 32);
- ea->be16[1] = htons((x & 0xFFFF0000) >> 16);
- ea->be16[2] = htons(x & 0xFFFF);
-}
+void eth_addr_from_uint64(uint64_t x, struct eth_addr *ea);
static inline struct eth_addr eth_addr_invert(const struct eth_addr src)
{
@@ -306,11 +302,7 @@ static inline struct eth_addr eth_addr_invert(const struct eth_addr src)
return dst;
}
-static inline void eth_addr_mark_random(struct eth_addr *ea)
-{
- ea->ea[0] &= ~1; /* Unicast. */
- ea->ea[0] |= 2; /* Private. */
-}
+void eth_addr_mark_random(struct eth_addr *ea);
static inline void eth_addr_random(struct eth_addr *ea)
{
@@ -963,7 +955,7 @@ union ovs_16aligned_in6_addr {
ovs_16aligned_be32 be32[4];
};
-/* Like struct in6_hdr, but whereas that struct requires 32-bit alignment, this
+/* Like struct ip6_hdr, but whereas that struct requires 32-bit alignment, this
* one only requires 16-bit alignment. */
struct ovs_16aligned_ip6_hdr {
union {
@@ -1205,80 +1197,19 @@ in6_addr_get_mapped_ipv4(const struct in6_addr *addr)
}
}
-static inline void
-in6_addr_solicited_node(struct in6_addr *addr, const struct in6_addr *ip6)
-{
- union ovs_16aligned_in6_addr *taddr =
- (union ovs_16aligned_in6_addr *) addr;
- memset(taddr->be16, 0, sizeof(taddr->be16));
- taddr->be16[0] = htons(0xff02);
- taddr->be16[5] = htons(0x1);
- taddr->be16[6] = htons(0xff00);
- memcpy(&addr->s6_addr[13], &ip6->s6_addr[13], 3);
-}
+void in6_addr_solicited_node(struct in6_addr *addr,
+ const struct in6_addr *ip6);
-/*
- * Generates ipv6 EUI64 address from the given eth addr
- * and prefix and stores it in 'lla'
- */
-static inline void
-in6_generate_eui64(struct eth_addr ea, struct in6_addr *prefix,
- struct in6_addr *lla)
-{
- union ovs_16aligned_in6_addr *taddr =
- (union ovs_16aligned_in6_addr *) lla;
- union ovs_16aligned_in6_addr *prefix_taddr =
- (union ovs_16aligned_in6_addr *) prefix;
- taddr->be16[0] = prefix_taddr->be16[0];
- taddr->be16[1] = prefix_taddr->be16[1];
- taddr->be16[2] = prefix_taddr->be16[2];
- taddr->be16[3] = prefix_taddr->be16[3];
- taddr->be16[4] = htons(((ea.ea[0] ^ 0x02) << 8) | ea.ea[1]);
- taddr->be16[5] = htons(ea.ea[2] << 8 | 0x00ff);
- taddr->be16[6] = htons(0xfe << 8 | ea.ea[3]);
- taddr->be16[7] = ea.be16[2];
-}
+void in6_generate_eui64(struct eth_addr ea, const struct in6_addr *prefix,
+ struct in6_addr *lla);
-/*
- * Generates ipv6 link local address from the given eth addr
- * with prefix 'fe80::/64' and stores it in 'lla'
- */
-static inline void
-in6_generate_lla(struct eth_addr ea, struct in6_addr *lla)
-{
- union ovs_16aligned_in6_addr *taddr =
- (union ovs_16aligned_in6_addr *) lla;
- memset(taddr->be16, 0, sizeof(taddr->be16));
- taddr->be16[0] = htons(0xfe80);
- taddr->be16[4] = htons(((ea.ea[0] ^ 0x02) << 8) | ea.ea[1]);
- taddr->be16[5] = htons(ea.ea[2] << 8 | 0x00ff);
- taddr->be16[6] = htons(0xfe << 8 | ea.ea[3]);
- taddr->be16[7] = ea.be16[2];
-}
+void in6_generate_lla(struct eth_addr ea, struct in6_addr *lla);
/* Returns true if 'addr' is a link local address. Otherwise, false. */
-static inline bool
-in6_is_lla(struct in6_addr *addr)
-{
-#ifdef s6_addr32
- return addr->s6_addr32[0] == htonl(0xfe800000) && !(addr->s6_addr32[1]);
-#else
- return addr->s6_addr[0] == 0xfe && addr->s6_addr[1] == 0x80 &&
- !(addr->s6_addr[2] | addr->s6_addr[3] | addr->s6_addr[4] |
- addr->s6_addr[5] | addr->s6_addr[6] | addr->s6_addr[7]);
-#endif
-}
+bool in6_is_lla(struct in6_addr *addr);
-static inline void
-ipv6_multicast_to_ethernet(struct eth_addr *eth, const struct in6_addr *ip6)
-{
- eth->ea[0] = 0x33;
- eth->ea[1] = 0x33;
- eth->ea[2] = ip6->s6_addr[12];
- eth->ea[3] = ip6->s6_addr[13];
- eth->ea[4] = ip6->s6_addr[14];
- eth->ea[5] = ip6->s6_addr[15];
-}
+void ipv6_multicast_to_ethernet(struct eth_addr *eth,
+ const struct in6_addr *ip6);
static inline bool dl_type_is_ip_any(ovs_be16 dl_type)
{
@@ -1447,6 +1378,74 @@ static inline ovs_be32 get_erspan_ts(enum erspan_ts_gra gra)
return ts;
}
+/*
+ * GTP-U protocol header and metadata
+ * See:
+ * User Plane Protocol and Architectural Analysis on 3GPP 5G System
+ * draft-hmm-dmm-5g-uplane-analysis-00
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Ver |P|R|E|S|N| Message Type| Length |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Tunnel Endpoint Identifier |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Sequence Number | N-PDU Number | Next-Ext-Hdr |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * GTP-U Flags:
+ * P: Protocol Type (Set to '1')
+ * R: Reserved Bit (Set to '0')
+ * E: Extension Header Flag (Set to '1' if extension header exists)
+ * S: Sequence Number Flag (Set to '1' if sequence number exists)
+ * N: N-PDU Number Flag (Set to '1' if N-PDU number exists)
+ *
+ * GTP-U Message Type:
+ * Indicates the type of GTP-U message.
+ *
+ * GTP-U Length:
+ * Indicates the length in octets of the payload.
+ *
+ * User payload is transmitted in G-PDU packets.
+ */
+
+#define GTPU_VER_MASK 0xe0
+#define GTPU_P_MASK 0x10
+#define GTPU_E_MASK 0x04
+#define GTPU_S_MASK 0x02
+
+/* GTP-U UDP port. */
+#define GTPU_DST_PORT 2152
+
+/* Default GTP-U flags: Ver = 1 and P = 1. */
+#define GTPU_FLAGS_DEFAULT 0x30
+
+/* GTP-U message types. */
+#define GTPU_MSGTYPE_REQ 1 /* Echo Request. */
+#define GTPU_MSGTYPE_REPL 2 /* Echo Reply. */
+#define GTPU_MSGTYPE_GPDU 255 /* User Payload. */
+
+struct gtpu_metadata {
+ uint8_t flags;
+ uint8_t msgtype;
+};
+BUILD_ASSERT_DECL(sizeof(struct gtpu_metadata) == 2);
+
+struct gtpuhdr {
+ struct gtpu_metadata md;
+ ovs_be16 len;
+ ovs_16aligned_be32 teid;
+};
+BUILD_ASSERT_DECL(sizeof(struct gtpuhdr) == 8);
+
+struct gtpuhdr_opt {
+ ovs_be16 seqno;
+ uint8_t pdu_number;
+ uint8_t next_ext_type;
+};
+BUILD_ASSERT_DECL(sizeof(struct gtpuhdr_opt) == 4);
+
/* VXLAN protocol header */
struct vxlanhdr {
union {
diff --git a/lib/perf-counter.c b/lib/perf-counter.c
index 402fabe1775b22d832cd9e94f20cbaee09c431d5..e4eca58d03455869d55ed29ee8966f4e563883bf 100644
--- a/lib/perf-counter.c
+++ b/lib/perf-counter.c
@@ -111,7 +111,7 @@ perf_counter_to_ds(struct ds *ds, struct perf_counter *pfc)
ratio = 0.0;
}
- ds_put_format(ds, "%-40s%12"PRIu64"%12"PRIu64"%12.1f\n",
+ ds_put_format(ds, "%-40s %12"PRIu64" %12"PRIu64" %12.1f\n",
pfc->name, pfc->n_events, pfc->total_count, ratio);
}
diff --git a/lib/pvector.c b/lib/pvector.c
index aaeee92147d09077d1572f1ba400dd048ad3cec0..cc527fdc4121a12f1dc43234ff716c32a7bfa8dc 100644
--- a/lib/pvector.c
+++ b/lib/pvector.c
@@ -33,7 +33,7 @@ pvector_impl_alloc(size_t size)
struct pvector_impl *impl;
impl = xmalloc(sizeof *impl + size * sizeof impl->vector[0]);
- impl->size = 0;
+ atomic_init(&impl->size, 0);
impl->allocated = size;
return impl;
@@ -117,18 +117,22 @@ pvector_insert(struct pvector *pvec, void *ptr, int priority)
{
struct pvector_impl *temp = pvec->temp;
struct pvector_impl *old = pvector_impl_get(pvec);
+ size_t size;
ovs_assert(ptr != NULL);
+ /* There is no possible concurrent writer. Insertions must be protected
+ * by a mutex or always be executed from the same thread. */
+ atomic_read_relaxed(&old->size, &size);
+
/* Check if can add to the end without reallocation. */
- if (!temp && old->allocated > old->size &&
- (!old->size || priority <= old->vector[old->size - 1].priority)) {
- old->vector[old->size].ptr = ptr;
- old->vector[old->size].priority = priority;
+ if (!temp && old->allocated > size &&
+ (!size || priority <= old->vector[size - 1].priority)) {
+ old->vector[size].ptr = ptr;
+ old->vector[size].priority = priority;
/* Size increment must not be visible to the readers before the new
* entry is stored. */
- atomic_thread_fence(memory_order_release);
- ++old->size;
+ atomic_store_explicit(&old->size, size + 1, memory_order_release);
} else {
if (!temp) {
temp = pvector_impl_dup(old);
diff --git a/lib/pvector.h b/lib/pvector.h
index b990ed9d590c51ff63da651463ee1d294841fa3c..6da8c5b6335557a21ca68849522039500f23d503 100644
--- a/lib/pvector.h
+++ b/lib/pvector.h
@@ -26,10 +26,12 @@
/* Concurrent Priority Vector
* ==========================
*
- * Concurrent priority vector holds non-NULL pointers to objects in an
- * increasing priority order and allows readers to traverse the vector without
- * being concerned about writers modifying the vector as they are traversing
- * it.
+ * Concurrent priority vector holds non-NULL pointers to objects in a
+ * nondecreasing priority order and allows readers to traverse the vector
+ * without being concerned about writers modifying the vector as they are
+ * traversing it.
+ *
+ * Multiple elements of a given priority are allowed.
*
* The priority order is maintained as a linear vector of elements to allow
* for efficient memory prefetching.
@@ -69,8 +71,8 @@ struct pvector_entry {
};
struct pvector_impl {
- size_t size; /* Number of entries in the vector. */
- size_t allocated; /* Number of allocated entries. */
+ atomic_size_t size; /* Number of entries in the vector. */
+ size_t allocated; /* Number of allocated entries. */
struct pvector_entry vector[];
};
@@ -181,12 +183,17 @@ pvector_cursor_init(const struct pvector *pvec,
{
const struct pvector_impl *impl;
struct pvector_cursor cursor;
+ size_t size;
impl = ovsrcu_get(struct pvector_impl *, &pvec->impl);
- ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0]);
+ /* Use memory_order_acquire to ensure entry access can not be
+ * reordered to happen before size read. */
+ atomic_read_explicit(&CONST_CAST(struct pvector_impl *, impl)->size,
+ &size, memory_order_acquire);
+ ovs_prefetch_range(impl->vector, size * sizeof impl->vector[0]);
- cursor.size = impl->size;
+ cursor.size = size;
cursor.vector = impl->vector;
cursor.entry_idx = -1;
diff --git a/lib/reconnect.c b/lib/reconnect.c
index c89abab8894ab9947cc39b93b17058a56fc2f647..a929ddfd2d0119699dc4e4d148a49aba0c7f0562 100644
--- a/lib/reconnect.c
+++ b/lib/reconnect.c
@@ -61,6 +61,7 @@ struct reconnect {
long long int last_activity;
long long int last_connected;
long long int last_disconnected;
+ long long int last_receive_attempt;
unsigned int max_tries;
unsigned int backoff_free_tries;
@@ -109,6 +110,7 @@ reconnect_create(long long int now)
fsm->last_activity = now;
fsm->last_connected = LLONG_MAX;
fsm->last_disconnected = LLONG_MAX;
+ fsm->last_receive_attempt = now;
fsm->max_tries = UINT_MAX;
fsm->creation_time = now;
@@ -501,6 +503,19 @@ reconnect_activity(struct reconnect *fsm, long long int now)
fsm->last_activity = now;
}
+/* Tell 'fsm' that some attempt to receive data on the connection was made at
+ * 'now'. The FSM only allows the probe interval timer to expire when an
+ * attempt to receive data on the connection was made at or after the time
+ * when the timer should have expired. This helps in the case where there's a
+ * long delay in the poll loop and then reconnect_run() executes before the
+ * code to try to receive anything from the remote runs. (To disable this
+ * feature, just call reconnect_receive_attempted(fsm, LLONG_MAX).) */
+void
+reconnect_receive_attempted(struct reconnect *fsm, long long int now)
+{
+ fsm->last_receive_attempt = now;
+}
+
static void
reconnect_transition__(struct reconnect *fsm, long long int now,
enum state state)
@@ -541,13 +556,19 @@ reconnect_deadline__(const struct reconnect *fsm)
case S_ACTIVE:
if (fsm->probe_interval) {
long long int base = MAX(fsm->last_activity, fsm->state_entered);
- return base + fsm->probe_interval;
+ long long int expiration = base + fsm->probe_interval;
+ if (fsm->last_receive_attempt >= expiration) {
+ return expiration;
+ }
}
return LLONG_MAX;
case S_IDLE:
if (fsm->probe_interval) {
- return fsm->state_entered + fsm->probe_interval;
+ long long int expiration = fsm->state_entered + fsm->probe_interval;
+ if (fsm->last_receive_attempt >= expiration) {
+ return expiration;
+ }
}
return LLONG_MAX;
diff --git a/lib/reconnect.h b/lib/reconnect.h
index 9f2d469e2ddd5c69e2138008103a4e862d4ff64e..40cc569c42d0bd23b14a06aa03a67bdc7d27fe16 100644
--- a/lib/reconnect.h
+++ b/lib/reconnect.h
@@ -83,6 +83,7 @@ void reconnect_connected(struct reconnect *, long long int now);
void reconnect_connect_failed(struct reconnect *, long long int now,
int error);
void reconnect_activity(struct reconnect *, long long int now);
+void reconnect_receive_attempted(struct reconnect *, long long int now);
enum reconnect_action {
RECONNECT_CONNECT = 1,
diff --git a/lib/rtnetlink.c b/lib/rtnetlink.c
index f822dffc7bfd0de1806c3e69af7eb6da5f4b10b2..12580292575193f3c55d5a7703412b764bd378cc 100644
--- a/lib/rtnetlink.c
+++ b/lib/rtnetlink.c
@@ -68,12 +68,12 @@ rtnetlink_parse_link_info(const struct nlattr *nla,
ARRAY_SIZE(linkinfo_policy));
if (parsed) {
- change->master = (linkinfo[IFLA_INFO_KIND]
- ? nl_attr_get_string(linkinfo[IFLA_INFO_KIND])
- : NULL);
- change->slave = (linkinfo[IFLA_INFO_SLAVE_KIND]
- ? nl_attr_get_string(linkinfo[IFLA_INFO_SLAVE_KIND])
- : NULL);
+ change->primary = (linkinfo[IFLA_INFO_KIND]
+ ? nl_attr_get_string(linkinfo[IFLA_INFO_KIND])
+ : NULL);
+ change->sub = (linkinfo[IFLA_INFO_SLAVE_KIND]
+ ? nl_attr_get_string(linkinfo[IFLA_INFO_SLAVE_KIND])
+ : NULL);
}
return parsed;
@@ -134,8 +134,8 @@ rtnetlink_parse(struct ofpbuf *buf, struct rtnetlink_change *change)
parsed = rtnetlink_parse_link_info(attrs[IFLA_LINKINFO],
change);
} else {
- change->master = NULL;
- change->slave = NULL;
+ change->primary = NULL;
+ change->sub = NULL;
}
}
} else if (rtnetlink_type_is_rtnlgrp_addr(nlmsg->nlmsg_type)) {
diff --git a/lib/rtnetlink.h b/lib/rtnetlink.h
index 422d1db11a09e59b67cac24d8971aad5b1f6604e..b6ddb4bd1cbf42316adc55bd91b423c8dacac514 100644
--- a/lib/rtnetlink.h
+++ b/lib/rtnetlink.h
@@ -49,9 +49,9 @@ struct rtnetlink_change {
/* Network device address status. */
/* xxx To be added when needed. */
- /* Link info. */
- const char *master; /* Kind of master (NULL if not master). */
- const char *slave; /* Kind of slave (NULL if not slave). */
+ /* Link bonding info. */
+ const char *primary; /* Kind of primary (NULL if not primary). */
+ const char *sub; /* Kind of subordinate (NULL if not sub). */
};
/* Function called to report that a netdev has changed. 'change' describes the
diff --git a/lib/sha1.c b/lib/sha1.c
index 4f48ef21027e9d408f70fe5de5322111b3dc49e3..87360d9cd0dde7115ac851cbd3d827c374bdce72 100644
--- a/lib/sha1.c
+++ b/lib/sha1.c
@@ -197,7 +197,7 @@ sha1_init(struct sha1_ctx *sha_info)
* inputLen: The length of the input buffer.
*/
void
-sha1_update(struct sha1_ctx *ctx, const void *buffer_, size_t count)
+sha1_update(struct sha1_ctx *ctx, const void *buffer_, uint32_t count)
{
const uint8_t *buffer = buffer_;
unsigned int i;
@@ -274,7 +274,7 @@ sha1_final(struct sha1_ctx *ctx, uint8_t digest[SHA1_DIGEST_SIZE])
/* Computes the hash of 'n' bytes in 'data' into 'digest'. */
void
-sha1_bytes(const void *data, size_t n, uint8_t digest[SHA1_DIGEST_SIZE])
+sha1_bytes(const void *data, uint32_t n, uint8_t digest[SHA1_DIGEST_SIZE])
{
struct sha1_ctx ctx;
diff --git a/lib/sha1.h b/lib/sha1.h
index eda265dfc566b028ab93f94b3d25c006fb1919ca..a635ff7689e71bd0934f24e5eb59af851147f761 100644
--- a/lib/sha1.h
+++ b/lib/sha1.h
@@ -45,9 +45,9 @@ struct sha1_ctx {
};
void sha1_init(struct sha1_ctx *);
-void sha1_update(struct sha1_ctx *, const void *, size_t);
+void sha1_update(struct sha1_ctx *, const void *, uint32_t size);
void sha1_final(struct sha1_ctx *, uint8_t digest[SHA1_DIGEST_SIZE]);
-void sha1_bytes(const void *, size_t, uint8_t digest[SHA1_DIGEST_SIZE]);
+void sha1_bytes(const void *, uint32_t size, uint8_t digest[SHA1_DIGEST_SIZE]);
#define SHA1_FMT \
"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" \
diff --git a/lib/smap.c b/lib/smap.c
index 149b8b2436efb02e85d03dab3a0b545d82079f4b..e82261497cfd589ff6b117ee159a1010ff1e36f8 100644
--- a/lib/smap.c
+++ b/lib/smap.c
@@ -247,6 +247,22 @@ smap_get_int(const struct smap *smap, const char *key, int def)
return i_value;
}
+/* Gets the value associated with 'key' in 'smap' and converts it to an
+ * unsigned int. If 'key' is not in 'smap' or a valid unsigned integer
+ * can't be parsed from its value, returns 'def'. */
+unsigned int
+smap_get_uint(const struct smap *smap, const char *key, unsigned int def)
+{
+ const char *value = smap_get(smap, key);
+ unsigned int u_value;
+
+ if (!value || !str_to_uint(value, 10, &u_value)) {
+ return def;
+ }
+
+ return u_value;
+}
+
/* Gets the value associated with 'key' in 'smap' and converts it to an
* unsigned long long. If 'key' is not in 'smap' or a valid number can't be
* parsed from it's value, returns 'def'. */
diff --git a/lib/smap.h b/lib/smap.h
index 766c65f7f504fe09dfec5e81ed25abf09e799394..a92115966706c4e4235807f10cb70fdaa018fd5e 100644
--- a/lib/smap.h
+++ b/lib/smap.h
@@ -104,6 +104,8 @@ const char *smap_get_def(const struct smap *, const char *key,
struct smap_node *smap_get_node(const struct smap *, const char *);
bool smap_get_bool(const struct smap *smap, const char *key, bool def);
int smap_get_int(const struct smap *smap, const char *key, int def);
+unsigned int smap_get_uint(const struct smap *smap, const char *key,
+ unsigned int def);
unsigned long long int smap_get_ullong(const struct smap *, const char *key,
unsigned long long def);
bool smap_get_uuid(const struct smap *, const char *key, struct uuid *);
diff --git a/lib/stream-windows.c b/lib/stream-windows.c
index 34bc610b6f499b8e4ab6dc1df260e0628d389f5e..5c4c55e5d4aa1f7a8699d8906d69b78b6225529f 100644
--- a/lib/stream-windows.c
+++ b/lib/stream-windows.c
@@ -41,7 +41,7 @@ static void maybe_unlink_and_free(char *path);
#define LOCAL_PREFIX "\\\\.\\pipe\\"
/* Size of the allowed PSIDs for securing Named Pipe. */
-#define ALLOWED_PSIDS_SIZE 2
+#define ALLOWED_PSIDS_SIZE 3
/* This function has the purpose to remove all the slashes received in s. */
static char *
@@ -412,6 +412,9 @@ create_pnpipe(char *name)
PACL acl = NULL;
PSECURITY_DESCRIPTOR psd = NULL;
HANDLE npipe;
+ HANDLE hToken = NULL;
+ DWORD dwBufSize = 0;
+ PTOKEN_USER pTokenUsr = NULL;
/* Disable access over network. */
if (!AllocateAndInitializeSid(&sia, 1, SECURITY_NETWORK_RID,
@@ -438,6 +441,32 @@ create_pnpipe(char *name)
goto handle_error;
}
+    /* Open the access token of the calling process. */
+    if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken)) {
+        VLOG_ERR_RL(&rl, "Error opening access token of calling process.");
+        goto handle_error;
+    }
+
+    /* Query the buffer size needed to hold the TOKEN_USER data.  This call
+     * passes a NULL buffer only to fill in 'dwBufSize', so its return value
+     * is deliberately ignored. */
+    GetTokenInformation(hToken, TokenUser, NULL, 0, &dwBufSize);
+
+    pTokenUsr = xmalloc(dwBufSize);
+    memset(pTokenUsr, 0, dwBufSize);
+
+    /* Retrieve the token information in a TOKEN_USER structure. */
+    if (!GetTokenInformation(hToken, TokenUser, pTokenUsr, dwBufSize,
+                             &dwBufSize)) {
+        VLOG_ERR_RL(&rl, "Error retrieving token information.");
+        /* 'handle_error' only frees memory, so the token handle has to be
+         * closed here to avoid leaking it. */
+        CloseHandle(hToken);
+        goto handle_error;
+    }
+    /* The token handle is not used past this point. */
+    CloseHandle(hToken);
+
+    if (!IsValidSid(pTokenUsr->User.Sid)) {
+        VLOG_ERR_RL(&rl, "Invalid SID.");
+        goto handle_error;
+    }
+    /* Third allowed SID: the user of the calling process's token. */
+    allowedPsid[2] = pTokenUsr->User.Sid;
+
+
for (int i = 0; i < ALLOWED_PSIDS_SIZE; i++) {
aclSize += sizeof(ACCESS_ALLOWED_ACE) +
GetLengthSid(allowedPsid[i]) -
@@ -490,11 +519,13 @@ create_pnpipe(char *name)
npipe = CreateNamedPipe(name, PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED,
PIPE_TYPE_MESSAGE | PIPE_READMODE_BYTE | PIPE_WAIT,
64, BUFSIZE, BUFSIZE, 0, &sa);
+ free(pTokenUsr);
free(acl);
free(psd);
return npipe;
handle_error:
+ free(pTokenUsr);
free(acl);
free(psd);
return INVALID_HANDLE_VALUE;
diff --git a/lib/tc.c b/lib/tc.c
index 12af0192b61438bf34248e4cb88bdd2c6c3b1e74..c2de78bfe347e93aec43e9ac34ff0d83bbb21c5c 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -51,21 +51,20 @@
#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
#endif
-#if TCA_MAX < 14
+#ifndef TCA_DUMP_FLAGS_TERSE
+#define TCA_DUMP_FLAGS_TERSE (1 << 0)
+#endif
+
+#if TCA_MAX < 15
#define TCA_CHAIN 11
#define TCA_INGRESS_BLOCK 13
+#define TCA_DUMP_FLAGS 15
#endif
VLOG_DEFINE_THIS_MODULE(tc);
static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
-enum tc_offload_policy {
- TC_POLICY_NONE,
- TC_POLICY_SKIP_SW,
- TC_POLICY_SKIP_HW
-};
-
static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
struct tc_pedit_key_ex {
@@ -313,6 +312,24 @@ static const struct nl_policy tca_flower_policy[] = {
.min_len = ETH_ALEN,
.optional = true, },
[TCA_FLOWER_KEY_ETH_TYPE] = { .type = NL_A_U16, .optional = false, },
+ [TCA_FLOWER_KEY_ARP_SIP] = { .type = NL_A_U32, .optional = true, },
+ [TCA_FLOWER_KEY_ARP_TIP] = { .type = NL_A_U32, .optional = true, },
+ [TCA_FLOWER_KEY_ARP_SHA] = { .type = NL_A_UNSPEC,
+ .min_len = ETH_ALEN,
+ .optional = true, },
+ [TCA_FLOWER_KEY_ARP_THA] = { .type = NL_A_UNSPEC,
+ .min_len = ETH_ALEN,
+ .optional = true, },
+ [TCA_FLOWER_KEY_ARP_OP] = { .type = NL_A_U8, .optional = true, },
+ [TCA_FLOWER_KEY_ARP_SIP_MASK] = { .type = NL_A_U32, .optional = true, },
+ [TCA_FLOWER_KEY_ARP_TIP_MASK] = { .type = NL_A_U32, .optional = true, },
+ [TCA_FLOWER_KEY_ARP_SHA_MASK] = { .type = NL_A_UNSPEC,
+ .min_len = ETH_ALEN,
+ .optional = true, },
+ [TCA_FLOWER_KEY_ARP_THA_MASK] = { .type = NL_A_UNSPEC,
+ .min_len = ETH_ALEN,
+ .optional = true, },
+ [TCA_FLOWER_KEY_ARP_OP_MASK] = { .type = NL_A_U8, .optional = true, },
[TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
[TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
[TCA_FLOWER_KEY_IP_PROTO] = { .type = NL_A_U8, .optional = true, },
@@ -411,6 +428,50 @@ static const struct nl_policy tca_flower_policy[] = {
.optional = true, },
};
+/* Attributes required when parsing a flower filter from a terse dump; used
+ * by nl_parse_flower_options() in place of tca_flower_policy when 'terse'
+ * is true. */
+static const struct nl_policy tca_flower_terse_policy[] = {
+ [TCA_FLOWER_FLAGS] = { .type = NL_A_U32, .optional = false, },
+ [TCA_FLOWER_ACT] = { .type = NL_A_NESTED, .optional = false, },
+};
+
+/* Copies the ARP match fields (sender/target IP, sender/target hardware
+ * address and opcode) from the parsed flower attributes 'attrs' into
+ * 'flower->key.arp' and 'flower->mask.arp'. A field is copied only if its
+ * *_MASK attribute is present.
+ *
+ * NOTE(review): the value attribute is read whenever the mask attribute is
+ * present, without a separate NULL check -- this assumes the kernel always
+ * sends a value together with its mask; confirm. */
+static void
+nl_parse_flower_arp(struct nlattr **attrs, struct tc_flower *flower)
+{
+ const struct eth_addr *eth;
+
+ if (attrs[TCA_FLOWER_KEY_ARP_SIP_MASK]) {
+ flower->key.arp.spa =
+ nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_SIP]);
+ flower->mask.arp.spa =
+ nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_SIP_MASK]);
+ }
+ if (attrs[TCA_FLOWER_KEY_ARP_TIP_MASK]) {
+ flower->key.arp.tpa =
+ nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_TIP]);
+ flower->mask.arp.tpa =
+ nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ARP_TIP_MASK]);
+ }
+ if (attrs[TCA_FLOWER_KEY_ARP_SHA_MASK]) {
+ eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_SHA], ETH_ALEN);
+ memcpy(&flower->key.arp.sha, eth, sizeof flower->key.arp.sha);
+
+ eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_SHA_MASK], ETH_ALEN);
+ memcpy(&flower->mask.arp.sha, eth, sizeof flower->mask.arp.sha);
+ }
+ if (attrs[TCA_FLOWER_KEY_ARP_THA_MASK]) {
+ eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_THA], ETH_ALEN);
+ memcpy(&flower->key.arp.tha, eth, sizeof flower->key.arp.tha);
+
+ eth = nl_attr_get_unspec(attrs[TCA_FLOWER_KEY_ARP_THA_MASK], ETH_ALEN);
+ memcpy(&flower->mask.arp.tha, eth, sizeof flower->mask.arp.tha);
+ }
+ if (attrs[TCA_FLOWER_KEY_ARP_OP_MASK]) {
+ flower->key.arp.opcode =
+ nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ARP_OP]);
+ flower->mask.arp.opcode =
+ nl_attr_get_u8(attrs[TCA_FLOWER_KEY_ARP_OP_MASK]);
+ }
+}
+
static void
nl_parse_flower_eth(struct nlattr **attrs, struct tc_flower *flower)
{
@@ -650,18 +711,26 @@ nl_parse_flower_tunnel(struct nlattr **attrs, struct tc_flower *flower)
flower->mask.tunnel.id = OVS_BE64_MAX;
}
if (attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK]) {
+ flower->mask.tunnel.ipv4.ipv4_src =
+ nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK]);
flower->key.tunnel.ipv4.ipv4_src =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_SRC]);
}
if (attrs[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK]) {
+ flower->mask.tunnel.ipv4.ipv4_dst =
+ nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_DST_MASK]);
flower->key.tunnel.ipv4.ipv4_dst =
nl_attr_get_be32(attrs[TCA_FLOWER_KEY_ENC_IPV4_DST]);
}
if (attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK]) {
+ flower->mask.tunnel.ipv6.ipv6_src =
+ nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK]);
flower->key.tunnel.ipv6.ipv6_src =
nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_SRC]);
}
if (attrs[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK]) {
+ flower->mask.tunnel.ipv6.ipv6_dst =
+ nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_DST_MASK]);
flower->key.tunnel.ipv6.ipv6_dst =
nl_attr_get_in6_addr(attrs[TCA_FLOWER_KEY_ENC_IPV6_DST]);
}
@@ -934,6 +1003,7 @@ nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
int flower_off = m->flower_offset;
int sz = m->size;
int mf = m->offset;
+ int ef = ROUND_UP(mf, 4);
if (m->htype != type) {
continue;
@@ -941,9 +1011,10 @@ nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
/* check overlap between current pedit key, which is always
* 4 bytes (range [off, off + 3]), and a map entry in
- * flower_pedit_map (range [mf, mf + sz - 1]) */
+ * flower_pedit_map sf = ROUND_DOWN(mf, 4)
+ * (range [sf|mf, (mf + sz - 1)|ef]) */
if ((keys->off >= mf && keys->off < mf + sz)
- || (keys->off + 3 >= mf && keys->off + 3 < mf + sz)) {
+ || (keys->off + 3 >= mf && keys->off + 3 < ef)) {
int diff = flower_off + (keys->off - mf);
ovs_be32 *dst = (void *) (rewrite_key + diff);
ovs_be32 *dst_m = (void *) (rewrite_mask + diff);
@@ -1573,7 +1644,7 @@ nl_parse_act_csum(struct nlattr *options, struct tc_flower *flower)
static const struct nl_policy act_policy[] = {
[TCA_ACT_KIND] = { .type = NL_A_STRING, .optional = false, },
[TCA_ACT_COOKIE] = { .type = NL_A_UNSPEC, .optional = true, },
- [TCA_ACT_OPTIONS] = { .type = NL_A_NESTED, .optional = false, },
+ [TCA_ACT_OPTIONS] = { .type = NL_A_NESTED, .optional = true, },
[TCA_ACT_STATS] = { .type = NL_A_NESTED, .optional = false, },
};
@@ -1584,7 +1655,8 @@ static const struct nl_policy stats_policy[] = {
};
static int
-nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
+nl_parse_single_action(struct nlattr *action, struct tc_flower *flower,
+ bool terse)
{
struct nlattr *act_options;
struct nlattr *act_stats;
@@ -1597,7 +1669,8 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
int err = 0;
if (!nl_parse_nested(action, act_policy, action_attrs,
- ARRAY_SIZE(act_policy))) {
+ ARRAY_SIZE(act_policy)) ||
+ (!terse && !action_attrs[TCA_ACT_OPTIONS])) {
VLOG_ERR_RL(&error_rl, "failed to parse single action options");
return EPROTO;
}
@@ -1606,7 +1679,9 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
act_options = action_attrs[TCA_ACT_OPTIONS];
act_cookie = action_attrs[TCA_ACT_COOKIE];
- if (!strcmp(act_kind, "gact")) {
+ if (terse) {
+ /* Terse dump doesn't provide act options attribute. */
+ } else if (!strcmp(act_kind, "gact")) {
err = nl_parse_act_gact(act_options, flower);
} else if (!strcmp(act_kind, "mirred")) {
err = nl_parse_act_mirred(act_options, flower);
@@ -1647,8 +1722,10 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
}
bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs);
- put_32aligned_u64(&stats->n_packets, bs->packets);
- put_32aligned_u64(&stats->n_bytes, bs->bytes);
+ if (bs->packets) {
+ put_32aligned_u64(&stats->n_packets, bs->packets);
+ put_32aligned_u64(&stats->n_bytes, bs->bytes);
+ }
return 0;
}
@@ -1656,7 +1733,8 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
#define TCA_ACT_MIN_PRIO 1
static int
-nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower)
+nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower,
+ bool terse)
{
const struct nlattr *actions = attrs[TCA_FLOWER_ACT];
static struct nl_policy actions_orders_policy[TCA_ACT_MAX_NUM + 1] = {};
@@ -1682,7 +1760,7 @@ nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower)
VLOG_DBG_RL(&error_rl, "Can only support %d actions", TCA_ACT_MAX_NUM);
return EOPNOTSUPP;
}
- err = nl_parse_single_action(actions_orders[i], flower);
+ err = nl_parse_single_action(actions_orders[i], flower, terse);
if (err) {
return err;
@@ -1701,11 +1779,21 @@ nl_parse_flower_actions(struct nlattr **attrs, struct tc_flower *flower)
}
static int
-nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower)
+nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower,
+ bool terse)
{
struct nlattr *attrs[ARRAY_SIZE(tca_flower_policy)];
int err;
+ if (terse) {
+ if (!nl_parse_nested(nl_options, tca_flower_terse_policy,
+ attrs, ARRAY_SIZE(tca_flower_terse_policy))) {
+ VLOG_ERR_RL(&error_rl, "failed to parse flower classifier terse options");
+ return EPROTO;
+ }
+ goto skip_flower_opts;
+ }
+
if (!nl_parse_nested(nl_options, tca_flower_policy,
attrs, ARRAY_SIZE(tca_flower_policy))) {
VLOG_ERR_RL(&error_rl, "failed to parse flower classifier options");
@@ -1713,6 +1801,7 @@ nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower)
}
nl_parse_flower_eth(attrs, flower);
+ nl_parse_flower_arp(attrs, flower);
nl_parse_flower_mpls(attrs, flower);
nl_parse_flower_vlan(attrs, flower);
nl_parse_flower_ip(attrs, flower);
@@ -1721,13 +1810,14 @@ nl_parse_flower_options(struct nlattr *nl_options, struct tc_flower *flower)
return err;
}
+skip_flower_opts:
nl_parse_flower_flags(attrs, flower);
- return nl_parse_flower_actions(attrs, flower);
+ return nl_parse_flower_actions(attrs, flower, terse);
}
int
parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
- struct tc_flower *flower)
+ struct tc_flower *flower, bool terse)
{
struct tcmsg *tc;
struct nlattr *ta[ARRAY_SIZE(tca_policy)];
@@ -1770,15 +1860,22 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
return EPROTO;
}
- return nl_parse_flower_options(ta[TCA_OPTIONS], flower);
+ return nl_parse_flower_options(ta[TCA_OPTIONS], flower, terse);
}
int
-tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump)
+tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump, bool terse)
{
struct ofpbuf request;
request_from_tcf_id(id, 0, RTM_GETTFILTER, NLM_F_DUMP, &request);
+ if (terse) {
+ struct nla_bitfield32 dump_flags = { TCA_DUMP_FLAGS_TERSE,
+ TCA_DUMP_FLAGS_TERSE };
+
+ nl_msg_put_unspec(&request, TCA_DUMP_FLAGS, &dump_flags,
+ sizeof dump_flags);
+ }
nl_dump_start(dump, NETLINK_ROUTE, &request);
ofpbuf_uninit(&request);
@@ -1807,7 +1904,7 @@ tc_get_flower(struct tcf_id *id, struct tc_flower *flower)
return error;
}
- error = parse_netlink_to_tc_flower(reply, id, flower);
+ error = parse_netlink_to_tc_flower(reply, id, flower, false);
ofpbuf_delete(reply);
return error;
}
@@ -2038,7 +2135,7 @@ nl_msg_put_act_tunnel_key_set(struct ofpbuf *request, bool id_present,
if (ipv4_dst) {
nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_SRC, ipv4_src);
nl_msg_put_be32(request, TCA_TUNNEL_KEY_ENC_IPV4_DST, ipv4_dst);
- } else if (!is_all_zeros(ipv6_dst, sizeof *ipv6_dst)) {
+ } else if (ipv6_addr_is_set(ipv6_dst)) {
nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_DST,
ipv6_dst);
nl_msg_put_in6_addr(request, TCA_TUNNEL_KEY_ENC_IPV6_SRC,
@@ -2135,12 +2232,10 @@ nl_msg_put_act_ct(struct ofpbuf *request, struct tc_action *action)
action->ct.range.ipv4.max);
}
} else if (action->ct.range.ip_family == AF_INET6) {
- size_t ipv6_sz = sizeof(action->ct.range.ipv6.max);
nl_msg_put_in6_addr(request, TCA_CT_NAT_IPV6_MIN,
&action->ct.range.ipv6.min);
- if (!is_all_zeros(&action->ct.range.ipv6.max,
- ipv6_sz)) {
+ if (ipv6_addr_is_set(&action->ct.range.ipv6.max)) {
nl_msg_put_in6_addr(request, TCA_CT_NAT_IPV6_MAX,
&action->ct.range.ipv6.max);
}
@@ -2594,8 +2689,12 @@ nl_msg_put_flower_tunnel_opts(struct ofpbuf *request, uint16_t type,
static void
nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower)
{
+ ovs_be32 ipv4_src_mask = flower->mask.tunnel.ipv4.ipv4_src;
+ ovs_be32 ipv4_dst_mask = flower->mask.tunnel.ipv4.ipv4_dst;
ovs_be32 ipv4_src = flower->key.tunnel.ipv4.ipv4_src;
ovs_be32 ipv4_dst = flower->key.tunnel.ipv4.ipv4_dst;
+ struct in6_addr *ipv6_src_mask = &flower->mask.tunnel.ipv6.ipv6_src;
+ struct in6_addr *ipv6_dst_mask = &flower->mask.tunnel.ipv6.ipv6_dst;
struct in6_addr *ipv6_src = &flower->key.tunnel.ipv6.ipv6_src;
struct in6_addr *ipv6_dst = &flower->key.tunnel.ipv6.ipv6_dst;
ovs_be16 tp_dst = flower->key.tunnel.tp_dst;
@@ -2606,12 +2705,21 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower)
uint8_t ttl_mask = flower->mask.tunnel.ttl;
ovs_be64 id_mask = flower->mask.tunnel.id;
- if (ipv4_dst) {
- nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC, ipv4_src);
+ if (ipv4_dst_mask || ipv4_src_mask) {
+ nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
+ ipv4_dst_mask);
+ nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
+ ipv4_src_mask);
nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_DST, ipv4_dst);
- } else if (!is_all_zeros(ipv6_dst, sizeof *ipv6_dst)) {
- nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC, ipv6_src);
+ nl_msg_put_be32(request, TCA_FLOWER_KEY_ENC_IPV4_SRC, ipv4_src);
+ } else if (ipv6_addr_is_set(ipv6_dst_mask) ||
+ ipv6_addr_is_set(ipv6_src_mask)) {
+ nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
+ ipv6_dst_mask);
+ nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
+ ipv6_src_mask);
nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_DST, ipv6_dst);
+ nl_msg_put_in6_addr(request, TCA_FLOWER_KEY_ENC_IPV6_SRC, ipv6_src);
}
if (tos_mask) {
nl_msg_put_u8(request, TCA_FLOWER_KEY_ENC_IP_TOS, tos);
@@ -2645,6 +2753,7 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
bool is_vlan = eth_type_vlan(flower->key.eth_type);
bool is_qinq = is_vlan && eth_type_vlan(flower->key.encap_eth_type[0]);
bool is_mpls = eth_type_mpls(flower->key.eth_type);
+ enum tc_offload_policy policy = flower->tc_policy;
int err;
/* need to parse acts first as some acts require changing the matching
@@ -2669,6 +2778,14 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
FLOWER_PUT_MASKED_VALUE(dst_mac, TCA_FLOWER_KEY_ETH_DST);
FLOWER_PUT_MASKED_VALUE(src_mac, TCA_FLOWER_KEY_ETH_SRC);
+ if (host_eth_type == ETH_P_ARP) {
+ FLOWER_PUT_MASKED_VALUE(arp.spa, TCA_FLOWER_KEY_ARP_SIP);
+ FLOWER_PUT_MASKED_VALUE(arp.tpa, TCA_FLOWER_KEY_ARP_TIP);
+ FLOWER_PUT_MASKED_VALUE(arp.sha, TCA_FLOWER_KEY_ARP_SHA);
+ FLOWER_PUT_MASKED_VALUE(arp.tha, TCA_FLOWER_KEY_ARP_THA);
+ FLOWER_PUT_MASKED_VALUE(arp.opcode, TCA_FLOWER_KEY_ARP_OP);
+ }
+
if (host_eth_type == ETH_P_IP || host_eth_type == ETH_P_IPV6) {
FLOWER_PUT_MASKED_VALUE(ip_ttl, TCA_FLOWER_KEY_IP_TTL);
FLOWER_PUT_MASKED_VALUE(ip_tos, TCA_FLOWER_KEY_IP_TOS);
@@ -2762,7 +2879,11 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower)
}
}
- nl_msg_put_u32(request, TCA_FLOWER_FLAGS, tc_get_tc_cls_policy(tc_policy));
+ if (policy == TC_POLICY_NONE) {
+ policy = tc_policy;
+ }
+
+ nl_msg_put_u32(request, TCA_FLOWER_FLAGS, tc_get_tc_cls_policy(policy));
if (flower->tunnel) {
nl_msg_put_flower_tunnel(request, flower);
diff --git a/lib/tc.h b/lib/tc.h
index d31c0953edf04e7c11ae9ebb9db0f47d7be9bd23..281231c0d3f1c614b1e0631a1b5dbacc99b737f6 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -121,6 +121,14 @@ struct tc_flower_key {
uint32_t ct_mark;
ovs_u128 ct_label;
+ struct {
+ ovs_be32 spa;
+ ovs_be32 tpa;
+ struct eth_addr sha;
+ struct eth_addr tha;
+ uint8_t opcode;
+ } arp;
+
struct {
ovs_be32 ipv4_src;
ovs_be32 ipv4_dst;
@@ -235,7 +243,7 @@ struct tc_action {
} ipv6;
};
- union {
+ struct {
ovs_be16 min;
ovs_be16 max;
} port;
@@ -304,6 +312,14 @@ is_tcf_id_eq(struct tcf_id *id1, struct tcf_id *id2)
&& id1->chain == id2->chain;
}
+/* Offload policy for a flower filter. TC_POLICY_NONE must stay 0, which the
+ * build assertion following the enum enforces. */
+enum tc_offload_policy {
+ TC_POLICY_NONE = 0, /* nl_msg_put_flower_options() then uses 'tc_policy'. */
+ TC_POLICY_SKIP_SW,
+ TC_POLICY_SKIP_HW
+};
+
+BUILD_ASSERT_DECL(TC_POLICY_NONE == 0);
+
struct tc_flower {
struct tc_flower_key key;
struct tc_flower_key mask;
@@ -329,6 +345,8 @@ struct tc_flower {
bool needs_full_ip_proto_mask;
enum tc_offloaded_state offloaded_state;
+ /* Used to force skip_hw when probing tc features. */
+ enum tc_offload_policy tc_policy;
};
/* assert that if we overflow with a masked write of uint32_t to the last byte
@@ -341,10 +359,11 @@ BUILD_ASSERT_DECL(offsetof(struct tc_flower, rewrite)
int tc_replace_flower(struct tcf_id *id, struct tc_flower *flower);
int tc_del_filter(struct tcf_id *id);
int tc_get_flower(struct tcf_id *id, struct tc_flower *flower);
-int tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump);
+int tc_dump_flower_start(struct tcf_id *id, struct nl_dump *dump, bool terse);
int parse_netlink_to_tc_flower(struct ofpbuf *reply,
struct tcf_id *id,
- struct tc_flower *flower);
+ struct tc_flower *flower,
+ bool terse);
void tc_set_policy(const char *policy);
#endif /* tc.h */
diff --git a/lib/tnl-ports.c b/lib/tnl-ports.c
index 17353046cc6ed9a29a75683cf22041318b764117..58269d3b1631ccc557ad628f27177eaea5b0a5e2 100644
--- a/lib/tnl-ports.c
+++ b/lib/tnl-ports.c
@@ -30,7 +30,6 @@
#include "openvswitch/ofpbuf.h"
#include "ovs-thread.h"
#include "odp-util.h"
-#include "ovs-thread.h"
#include "unixctl.h"
#include "util.h"
@@ -178,6 +177,9 @@ tnl_type_to_nw_proto(const char type[])
if (!strcmp(type, "vxlan")) {
return IPPROTO_UDP;
}
+ if (!strcmp(type, "gtpu")) {
+ return IPPROTO_UDP;
+ }
return 0;
}
diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
index f8a0e19524e9e986c1052fb9ac21b9b855b74d46..c0b0ae0448979b8aa655a9eab1ff942b8006b3d6 100644
--- a/lib/tun-metadata.c
+++ b/lib/tun-metadata.c
@@ -261,6 +261,23 @@ tun_metadata_write(struct flow_tnl *tnl,
value->tun_metadata + mf->n_bytes - loc->len, loc, idx);
}
+/* Deletes field 'mf' in 'tnl' (in non-UDPIF format).
+ * 'mf' must be an MFF_TUN_METADATA* field.
+ *
+ * Does nothing if 'tnl' has FLOW_TNL_F_UDPIF set. Otherwise only clears the
+ * bit for 'mf' in 'tnl->metadata.present.map'.
+ */
+void
+tun_metadata_delete(struct flow_tnl *tnl, const struct mf_field *mf)
+{
+ unsigned int idx;
+
+ if (tnl->flags & FLOW_TNL_F_UDPIF) {
+ return;
+ }
+
+ idx = mf->id - MFF_TUN_METADATA0; /* Option index of 'mf'. */
+ ovs_assert(idx < TUN_METADATA_NUM_OPTS);
+ ULLONG_SET0(tnl->metadata.present.map, idx);
+}
+
static const struct tun_metadata_loc *
metadata_loc_from_match(const struct tun_table *map, struct match *match,
const char *name, unsigned int idx,
diff --git a/lib/tun-metadata.h b/lib/tun-metadata.h
index 7dad9504b8da67d88592edc8544983580d78a69f..67dedae2522b943dd2ecde2554415b2926067fdb 100644
--- a/lib/tun-metadata.h
+++ b/lib/tun-metadata.h
@@ -47,6 +47,7 @@ void tun_metadata_read(const struct flow_tnl *,
const struct mf_field *, union mf_value *);
void tun_metadata_write(struct flow_tnl *,
const struct mf_field *, const union mf_value *);
+void tun_metadata_delete(struct flow_tnl *, const struct mf_field *);
void tun_metadata_set_match(const struct mf_field *,
const union mf_value *value,
const union mf_value *mask, struct match *,
diff --git a/lib/unixctl.c b/lib/unixctl.c
index c216de3d05a7e9751ceefdb820f5ae8bb390e388..69aed6722c1095ca2b64010f102824e871b156ba 100644
--- a/lib/unixctl.c
+++ b/lib/unixctl.c
@@ -77,7 +77,9 @@ unixctl_list_commands(struct unixctl_conn *conn, int argc OVS_UNUSED,
const struct shash_node *node = nodes[i];
const struct unixctl_command *command = node->data;
- ds_put_format(&ds, " %-23s %s\n", node->name, command->usage);
+ if (command->usage) {
+ ds_put_format(&ds, " %-23s %s\n", node->name, command->usage);
+ }
}
free(nodes);
@@ -94,7 +96,7 @@ unixctl_version(struct unixctl_conn *conn, int argc OVS_UNUSED,
/* Registers a unixctl command with the given 'name'. 'usage' describes the
* arguments to the command; it is used only for presentation to the user in
- * "list-commands" output.
+ * "list-commands" output. (If 'usage' is NULL, then the command is hidden.)
*
* 'cb' is called when the command is received. It is passed an array
* containing the command name and arguments, plus a copy of 'aux'. Normally
diff --git a/lib/userspace-tso.c b/lib/userspace-tso.c
index 6a4a0149b7f56ad23cd60d0eff4aa8f045b032c2..f843c2a763ce5c4e9969e9784472150663b8de93 100644
--- a/lib/userspace-tso.c
+++ b/lib/userspace-tso.c
@@ -34,13 +34,8 @@ userspace_tso_init(const struct smap *ovs_other_config)
static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
if (ovsthread_once_start(&once)) {
-#ifdef DPDK_NETDEV
VLOG_INFO("Userspace TCP Segmentation Offloading support enabled");
userspace_tso = true;
-#else
- VLOG_WARN("Userspace TCP Segmentation Offloading can not be enabled"
- "since OVS is built without DPDK support.");
-#endif
ovsthread_once_done(&once);
}
}
diff --git a/lib/util.c b/lib/util.c
index 830e14516f72bec267ee6cbae99a7beb24b48da0..25635b27ff0000d3d9959b8e974fe6ef38f13db2 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -1395,6 +1395,19 @@ is_all_ones(const void *p, size_t n)
return is_all_byte(p, n, 0xff);
}
+/* ORs the first 'n' bytes at 'src_' into the first 'n' bytes at 'dst_'. */
+void
+or_bytes(void *dst_, const void *src_, size_t n)
+{
+    const uint8_t *from = src_;
+    uint8_t *to = dst_;
+    size_t ofs;
+
+    for (ofs = 0; ofs < n; ofs++) {
+        to[ofs] |= from[ofs];
+    }
+}
+
/* Copies 'n_bits' bits starting from bit 'src_ofs' in 'src' to the 'n_bits'
* starting from bit 'dst_ofs' in 'dst'. 'src' is 'src_len' bytes long and
* 'dst' is 'dst_len' bytes long.
diff --git a/lib/util.h b/lib/util.h
index 7ad8758fe63704a24d322c3bddae7d449298d345..067dcad1578620efd7f4314cbb7f0df112a490cf 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -484,6 +484,7 @@ be64_is_superset(ovs_be64 super, ovs_be64 sub)
bool is_all_zeros(const void *, size_t);
bool is_all_ones(const void *, size_t);
bool is_all_byte(const void *, size_t, uint8_t byte);
+void or_bytes(void *dst, const void *src, size_t n);
void bitwise_copy(const void *src, unsigned int src_len, unsigned int src_ofs,
void *dst, unsigned int dst_len, unsigned int dst_ofs,
unsigned int n_bits);
diff --git a/lib/vlog.c b/lib/vlog.c
index 559943d87937722046fb2b1f21dc238eee6be1d1..533f93755502ccacee37f34da8aeccb8f2ac961c 100644
--- a/lib/vlog.c
+++ b/lib/vlog.c
@@ -257,7 +257,7 @@ vlog_get_level(const struct vlog_module *module,
}
static void
-update_min_level(struct vlog_module *module) OVS_REQUIRES(&log_file_mutex)
+update_min_level(struct vlog_module *module) OVS_REQUIRES(log_file_mutex)
{
enum vlog_destination destination;
@@ -612,6 +612,21 @@ vlog_set_syslog_target(const char *target)
ovs_rwlock_unlock(&pattern_rwlock);
}
+/* Writes 's' straight to the log file descriptor, bypassing the async writer
+ * and without taking any lock.  The caller must hold 'log_file_mutex' or be
+ * certain that it is not needed.  Meant for exceptional situations such as
+ * dumping a backtrace on a fatal signal.  Does nothing if 'log_fd' is
+ * negative. */
+void
+vlog_direct_write_to_log_file_unsafe(const char *s)
+    OVS_NO_THREAD_SAFETY_ANALYSIS
+{
+    if (log_fd < 0) {
+        return;
+    }
+    ignore(write(log_fd, s, strlen(s)));
+}
+
/* Returns 'false' if 'facility' is not a valid string. If 'facility'
* is a valid string, sets 'value' with the integer value of 'facility'
* and returns 'true'. */
@@ -1088,10 +1103,17 @@ vlog_valist(const struct vlog_module *module, enum vlog_level level,
{
bool log_to_console = module->levels[VLF_CONSOLE] >= level;
bool log_to_syslog = module->levels[VLF_SYSLOG] >= level;
- bool log_to_file;
+ bool log_to_file = module->levels[VLF_FILE] >= level;
+
+ if (!(log_to_console || log_to_syslog || log_to_file)) {
+ /* fast path - all logging levels specify no logging, no
+ * need to hog the log mutex
+ */
+ return;
+ }
ovs_mutex_lock(&log_file_mutex);
- log_to_file = module->levels[VLF_FILE] >= level && log_fd >= 0;
+ log_to_file &= (log_fd >= 0);
ovs_mutex_unlock(&log_file_mutex);
if (log_to_console || log_to_syslog || log_to_file) {
int save_errno = errno;
diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4
index add3aabcc278f7c6864a1da5d4cb29c35151876a..244ea0fbabae76d9f3e656d6799d575151027d80 100644
--- a/m4/openvswitch.m4
+++ b/m4/openvswitch.m4
@@ -95,23 +95,12 @@ AC_DEFUN([OVS_CHECK_WIN32],
AC_MSG_ERROR([Invalid --with-pthread value])
;;
*)
- if (cl) 2>&1 | grep 'x64' >/dev/null 2>&1; then
- cl_cv_x64=yes
- else
- cl_cv_x64=no
- fi
- if test "$cl_cv_x64" = yes; then
- PTHREAD_WIN32_DIR=$withval/lib/x64
- PTHREAD_WIN32_DIR_DLL=/$(echo ${withval} | ${SED} -e 's/://')/dll/x64
- PTHREAD_WIN32_DIR_DLL_WIN_FORM=$withval/dll/x64
- else
- PTHREAD_WIN32_DIR=$withval/lib/x86
- PTHREAD_WIN32_DIR_DLL=/$(echo ${withval} | ${SED} -e 's/://')/dll/x86
- PTHREAD_WIN32_DIR_DLL_WIN_FORM=$withval/dll/x86
- fi
+ PTHREAD_WIN32_DIR=$withval/lib
+ PTHREAD_WIN32_DIR_DLL=/$(echo ${withval} | ${SED} -e 's/://')/bin
+ PTHREAD_WIN32_DIR_DLL_WIN_FORM=$withval/bin
PTHREAD_INCLUDES=-I$withval/include
PTHREAD_LDFLAGS=-L$PTHREAD_WIN32_DIR
- PTHREAD_LIBS="-lpthreadVC2"
+ PTHREAD_LIBS="-lpthreadVC3"
AC_SUBST([PTHREAD_WIN32_DIR_DLL_WIN_FORM])
AC_SUBST([PTHREAD_WIN32_DIR_DLL])
AC_SUBST([PTHREAD_INCLUDES])
@@ -146,51 +135,51 @@ dnl OVS_CHECK_WINDOWS
dnl
dnl Configure Visual Studio solution build
AC_DEFUN([OVS_CHECK_VISUAL_STUDIO_DDK], [
-AC_ARG_WITH([vstudiotarget],
- [AS_HELP_STRING([--with-vstudiotarget=target_type],
- [Target type: Debug/Release])],
- [
- case "$withval" in
- "Release") ;;
- "Debug") ;;
- *) AC_MSG_ERROR([No valid Visual Studio configuration found]) ;;
- esac
-
- VSTUDIO_CONFIG=$withval
- ], [
- VSTUDIO_CONFIG=
- ]
- )
-
- AC_SUBST([VSTUDIO_CONFIG])
-
-AC_ARG_WITH([vstudiotargetver],
- [AS_HELP_STRING([--with-vstudiotargetver=target_ver1,target_ver2],
- [Target versions: Win8,Win8.1,Win10])],
- [
- targetver=`echo "$withval" | tr -s , ' ' `
- for ver in $targetver; do
- case "$ver" in
- "Win8") VSTUDIO_WIN8=true ;;
- "Win8.1") VSTUDIO_WIN8_1=true ;;
- "Win10") VSTUDIO_WIN10=true ;;
- *) AC_MSG_ERROR([No valid Visual Studio target version found]) ;;
- esac
- done
-
- ], [
- VSTUDIO_WIN8=true
- VSTUDIO_WIN8_1=true
- VSTUDIO_WIN10=true
- ]
- )
-
- AM_CONDITIONAL([VSTUDIO_WIN8], [test -n "$VSTUDIO_WIN8"])
- AM_CONDITIONAL([VSTUDIO_WIN8_1], [test -n "$VSTUDIO_WIN8_1"])
- AM_CONDITIONAL([VSTUDIO_WIN10], [test -n "$VSTUDIO_WIN10"])
-
- AC_DEFINE([VSTUDIO_DDK], [1], [System uses the Visual Studio build target.])
- AM_CONDITIONAL([VSTUDIO_DDK], [test -n "$VSTUDIO_CONFIG"])
+if test "$WIN32" = yes; then
+ AC_ARG_WITH([vstudiotarget],
+ [AS_HELP_STRING([--with-vstudiotarget=target_type],
+ [Target type: Debug/Release])],
+ [
+ case "$withval" in
+ "Release") ;;
+ "Debug") ;;
+ *) AC_MSG_ERROR([No valid Visual Studio configuration found]) ;;
+ esac
+
+ VSTUDIO_CONFIG=$withval
+ ], [
+ VSTUDIO_CONFIG="Debug"
+ ]
+ )
+
+ AC_SUBST([VSTUDIO_CONFIG])
+
+ AC_ARG_WITH([vstudiotargetver],
+ [AS_HELP_STRING([--with-vstudiotargetver=target_ver1,target_ver2],
+ [Target versions: Win8,Win8.1,Win10])],
+ [
+ targetver=`echo "$withval" | tr -s , ' ' `
+ for ver in $targetver; do
+ case "$ver" in
+ "Win8") VSTUDIO_WIN8=true ;;
+ "Win8.1") VSTUDIO_WIN8_1=true ;;
+ "Win10") VSTUDIO_WIN10=true ;;
+ *) AC_MSG_ERROR([No valid Visual Studio target version found]) ;;
+ esac
+ done
+
+ ], [
+ VSTUDIO_WIN8=true
+ VSTUDIO_WIN8_1=true
+ VSTUDIO_WIN10=true
+ ]
+ )
+ AC_DEFINE([VSTUDIO_DDK], [1], [System uses the Visual Studio build target.])
+fi
+AM_CONDITIONAL([VSTUDIO_WIN8], [test -n "$VSTUDIO_WIN8"])
+AM_CONDITIONAL([VSTUDIO_WIN8_1], [test -n "$VSTUDIO_WIN8_1"])
+AM_CONDITIONAL([VSTUDIO_WIN10], [test -n "$VSTUDIO_WIN10"])
+AM_CONDITIONAL([VSTUDIO_DDK], [test -n "$VSTUDIO_CONFIG"])
])
dnl Checks for Netlink support.
@@ -404,6 +393,40 @@ AC_DEFUN([OVS_CHECK_SPHINX],
AC_ARG_VAR([SPHINXBUILD])
AM_CONDITIONAL([HAVE_SPHINX], [test "$SPHINXBUILD" != none])])
+dnl Checks for binutils/assembler known issue with AVX512.
+dnl Due to backports, we probe assembling a reproducer instead of checking
+dnl binutils version string.  More details, including ASM dumps and debug here:
+dnl   GCC: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90028
+dnl Checking binutils functionality instead of the LD version is similar to
+dnl how DPDK proposes to solve this issue:
+dnl   http://patches.dpdk.org/patch/71723/
+dnl
+dnl Result: ovs_cv_binutils_avx512_good is "yes" only on x86_64 when the
+dnl disassembled reproducer matches what was assembled; the automake
+dnl conditional HAVE_LD_AVX512_GOOD mirrors it.
+dnl
+dnl NOTE(review): the CFLAGS adjustments are made inside the AC_CACHE_CHECK
+dnl body, so they are skipped when the result comes from a configure cache --
+dnl confirm whether that matters for cached builds.
+AC_DEFUN([OVS_CHECK_BINUTILS_AVX512],
+  [AC_CACHE_CHECK(
+    [binutils avx512 assembler checks passing],
+    [ovs_cv_binutils_avx512_good],
+    [dnl Assemble a short snippet to test for issue in "build-aux" dir:
+     mkdir -p build-aux
+     OBJFILE=build-aux/binutils_avx512_check.o
+     GATHER_PARAMS='0x8(,%ymm1,1),%ymm0{%k2}'
+     if ($CC -dumpmachine | grep x86_64) >/dev/null 2>&1; then
+       dnl Assemble only here: "--64" is an x86-specific assembler option.
+       echo "vpgatherqq $GATHER_PARAMS" | as --64 -o "$OBJFILE" -
+       if (objdump -d --no-show-raw-insn "$OBJFILE" | grep -q "$GATHER_PARAMS") >/dev/null 2>&1; then
+         ovs_cv_binutils_avx512_good=yes
+         CFLAGS="$CFLAGS -DHAVE_LD_AVX512_GOOD"
+       else
+         ovs_cv_binutils_avx512_good=no
+         dnl Explicitly disallow avx512f to stop compiler auto-vectorizing
+         dnl and causing zmm usage with buggy binutils versions.
+         CFLAGS="$CFLAGS -mno-avx512f"
+       fi
+     else
+       dnl non x86_64 architectures don't have avx512, so not affected
+       ovs_cv_binutils_avx512_good=no
+     fi])
+   dnl Use the literal path with -f: OBJFILE is unset when the result above
+   dnl came from the cache, and the object is never created off x86_64.
+   rm -f build-aux/binutils_avx512_check.o
+   AM_CONDITIONAL([HAVE_LD_AVX512_GOOD],
+                  [test "$ovs_cv_binutils_avx512_good" = yes])])
+
dnl Checks for dot.
AC_DEFUN([OVS_CHECK_DOT],
[AC_CACHE_CHECK(
diff --git a/manpages.mk b/manpages.mk
deleted file mode 100644
index dc201484c6379e33b1fa160c5ebb75d93882cdf5..0000000000000000000000000000000000000000
--- a/manpages.mk
+++ /dev/null
@@ -1,266 +0,0 @@
-# Generated automatically -- do not modify! -*- buffer-read-only: t -*-
-
-ovsdb/ovsdb-client.1: \
- ovsdb/ovsdb-client.1.in \
- lib/common-syn.man \
- lib/common.man \
- lib/daemon-syn.man \
- lib/daemon.man \
- lib/ovs.tmac \
- lib/ssl-bootstrap-syn.man \
- lib/ssl-bootstrap.man \
- lib/ssl-connect-syn.man \
- lib/ssl-connect.man \
- lib/ssl-syn.man \
- lib/ssl.man \
- lib/table.man \
- lib/vlog-syn.man \
- lib/vlog.man \
- ovsdb/ovsdb-schemas.man
-ovsdb/ovsdb-client.1.in:
-lib/common-syn.man:
-lib/common.man:
-lib/daemon-syn.man:
-lib/daemon.man:
-lib/ovs.tmac:
-lib/ssl-bootstrap-syn.man:
-lib/ssl-bootstrap.man:
-lib/ssl-connect-syn.man:
-lib/ssl-connect.man:
-lib/ssl-syn.man:
-lib/ssl.man:
-lib/table.man:
-lib/vlog-syn.man:
-lib/vlog.man:
-ovsdb/ovsdb-schemas.man:
-
-ovsdb/ovsdb-server.1: \
- ovsdb/ovsdb-server.1.in \
- lib/common-syn.man \
- lib/common.man \
- lib/coverage-unixctl.man \
- lib/daemon-syn.man \
- lib/daemon.man \
- lib/memory-unixctl.man \
- lib/ovs.tmac \
- lib/service-syn.man \
- lib/service.man \
- lib/ssl-bootstrap-syn.man \
- lib/ssl-bootstrap.man \
- lib/ssl-connect-syn.man \
- lib/ssl-connect.man \
- lib/ssl-peer-ca-cert-syn.man \
- lib/ssl-peer-ca-cert.man \
- lib/ssl-syn.man \
- lib/ssl.man \
- lib/unixctl-syn.man \
- lib/unixctl.man \
- lib/vlog-syn.man \
- lib/vlog-unixctl.man \
- lib/vlog.man
-ovsdb/ovsdb-server.1.in:
-lib/common-syn.man:
-lib/common.man:
-lib/coverage-unixctl.man:
-lib/daemon-syn.man:
-lib/daemon.man:
-lib/memory-unixctl.man:
-lib/ovs.tmac:
-lib/service-syn.man:
-lib/service.man:
-lib/ssl-bootstrap-syn.man:
-lib/ssl-bootstrap.man:
-lib/ssl-connect-syn.man:
-lib/ssl-connect.man:
-lib/ssl-peer-ca-cert-syn.man:
-lib/ssl-peer-ca-cert.man:
-lib/ssl-syn.man:
-lib/ssl.man:
-lib/unixctl-syn.man:
-lib/unixctl.man:
-lib/vlog-syn.man:
-lib/vlog-unixctl.man:
-lib/vlog.man:
-
-ovsdb/ovsdb-tool.1: \
- ovsdb/ovsdb-tool.1.in \
- lib/common-syn.man \
- lib/common.man \
- lib/ovs.tmac \
- lib/vlog-syn.man \
- lib/vlog.man \
- ovsdb/ovsdb-schemas.man
-ovsdb/ovsdb-tool.1.in:
-lib/common-syn.man:
-lib/common.man:
-lib/ovs.tmac:
-lib/vlog-syn.man:
-lib/vlog.man:
-ovsdb/ovsdb-schemas.man:
-
-utilities/bugtool/ovs-bugtool.8: \
- utilities/bugtool/ovs-bugtool.8.in \
- lib/ovs.tmac
-utilities/bugtool/ovs-bugtool.8.in:
-lib/ovs.tmac:
-
-
-utilities/ovs-dpctl-top.8: \
- utilities/ovs-dpctl-top.8.in \
- lib/ovs.tmac
-utilities/ovs-dpctl-top.8.in:
-lib/ovs.tmac:
-
-utilities/ovs-dpctl.8: \
- utilities/ovs-dpctl.8.in \
- lib/common.man \
- lib/dpctl.man \
- lib/ovs.tmac \
- lib/vlog.man
-utilities/ovs-dpctl.8.in:
-lib/common.man:
-lib/dpctl.man:
-lib/ovs.tmac:
-lib/vlog.man:
-
-utilities/ovs-ofctl.8: \
- utilities/ovs-ofctl.8.in \
- lib/colors.man \
- lib/common.man \
- lib/daemon.man \
- lib/ofp-version.man \
- lib/ovs.tmac \
- lib/ssl.man \
- lib/unixctl.man \
- lib/vconn-active.man \
- lib/vlog.man
-utilities/ovs-ofctl.8.in:
-lib/colors.man:
-lib/common.man:
-lib/daemon.man:
-lib/ofp-version.man:
-lib/ovs.tmac:
-lib/ssl.man:
-lib/unixctl.man:
-lib/vconn-active.man:
-lib/vlog.man:
-
-utilities/ovs-pcap.1: \
- utilities/ovs-pcap.1.in \
- lib/common-syn.man \
- lib/common.man \
- lib/ovs.tmac
-utilities/ovs-pcap.1.in:
-lib/common-syn.man:
-lib/common.man:
-lib/ovs.tmac:
-
-lib/ovs.tmac:
-
-utilities/ovs-testcontroller.8: \
- utilities/ovs-testcontroller.8.in \
- lib/common.man \
- lib/daemon.man \
- lib/ofp-version.man \
- lib/ovs.tmac \
- lib/ssl-peer-ca-cert.man \
- lib/ssl.man \
- lib/unixctl.man \
- lib/vconn-active.man \
- lib/vconn-passive.man \
- lib/vlog.man
-utilities/ovs-testcontroller.8.in:
-lib/common.man:
-lib/daemon.man:
-lib/ofp-version.man:
-lib/ovs.tmac:
-lib/ssl-peer-ca-cert.man:
-lib/ssl.man:
-lib/unixctl.man:
-lib/vconn-active.man:
-lib/vconn-passive.man:
-lib/vlog.man:
-
-utilities/ovs-vsctl.8: \
- utilities/ovs-vsctl.8.in \
- lib/common.man \
- lib/db-ctl-base.man \
- lib/ovs.tmac \
- lib/ssl-bootstrap.man \
- lib/ssl-peer-ca-cert.man \
- lib/ssl.man \
- lib/table.man \
- lib/vconn-active.man \
- lib/vconn-passive.man \
- lib/vlog.man
-utilities/ovs-vsctl.8.in:
-lib/common.man:
-lib/db-ctl-base.man:
-lib/ovs.tmac:
-lib/ssl-bootstrap.man:
-lib/ssl-peer-ca-cert.man:
-lib/ssl.man:
-lib/table.man:
-lib/vconn-active.man:
-lib/vconn-passive.man:
-lib/vlog.man:
-
-vswitchd/ovs-vswitchd.8: \
- vswitchd/ovs-vswitchd.8.in \
- lib/common.man \
- lib/coverage-unixctl.man \
- lib/daemon.man \
- lib/dpctl.man \
- lib/dpif-netdev-unixctl.man \
- lib/memory-unixctl.man \
- lib/netdev-dpdk-unixctl.man \
- lib/ovs.tmac \
- lib/service.man \
- lib/ssl-bootstrap.man \
- lib/ssl-peer-ca-cert.man \
- lib/ssl.man \
- lib/unixctl.man \
- lib/vlog-unixctl.man \
- lib/vlog.man \
- ofproto/ofproto-dpif-unixctl.man \
- ofproto/ofproto-tnl-unixctl.man \
- ofproto/ofproto-unixctl.man
-vswitchd/ovs-vswitchd.8.in:
-lib/common.man:
-lib/coverage-unixctl.man:
-lib/daemon.man:
-lib/dpctl.man:
-lib/dpif-netdev-unixctl.man:
-lib/memory-unixctl.man:
-lib/netdev-dpdk-unixctl.man:
-lib/ovs.tmac:
-lib/service.man:
-lib/ssl-bootstrap.man:
-lib/ssl-peer-ca-cert.man:
-lib/ssl.man:
-lib/unixctl.man:
-lib/vlog-unixctl.man:
-lib/vlog.man:
-ofproto/ofproto-dpif-unixctl.man:
-ofproto/ofproto-tnl-unixctl.man:
-ofproto/ofproto-unixctl.man:
-
-vtep/vtep-ctl.8: \
- vtep/vtep-ctl.8.in \
- lib/common.man \
- lib/db-ctl-base.man \
- lib/ovs.tmac \
- lib/ssl-bootstrap.man \
- lib/ssl-peer-ca-cert.man \
- lib/ssl.man \
- lib/table.man \
- lib/vlog.man
-vtep/vtep-ctl.8.in:
-lib/common.man:
-lib/db-ctl-base.man:
-lib/ovs.tmac:
-lib/ssl-bootstrap.man:
-lib/ssl-peer-ca-cert.man:
-lib/ssl.man:
-lib/table.man:
-lib/vlog.man:
diff --git a/ofproto/bond.c b/ofproto/bond.c
index 405202fb64381f19b1e1372e910e6aa5cf17d659..35b9caac01afb6d18250515152fa2324ea9baf9a 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -54,20 +54,16 @@ static struct ovs_rwlock rwlock = OVS_RWLOCK_INITIALIZER;
static struct hmap all_bonds__ = HMAP_INITIALIZER(&all_bonds__);
static struct hmap *const all_bonds OVS_GUARDED_BY(rwlock) = &all_bonds__;
-/* Bit-mask for hashing a flow down to a bucket. */
-#define BOND_MASK 0xff
-#define BOND_BUCKETS (BOND_MASK + 1)
-
/* Priority for internal rules created to handle recirculation */
#define RECIRC_RULE_PRIORITY 20
-/* A hash bucket for mapping a flow to a slave.
+/* A hash bucket for mapping a flow to a member interface.
* "struct bond" has an array of BOND_BUCKETS of these. */
struct bond_entry {
- struct bond_slave *slave; /* Assigned slave, NULL if unassigned. */
+ struct bond_member *member; /* Assigned member, NULL if unassigned. */
uint64_t tx_bytes /* Count of bytes recently transmitted. */
OVS_GUARDED_BY(rwlock);
- struct ovs_list list_node; /* In bond_slave's 'entries' list. */
+ struct ovs_list list_node; /* In bond_member's 'entries' list. */
/* Recirculation.
*
@@ -78,12 +74,12 @@ struct bond_entry {
uint64_t pr_tx_bytes OVS_GUARDED_BY(rwlock);
};
-/* A bond slave, that is, one of the links comprising a bond. */
-struct bond_slave {
- struct hmap_node hmap_node; /* In struct bond's slaves hmap. */
- struct ovs_list list_node; /* In struct bond's enabled_slaves list. */
- struct bond *bond; /* The bond that contains this slave. */
- void *aux; /* Client-provided handle for this slave. */
+/* A bond member interface, that is, one of the links comprising a bond. */
+struct bond_member {
+ struct hmap_node hmap_node; /* In struct bond's members hmap. */
+ struct ovs_list list_node; /* In struct bond's enabled_members list. */
+ struct bond *bond; /* The bond that contains this member. */
+ void *aux; /* Client-provided handle for this member. */
struct netdev *netdev; /* Network device, owned by the client. */
uint64_t change_seq; /* Tracks changes in 'netdev'. */
@@ -92,7 +88,8 @@ struct bond_slave {
/* Link status. */
bool enabled; /* May be chosen for flows? */
- bool may_enable; /* Client considers this slave bondable. */
+ bool may_enable; /* Client considers this member bondable. */
+ bool is_primary; /* This member is preferred over others. */
long long delay_expires; /* Time after which 'enabled' may change. */
/* Rebalancing info. Used only by bond_rebalance(). */
@@ -108,24 +105,27 @@ struct bond {
char *name; /* Name provided by client. */
struct ofproto_dpif *ofproto; /* The bridge this bond belongs to. */
- /* Slaves. */
- struct hmap slaves;
+ /* Members. */
+ struct hmap members;
- /* Enabled slaves.
+ /* Enabled members.
*
- * Any reader or writer of 'enabled_slaves' must hold 'mutex'.
- * (To prevent the bond_slave from disappearing they must also hold
+ * Any reader or writer of 'enabled_members' must hold 'mutex'.
+ * (To prevent the bond_member from disappearing they must also hold
* 'rwlock'.) */
struct ovs_mutex mutex OVS_ACQ_AFTER(rwlock);
- struct ovs_list enabled_slaves OVS_GUARDED; /* Contains struct bond_slaves. */
+ struct ovs_list enabled_members OVS_GUARDED; /* Of struct bond_members. */
/* Bonding info. */
enum bond_mode balance; /* Balancing mode, one of BM_*. */
- struct bond_slave *active_slave;
- int updelay, downdelay; /* Delay before slave goes up/down, in ms. */
+ struct bond_member *active_member;
+ int updelay, downdelay; /* Delay before member goes up/down, in ms. */
enum lacp_status lacp_status; /* Status of LACP negotiations. */
bool bond_revalidate; /* True if flows need revalidation. */
uint32_t basis; /* Basis for flow hash function. */
+ bool use_lb_output_action; /* Use lb_output action to avoid recirculation.
+ Applicable only for Balance TCP mode. */
+ char *primary; /* Name of the primary member. */
/* SLB specific bonding info. */
struct bond_entry *hash; /* An array of BOND_BUCKETS elements. */
@@ -135,15 +135,14 @@ struct bond {
uint32_t recirc_id; /* Non zero if recirculation can be used.*/
struct hmap pr_rule_ops; /* Helps to maintain post recirculation rules.*/
- /* Store active slave to OVSDB. */
- bool active_slave_changed; /* Set to true whenever the bond changes
- active slave. It will be reset to false
- after it is stored into OVSDB */
+ /* Store active member to OVSDB. */
+ bool active_member_changed; /* Set to true whenever the bond changes active
+ * member. It will be reset to false after
+ * it is stored into OVSDB */
/* Interface name may not be persistent across an OS reboot, use
- * MAC address for identifing the active slave */
- struct eth_addr active_slave_mac;
- /* The MAC address of the active interface. */
+ * MAC address for identifying the active member. */
+ struct eth_addr active_member_mac; /* MAC address of the active member. */
/* Legacy compatibility. */
bool lacp_fallback_ab; /* Fallback to active-backup on LACP failure. */
@@ -166,27 +165,29 @@ struct bond_pr_rule_op {
};
static void bond_entry_reset(struct bond *) OVS_REQ_WRLOCK(rwlock);
-static struct bond_slave *bond_slave_lookup(struct bond *, const void *slave_)
+static struct bond_member *bond_member_lookup(struct bond *, const void *member_)
OVS_REQ_RDLOCK(rwlock);
-static void bond_enable_slave(struct bond_slave *, bool enable)
+static void bond_enable_member(struct bond_member *, bool enable)
OVS_REQ_WRLOCK(rwlock);
-static void bond_link_status_update(struct bond_slave *)
+static void bond_link_status_update(struct bond_member *)
OVS_REQ_WRLOCK(rwlock);
-static void bond_choose_active_slave(struct bond *)
+static void bond_choose_active_member(struct bond *)
OVS_REQ_WRLOCK(rwlock);
static struct bond_entry *lookup_bond_entry(const struct bond *,
const struct flow *,
uint16_t vlan)
OVS_REQ_RDLOCK(rwlock);
-static struct bond_slave *get_enabled_slave(struct bond *)
+static struct bond_member *get_enabled_member(struct bond *)
OVS_REQ_RDLOCK(rwlock);
-static struct bond_slave *choose_output_slave(const struct bond *,
- const struct flow *,
- struct flow_wildcards *,
- uint16_t vlan)
+static struct bond_member *choose_output_member(const struct bond *,
+ const struct flow *,
+ struct flow_wildcards *,
+ uint16_t vlan)
OVS_REQ_RDLOCK(rwlock);
-static void update_recirc_rules__(struct bond *bond);
+static void update_recirc_rules__(struct bond *);
static bool bond_is_falling_back_to_ab(const struct bond *);
+static void bond_add_lb_output_buckets(const struct bond *);
+static void bond_del_lb_output_buckets(const struct bond *);
/* Attempts to parse 's' as the name of a bond balancing mode. If successful,
* stores the mode in '*balance' and returns true. Otherwise returns false
@@ -224,8 +225,8 @@ bond_mode_to_string(enum bond_mode balance) {
/* Creates and returns a new bond whose configuration is initially taken from
* 's'.
*
- * The caller should register each slave on the new bond by calling
- * bond_slave_register(). */
+ * The caller should register each member on the new bond by calling
+ * bond_member_register(). */
struct bond *
bond_create(const struct bond_settings *s, struct ofproto_dpif *ofproto)
{
@@ -233,14 +234,15 @@ bond_create(const struct bond_settings *s, struct ofproto_dpif *ofproto)
bond = xzalloc(sizeof *bond);
bond->ofproto = ofproto;
- hmap_init(&bond->slaves);
- ovs_list_init(&bond->enabled_slaves);
+ hmap_init(&bond->members);
+ ovs_list_init(&bond->enabled_members);
ovs_mutex_init(&bond->mutex);
ovs_refcount_init(&bond->ref_cnt);
hmap_init(&bond->pr_rule_ops);
- bond->active_slave_mac = eth_addr_zero;
- bond->active_slave_changed = false;
+ bond->active_member_mac = eth_addr_zero;
+ bond->active_member_changed = false;
+ bond->primary = NULL;
bond_reconfigure(bond, s);
return bond;
@@ -261,7 +263,7 @@ bond_ref(const struct bond *bond_)
void
bond_unref(struct bond *bond)
{
- struct bond_slave *slave;
+ struct bond_member *member;
if (!bond || ovs_refcount_unref_relaxed(&bond->ref_cnt) != 1) {
return;
@@ -271,17 +273,21 @@ bond_unref(struct bond *bond)
hmap_remove(all_bonds, &bond->hmap_node);
ovs_rwlock_unlock(&rwlock);
- HMAP_FOR_EACH_POP (slave, hmap_node, &bond->slaves) {
- /* Client owns 'slave->netdev'. */
- free(slave->name);
- free(slave);
+ HMAP_FOR_EACH_POP (member, hmap_node, &bond->members) {
+ /* Client owns 'member->netdev'. */
+ free(member->name);
+ free(member);
}
- hmap_destroy(&bond->slaves);
+ hmap_destroy(&bond->members);
ovs_mutex_destroy(&bond->mutex);
/* Free bond resources. Remove existing post recirc rules. */
if (bond->recirc_id) {
+ if (bond_use_lb_output_action(bond)) {
+ /* Delete bond buckets from datapath if installed. */
+ bond_del_lb_output_buckets(bond);
+ }
recirc_free_id(bond->recirc_id);
bond->recirc_id = 0;
}
@@ -290,6 +296,7 @@ bond_unref(struct bond *bond)
update_recirc_rules__(bond);
hmap_destroy(&bond->pr_rule_ops);
+ free(bond->primary);
free(bond->name);
free(bond);
}
@@ -336,27 +343,35 @@ update_recirc_rules__(struct bond *bond)
struct ofpbuf ofpacts;
int i;
- ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
-
HMAP_FOR_EACH(pr_op, hmap_node, &bond->pr_rule_ops) {
pr_op->op = DEL;
}
if (bond->hash && bond->recirc_id) {
- for (i = 0; i < BOND_BUCKETS; i++) {
- struct bond_slave *slave = bond->hash[i].slave;
+ if (bond_use_lb_output_action(bond)) {
+ bond_add_lb_output_buckets(bond);
+ /* No need to install post recirculation rules as we are using
+ * lb_output action with bond buckets.
+ */
+ return;
+ } else {
+ for (i = 0; i < BOND_BUCKETS; i++) {
+ struct bond_member *member = bond->hash[i].member;
- if (slave) {
- match_init_catchall(&match);
- match_set_recirc_id(&match, bond->recirc_id);
- match_set_dp_hash_masked(&match, i, BOND_MASK);
+ if (member) {
+ match_init_catchall(&match);
+ match_set_recirc_id(&match, bond->recirc_id);
+ match_set_dp_hash_masked(&match, i, BOND_MASK);
- add_pr_rule(bond, &match, slave->ofp_port,
- &bond->hash[i].pr_rule);
+ add_pr_rule(bond, &match, member->ofp_port,
+ &bond->hash[i].pr_rule);
+ }
}
}
}
+ ofpbuf_use_stub(&ofpacts, ofpacts_stub, sizeof ofpacts_stub);
+
HMAP_FOR_EACH_SAFE(pr_op, next_op, hmap_node, &bond->pr_rule_ops) {
int error;
switch (pr_op->op) {
@@ -409,8 +424,8 @@ update_recirc_rules(struct bond *bond)
/* Updates 'bond''s overall configuration to 's'.
*
- * The caller should register each slave on 'bond' by calling
- * bond_slave_register(). This is optional if none of the slaves'
+ * The caller should register each member on 'bond' by calling
+ * bond_member_register(). This is optional if none of the members'
* configuration has changed. In any case it can't hurt.
*
* Returns true if the configuration has changed in such a way that requires
@@ -459,14 +474,37 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
bond->bond_revalidate = false;
}
+ if (!nullable_string_is_equal(bond->primary, s->primary)) {
+ free(bond->primary);
+ bond->primary = nullable_xstrdup(s->primary);
+ revalidate = true;
+ }
+
if (bond->balance != BM_AB) {
if (!bond->recirc_id) {
bond->recirc_id = recirc_alloc_id(bond->ofproto);
}
} else if (bond->recirc_id) {
+ if (bond_use_lb_output_action(bond)) {
+ /* Delete bond buckets from datapath if installed. */
+ bond_del_lb_output_buckets(bond);
+ }
recirc_free_id(bond->recirc_id);
bond->recirc_id = 0;
}
+ if (bond->use_lb_output_action != s->use_lb_output_action) {
+ if (s->use_lb_output_action &&
+ !ovs_lb_output_action_supported(bond->ofproto)) {
+ VLOG_WARN("%s: Datapath does not support 'lb_output' action, "
+ "disabled.", bond->name);
+ } else {
+ bond->use_lb_output_action = s->use_lb_output_action;
+ if (!bond->use_lb_output_action) {
+ bond_del_lb_output_buckets(bond);
+ }
+ revalidate = true;
+ }
+ }
if (bond->balance == BM_AB || !bond->hash || revalidate) {
bond_entry_reset(bond);
@@ -476,21 +514,21 @@ bond_reconfigure(struct bond *bond, const struct bond_settings *s)
return revalidate;
}
-static struct bond_slave *
-bond_find_slave_by_mac(const struct bond *bond, const struct eth_addr mac)
+static struct bond_member *
+bond_find_member_by_mac(const struct bond *bond, const struct eth_addr mac)
{
- struct bond_slave *slave;
+ struct bond_member *member;
- /* Find the last active slave */
- HMAP_FOR_EACH(slave, hmap_node, &bond->slaves) {
- struct eth_addr slave_mac;
+ /* Find the last active member */
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ struct eth_addr member_mac;
- if (netdev_get_etheraddr(slave->netdev, &slave_mac)) {
+ if (netdev_get_etheraddr(member->netdev, &member_mac)) {
continue;
}
- if (eth_addr_equals(slave_mac, mac)) {
- return slave;
+ if (eth_addr_equals(member_mac, mac)) {
+ return member;
}
}
@@ -498,139 +536,144 @@ bond_find_slave_by_mac(const struct bond *bond, const struct eth_addr mac)
}
static void
-bond_active_slave_changed(struct bond *bond)
+bond_active_member_changed(struct bond *bond)
{
- if (bond->active_slave) {
+ if (bond->active_member) {
struct eth_addr mac;
- netdev_get_etheraddr(bond->active_slave->netdev, &mac);
- bond->active_slave_mac = mac;
+ netdev_get_etheraddr(bond->active_member->netdev, &mac);
+ bond->active_member_mac = mac;
} else {
- bond->active_slave_mac = eth_addr_zero;
+ bond->active_member_mac = eth_addr_zero;
}
- bond->active_slave_changed = true;
+ bond->active_member_changed = true;
seq_change(connectivity_seq_get());
}
static void
-bond_slave_set_netdev__(struct bond_slave *slave, struct netdev *netdev)
+bond_member_set_netdev__(struct bond_member *member, struct netdev *netdev)
OVS_REQ_WRLOCK(rwlock)
{
- if (slave->netdev != netdev) {
- slave->netdev = netdev;
- slave->change_seq = 0;
+ if (member->netdev != netdev) {
+ member->netdev = netdev;
+ member->change_seq = 0;
}
}
-/* Registers 'slave_' as a slave of 'bond'. The 'slave_' pointer is an
- * arbitrary client-provided pointer that uniquely identifies a slave within a
- * bond. If 'slave_' already exists within 'bond' then this function
- * reconfigures the existing slave.
+/* Registers 'member_' as a member interface of 'bond'. The 'member_' pointer
+ * is an arbitrary client-provided pointer that uniquely identifies a member
+ * within a bond. If 'member_' already exists within 'bond' then this function
+ * reconfigures the existing member.
*
- * 'netdev' must be the network device that 'slave_' represents. It is owned
+ * 'netdev' must be the network device that 'member_' represents. It is owned
* by the client, so the client must not close it before either unregistering
- * 'slave_' or destroying 'bond'.
+ * 'member_' or destroying 'bond'.
*/
void
-bond_slave_register(struct bond *bond, void *slave_,
- ofp_port_t ofport, struct netdev *netdev)
+bond_member_register(struct bond *bond, void *member_,
+ ofp_port_t ofport, struct netdev *netdev)
{
- struct bond_slave *slave;
+ struct bond_member *member;
ovs_rwlock_wrlock(&rwlock);
- slave = bond_slave_lookup(bond, slave_);
- if (!slave) {
- slave = xzalloc(sizeof *slave);
-
- hmap_insert(&bond->slaves, &slave->hmap_node, hash_pointer(slave_, 0));
- slave->bond = bond;
- slave->aux = slave_;
- slave->ofp_port = ofport;
- slave->delay_expires = LLONG_MAX;
- slave->name = xstrdup(netdev_get_name(netdev));
+ member = bond_member_lookup(bond, member_);
+ if (!member) {
+ member = xzalloc(sizeof *member);
+
+ hmap_insert(&bond->members, &member->hmap_node, hash_pointer(member_, 0));
+ member->bond = bond;
+ member->aux = member_;
+ member->ofp_port = ofport;
+ member->delay_expires = LLONG_MAX;
+ member->name = xstrdup(netdev_get_name(netdev));
bond->bond_revalidate = true;
- slave->enabled = false;
- bond_enable_slave(slave, netdev_get_carrier(netdev));
+ member->enabled = false;
+ bond_enable_member(member, netdev_get_carrier(netdev));
}
- bond_slave_set_netdev__(slave, netdev);
+ bond_member_set_netdev__(member, netdev);
- free(slave->name);
- slave->name = xstrdup(netdev_get_name(netdev));
+ free(member->name);
+ member->name = xstrdup(netdev_get_name(netdev));
+ if (bond->primary && !strcmp(bond->primary, member->name)) {
+ member->is_primary = true;
+ } else {
+ member->is_primary = false;
+ }
ovs_rwlock_unlock(&rwlock);
}
-/* Updates the network device to be used with 'slave_' to 'netdev'.
+/* Updates the network device to be used with 'member_' to 'netdev'.
*
* This is useful if the caller closes and re-opens the network device
- * registered with bond_slave_register() but doesn't need to change anything
+ * registered with bond_member_register() but doesn't need to change anything
* else. */
void
-bond_slave_set_netdev(struct bond *bond, void *slave_, struct netdev *netdev)
+bond_member_set_netdev(struct bond *bond, void *member_, struct netdev *netdev)
{
- struct bond_slave *slave;
+ struct bond_member *member;
ovs_rwlock_wrlock(&rwlock);
- slave = bond_slave_lookup(bond, slave_);
- if (slave) {
- bond_slave_set_netdev__(slave, netdev);
+ member = bond_member_lookup(bond, member_);
+ if (member) {
+ bond_member_set_netdev__(member, netdev);
}
ovs_rwlock_unlock(&rwlock);
}
-/* Unregisters 'slave_' from 'bond'. If 'bond' does not contain such a slave
- * then this function has no effect.
+/* Unregisters 'member_' from 'bond'. If 'bond' does not contain such a
+ * member then this function has no effect.
*
- * Unregistering a slave invalidates all flows. */
+ * Unregistering a member invalidates all flows. */
void
-bond_slave_unregister(struct bond *bond, const void *slave_)
+bond_member_unregister(struct bond *bond, const void *member_)
{
- struct bond_slave *slave;
+ struct bond_member *member;
bool del_active;
ovs_rwlock_wrlock(&rwlock);
- slave = bond_slave_lookup(bond, slave_);
- if (!slave) {
+ member = bond_member_lookup(bond, member_);
+ if (!member) {
goto out;
}
bond->bond_revalidate = true;
- bond_enable_slave(slave, false);
+ bond_enable_member(member, false);
- del_active = bond->active_slave == slave;
+ del_active = bond->active_member == member;
if (bond->hash) {
struct bond_entry *e;
for (e = bond->hash; e <= &bond->hash[BOND_MASK]; e++) {
- if (e->slave == slave) {
- e->slave = NULL;
+ if (e->member == member) {
+ e->member = NULL;
}
}
}
- free(slave->name);
+ free(member->name);
- hmap_remove(&bond->slaves, &slave->hmap_node);
- /* Client owns 'slave->netdev'. */
- free(slave);
+ hmap_remove(&bond->members, &member->hmap_node);
+ /* Client owns 'member->netdev'. */
+ free(member);
if (del_active) {
- bond_choose_active_slave(bond);
+ bond_choose_active_member(bond);
bond->send_learning_packets = true;
}
out:
ovs_rwlock_unlock(&rwlock);
}
-/* Should be called on each slave in 'bond' before bond_run() to indicate
- * whether or not 'slave_' may be enabled. This function is intended to allow
+/* Should be called on each member in 'bond' before bond_run() to indicate
+ * whether or not 'member_' may be enabled. This function is intended to allow
* other protocols to have some impact on bonding decisions. For example LACP
- * or high level link monitoring protocols may decide that a given slave should
- * not be able to send traffic. */
+ * or high level link monitoring protocols may decide that a given member
+ * should not be able to send traffic. */
void
-bond_slave_set_may_enable(struct bond *bond, void *slave_, bool may_enable)
+bond_member_set_may_enable(struct bond *bond, void *member_, bool may_enable)
{
ovs_rwlock_wrlock(&rwlock);
- bond_slave_lookup(bond, slave_)->may_enable = may_enable;
+ bond_member_lookup(bond, member_)->may_enable = may_enable;
ovs_rwlock_unlock(&rwlock);
}
@@ -642,7 +685,7 @@ bond_slave_set_may_enable(struct bond *bond, void *slave_, bool may_enable)
bool
bond_run(struct bond *bond, enum lacp_status lacp_status)
{
- struct bond_slave *slave;
+ struct bond_member *member, *primary;
bool revalidate;
ovs_rwlock_wrlock(&rwlock);
@@ -658,13 +701,21 @@ bond_run(struct bond *bond, enum lacp_status lacp_status)
}
}
- /* Enable slaves based on link status and LACP feedback. */
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
- bond_link_status_update(slave);
- slave->change_seq = seq_read(connectivity_seq_get());
+ /* Enable members based on link status and LACP feedback. */
+ primary = NULL;
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ bond_link_status_update(member);
+ member->change_seq = seq_read(connectivity_seq_get());
+
+ /* Discover if there is an enabled member marked 'primary'. */
+ if (bond->balance == BM_AB && member->is_primary && member->enabled) {
+ primary = member;
+ }
}
- if (!bond->active_slave || !bond->active_slave->enabled) {
- bond_choose_active_slave(bond);
+
+ if (!bond->active_member || !bond->active_member->enabled ||
+ (primary && bond->active_member != primary)) {
+ bond_choose_active_member(bond);
}
revalidate = bond->bond_revalidate;
@@ -678,15 +729,15 @@ bond_run(struct bond *bond, enum lacp_status lacp_status)
void
bond_wait(struct bond *bond)
{
- struct bond_slave *slave;
+ struct bond_member *member;
ovs_rwlock_rdlock(&rwlock);
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
- if (slave->delay_expires != LLONG_MAX) {
- poll_timer_wait_until(slave->delay_expires);
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ if (member->delay_expires != LLONG_MAX) {
+ poll_timer_wait_until(member->delay_expires);
}
- seq_wait(connectivity_seq_get(), slave->change_seq);
+ seq_wait(connectivity_seq_get(), member->change_seq);
}
if (bond->bond_revalidate) {
@@ -708,7 +759,7 @@ may_send_learning_packets(const struct bond *bond)
return ((bond->lacp_status == LACP_DISABLED
&& (bond->balance == BM_SLB || bond->balance == BM_AB))
|| (bond->lacp_fallback_ab && bond->lacp_status == LACP_CONFIGURED))
- && bond->active_slave;
+ && bond->active_member;
}
/* Returns true if 'bond' needs the client to send out packets to assist with
@@ -743,7 +794,7 @@ struct dp_packet *
bond_compose_learning_packet(struct bond *bond, const struct eth_addr eth_src,
uint16_t vlan, void **port_aux)
{
- struct bond_slave *slave;
+ struct bond_member *member;
struct dp_packet *packet;
struct flow flow;
@@ -751,7 +802,7 @@ bond_compose_learning_packet(struct bond *bond, const struct eth_addr eth_src,
ovs_assert(may_send_learning_packets(bond));
memset(&flow, 0, sizeof flow);
flow.dl_src = eth_src;
- slave = choose_output_slave(bond, &flow, NULL, vlan);
+ member = choose_output_member(bond, &flow, NULL, vlan);
packet = dp_packet_new(0);
compose_rarp(packet, eth_src);
@@ -759,7 +810,7 @@ bond_compose_learning_packet(struct bond *bond, const struct eth_addr eth_src,
eth_push_vlan(packet, htons(ETH_TYPE_VLAN), htons(vlan));
}
- *port_aux = slave->aux;
+ *port_aux = member->aux;
ovs_rwlock_unlock(&rwlock);
return packet;
}
@@ -773,7 +824,7 @@ bond_is_falling_back_to_ab(const struct bond *bond)
&& bond->lacp_status == LACP_CONFIGURED);
}
-/* Checks whether a packet that arrived on 'slave_' within 'bond', with an
+/* Checks whether a packet that arrived on 'member_' within 'bond', with an
* Ethernet destination address of 'eth_dst', should be admitted.
*
* The return value is one of the following:
@@ -789,22 +840,22 @@ bond_is_falling_back_to_ab(const struct bond *bond)
* learning).
*/
enum bond_verdict
-bond_check_admissibility(struct bond *bond, const void *slave_,
+bond_check_admissibility(struct bond *bond, const void *member_,
const struct eth_addr eth_dst)
{
enum bond_verdict verdict = BV_DROP;
- struct bond_slave *slave;
+ struct bond_member *member;
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
ovs_rwlock_rdlock(&rwlock);
- slave = bond_slave_lookup(bond, slave_);
- if (!slave) {
+ member = bond_member_lookup(bond, member_);
+ if (!member) {
goto out;
}
/* LACP bonds have very loose admissibility restrictions because we can
* assume the remote switch is aware of the bond and will "do the right
- * thing". However, as a precaution we drop packets on disabled slaves
+ * thing". However, as a precaution we drop packets on disabled members
* because no correctly implemented partner switch should be sending
* packets to them.
*
@@ -812,14 +863,15 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
* drop all incoming traffic except if lacp_fallback_ab is enabled. */
switch (bond->lacp_status) {
case LACP_NEGOTIATED:
- /* To reduce packet-drops due to delay in enabling of slave (post
+ /* To reduce packet-drops due to delay in enabling of member (post
* LACP-SYNC), from main thread, check for may_enable as well.
* When may_enable is TRUE, it means LACP is UP and waiting for the
- * main thread to run LACP state machine and enable the slave. */
- verdict = (slave->enabled || slave->may_enable) ? BV_ACCEPT : BV_DROP;
- if (!slave->enabled && slave->may_enable) {
- VLOG_DBG_RL(&rl, "bond %s: slave %s: main thread not yet enabled slave",
- bond->name, bond->active_slave->name);
+ * main thread to run LACP state machine and enable the member. */
+ verdict = (member->enabled || member->may_enable) ? BV_ACCEPT : BV_DROP;
+ if (!member->enabled && member->may_enable) {
+ VLOG_DBG_RL(&rl, "bond %s: member %s: "
+ "main thread has not yet enabled member",
+ bond->name, member->name);
}
goto out;
case LACP_CONFIGURED:
@@ -834,9 +886,9 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
break;
}
- /* Drop all multicast packets on inactive slaves. */
+ /* Drop all multicast packets on inactive members. */
if (eth_addr_is_multicast(eth_dst)) {
- if (bond->active_slave != slave) {
+ if (bond->active_member != member) {
goto out;
}
}
@@ -853,12 +905,12 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
/* fall through */
case BM_AB:
- /* Drop all packets which arrive on backup slaves. This is similar to
+ /* Drop all packets which arrive on backup members. This is similar to
* how Linux bonding handles active-backup bonds. */
- if (bond->active_slave != slave) {
+ if (bond->active_member != member) {
VLOG_DBG_RL(&rl, "active-backup bond received packet on backup"
- " slave (%s) destined for " ETH_ADDR_FMT,
- slave->name, ETH_ADDR_ARGS(eth_dst));
+ " member (%s) destined for " ETH_ADDR_FMT,
+ member->name, ETH_ADDR_ARGS(eth_dst));
goto out;
}
verdict = BV_ACCEPT;
@@ -866,27 +918,28 @@ bond_check_admissibility(struct bond *bond, const void *slave_,
case BM_SLB:
/* Drop all packets for which we have learned a different input port,
- * because we probably sent the packet on one slave and got it back on
+ * because we probably sent the packet on one member and got it back on
* the other. Gratuitous ARP packets are an exception to this rule:
* the host has moved to another switch. The exception to the
* exception is if we locked the learning table to avoid reflections on
- * bond slaves. */
+ * bond members. */
verdict = BV_DROP_IF_MOVED;
goto out;
}
OVS_NOT_REACHED();
out:
- if (slave && (verdict != BV_ACCEPT)) {
- VLOG_DBG_RL(&rl, "slave (%s): Admissibility verdict is to drop pkt %s."
- "active slave: %s, may_enable: %s enable: %s "
+ if (member && (verdict != BV_ACCEPT)) {
+ VLOG_DBG_RL(&rl, "member (%s): "
+ "Admissibility verdict is to drop pkt %s."
+ "active member: %s, may_enable: %s enable: %s "
"LACP status:%d",
- slave->name,
+ member->name,
(verdict == BV_DROP_IF_MOVED) ?
"as different port is learned" : "",
- (bond->active_slave == slave) ? "true" : "false",
- slave->may_enable ? "true" : "false",
- slave->enabled ? "true" : "false",
+ (bond->active_member == member) ? "true" : "false",
+ member->may_enable ? "true" : "false",
+ member->enabled ? "true" : "false",
bond->lacp_status);
}
@@ -895,9 +948,9 @@ out:
}
-/* Returns the slave (registered on 'bond' by bond_slave_register()) to which
- * a packet with the given 'flow' and 'vlan' should be forwarded. Returns
- * NULL if the packet should be dropped because no slaves are enabled.
+/* Returns the member (registered on 'bond' by bond_member_register()) to which
+ * a packet with the given 'flow' and 'vlan' should be forwarded. Returns NULL
+ * if the packet should be dropped because no members are enabled.
*
* 'vlan' is not necessarily the same as 'flow->vlan_tci'. First, 'vlan'
* should be a VID only (i.e. excluding the PCP bits). Second,
@@ -910,15 +963,15 @@ out:
* have been initialized (e.g., by flow_wildcards_init_catchall()).
*/
void *
-bond_choose_output_slave(struct bond *bond, const struct flow *flow,
- struct flow_wildcards *wc, uint16_t vlan)
+bond_choose_output_member(struct bond *bond, const struct flow *flow,
+ struct flow_wildcards *wc, uint16_t vlan)
{
- struct bond_slave *slave;
+ struct bond_member *member;
void *aux;
ovs_rwlock_rdlock(&rwlock);
- slave = choose_output_slave(bond, flow, wc, vlan);
- aux = slave ? slave->aux : NULL;
+ member = choose_output_member(bond, flow, wc, vlan);
+ aux = member ? member->aux : NULL;
ovs_rwlock_unlock(&rwlock);
return aux;
@@ -929,7 +982,7 @@ static void
bond_entry_account(struct bond_entry *entry, uint64_t rule_tx_bytes)
OVS_REQ_WRLOCK(rwlock)
{
- if (entry->slave) {
+ if (entry->member) {
uint64_t delta;
delta = rule_tx_bytes - entry->pr_tx_bytes;
@@ -944,19 +997,31 @@ bond_recirculation_account(struct bond *bond)
OVS_REQ_WRLOCK(rwlock)
{
int i;
+ uint64_t n_bytes[BOND_BUCKETS];
+ bool use_lb_output_action = bond_use_lb_output_action(bond);
+
+ if (use_lb_output_action) {
+ /* Retrieve bond stats from datapath. */
+ dpif_bond_stats_get(bond->ofproto->backer->dpif,
+ bond->recirc_id, n_bytes);
+ }
for (i=0; i<=BOND_MASK; i++) {
struct bond_entry *entry = &bond->hash[i];
struct rule *rule = entry->pr_rule;
+ struct pkt_stats stats;
- if (rule) {
- struct pkt_stats stats;
+ if (use_lb_output_action) {
+ stats.n_bytes = n_bytes[i];
+ } else if (rule) {
long long int used OVS_UNUSED;
rule->ofproto->ofproto_class->rule_get_stats(
rule, &stats, &used);
- bond_entry_account(entry, stats.n_bytes);
+ } else {
+ continue;
}
+ bond_entry_account(entry, stats.n_bytes);
}
}
@@ -976,12 +1041,12 @@ bond_update_post_recirc_rules__(struct bond* bond, const bool force)
/* Make sure all bond entries are populated */
for (e = bond->hash; e <= &bond->hash[BOND_MASK]; e++) {
- if (!e->slave || !e->slave->enabled) {
+ if (!e->member || !e->member->enabled) {
update_rules = true;
- e->slave = CONTAINER_OF(hmap_random_node(&bond->slaves),
- struct bond_slave, hmap_node);
- if (!e->slave->enabled) {
- e->slave = bond->active_slave;
+ e->member = CONTAINER_OF(hmap_random_node(&bond->members),
+ struct bond_member, hmap_node);
+ if (!e->member->enabled) {
+ e->member = bond->active_member;
}
}
}
@@ -1039,10 +1104,10 @@ bond_account(struct bond *bond, const struct flow *flow, uint16_t vlan,
ovs_rwlock_unlock(&rwlock);
}
-static struct bond_slave *
-bond_slave_from_bal_node(struct ovs_list *bal) OVS_REQ_RDLOCK(rwlock)
+static struct bond_member *
+bond_member_from_bal_node(struct ovs_list *bal) OVS_REQ_RDLOCK(rwlock)
{
- return CONTAINER_OF(bal, struct bond_slave, bal_node);
+ return CONTAINER_OF(bal, struct bond_member, bal_node);
}
static void
@@ -1051,24 +1116,24 @@ log_bals(struct bond *bond, const struct ovs_list *bals)
{
if (VLOG_IS_DBG_ENABLED()) {
struct ds ds = DS_EMPTY_INITIALIZER;
- const struct bond_slave *slave;
+ const struct bond_member *member;
- LIST_FOR_EACH (slave, bal_node, bals) {
+ LIST_FOR_EACH (member, bal_node, bals) {
if (ds.length) {
ds_put_char(&ds, ',');
}
ds_put_format(&ds, " %s %"PRIu64"kB",
- slave->name, slave->tx_bytes / 1024);
+ member->name, member->tx_bytes / 1024);
- if (!slave->enabled) {
+ if (!member->enabled) {
ds_put_cstr(&ds, " (disabled)");
}
- if (!ovs_list_is_empty(&slave->entries)) {
+ if (!ovs_list_is_empty(&member->entries)) {
struct bond_entry *e;
ds_put_cstr(&ds, " (");
- LIST_FOR_EACH (e, list_node, &slave->entries) {
- if (&e->list_node != ovs_list_front(&slave->entries)) {
+ LIST_FOR_EACH (e, list_node, &member->entries) {
+ if (&e->list_node != ovs_list_front(&member->entries)) {
ds_put_cstr(&ds, " + ");
}
ds_put_format(&ds, "h%"PRIdPTR": %"PRIu64"kB",
@@ -1082,12 +1147,12 @@ log_bals(struct bond *bond, const struct ovs_list *bals)
}
}
-/* Shifts 'hash' from its current slave to 'to'. */
+/* Shifts 'hash' from its current member to 'to'. */
static void
-bond_shift_load(struct bond_entry *hash, struct bond_slave *to)
+bond_shift_load(struct bond_entry *hash, struct bond_member *to)
OVS_REQ_WRLOCK(rwlock)
{
- struct bond_slave *from = hash->slave;
+ struct bond_member *from = hash->member;
struct bond *bond = from->bond;
uint64_t delta = hash->tx_bytes;
@@ -1104,19 +1169,19 @@ bond_shift_load(struct bond_entry *hash, struct bond_slave *to)
to->tx_bytes += delta;
/* Arrange for flows to be revalidated. */
- hash->slave = to;
+ hash->member = to;
bond->bond_revalidate = true;
}
/* Picks and returns a bond_entry to migrate from 'from' (the most heavily
- * loaded bond slave) to a bond slave that has 'to_tx_bytes' bytes of load,
- * given that doing so must decrease the ratio of the load on the two slaves by
- * at least 0.1. Returns NULL if there is no appropriate entry.
+ * loaded bond member) to a bond member that has 'to_tx_bytes' bytes of load,
+ * given that doing so must decrease the ratio of the load on the two members
+ * by at least 0.1. Returns NULL if there is no appropriate entry.
*
* The list of entries isn't sorted. I don't know of a reason to prefer to
* shift away small hashes or large hashes. */
static struct bond_entry *
-choose_entry_to_migrate(const struct bond_slave *from, uint64_t to_tx_bytes)
+choose_entry_to_migrate(const struct bond_member *from, uint64_t to_tx_bytes)
OVS_REQ_WRLOCK(rwlock)
{
struct bond_entry *e;
@@ -1153,28 +1218,28 @@ choose_entry_to_migrate(const struct bond_slave *from, uint64_t to_tx_bytes)
return NULL;
}
-/* Inserts 'slave' into 'bals' so that descending order of 'tx_bytes' is
+/* Inserts 'member' into 'bals' so that descending order of 'tx_bytes' is
* maintained. */
static void
-insert_bal(struct ovs_list *bals, struct bond_slave *slave)
+insert_bal(struct ovs_list *bals, struct bond_member *member)
{
- struct bond_slave *pos;
+ struct bond_member *pos;
LIST_FOR_EACH (pos, bal_node, bals) {
- if (slave->tx_bytes > pos->tx_bytes) {
+ if (member->tx_bytes > pos->tx_bytes) {
break;
}
}
- ovs_list_insert(&pos->bal_node, &slave->bal_node);
+ ovs_list_insert(&pos->bal_node, &member->bal_node);
}
-/* Removes 'slave' from its current list and then inserts it into 'bals' so
+/* Removes 'member' from its current list and then inserts it into 'bals' so
* that descending order of 'tx_bytes' is maintained. */
static void
-reinsert_bal(struct ovs_list *bals, struct bond_slave *slave)
+reinsert_bal(struct ovs_list *bals, struct bond_member *member)
{
- ovs_list_remove(&slave->bal_node);
- insert_bal(bals, slave);
+ ovs_list_remove(&member->bal_node);
+ insert_bal(bals, member);
}
/* If 'bond' needs rebalancing, does so.
@@ -1186,7 +1251,7 @@ reinsert_bal(struct ovs_list *bals, struct bond_slave *slave)
void
bond_rebalance(struct bond *bond)
{
- struct bond_slave *slave;
+ struct bond_member *member;
struct bond_entry *e;
struct ovs_list bals;
bool rebalanced = false;
@@ -1205,41 +1270,43 @@ bond_rebalance(struct bond *bond)
bond_recirculation_account(bond);
}
- /* Add each bond_entry to its slave's 'entries' list.
- * Compute each slave's tx_bytes as the sum of its entries' tx_bytes. */
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
- slave->tx_bytes = 0;
- ovs_list_init(&slave->entries);
+ /* Add each bond_entry to its member's 'entries' list.
+ * Compute each member's tx_bytes as the sum of its entries' tx_bytes. */
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ member->tx_bytes = 0;
+ ovs_list_init(&member->entries);
}
for (e = &bond->hash[0]; e <= &bond->hash[BOND_MASK]; e++) {
- if (e->slave && e->tx_bytes) {
- e->slave->tx_bytes += e->tx_bytes;
- ovs_list_push_back(&e->slave->entries, &e->list_node);
+ if (e->member && e->tx_bytes) {
+ e->member->tx_bytes += e->tx_bytes;
+ ovs_list_push_back(&e->member->entries, &e->list_node);
}
}
- /* Add enabled slaves to 'bals' in descending order of tx_bytes.
+ /* Add enabled members to 'bals' in descending order of tx_bytes.
*
- * XXX This is O(n**2) in the number of slaves but it could be O(n lg n)
+ * XXX This is O(n**2) in the number of members but it could be O(n lg n)
* with a proper list sort algorithm. */
ovs_list_init(&bals);
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
- if (slave->enabled) {
- insert_bal(&bals, slave);
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ if (member->enabled) {
+ insert_bal(&bals, member);
}
}
log_bals(bond, &bals);
- /* Shift load from the most-loaded slaves to the least-loaded slaves. */
+ /* Shift load from the most-loaded members to the least-loaded members. */
while (!ovs_list_is_short(&bals)) {
- struct bond_slave *from = bond_slave_from_bal_node(ovs_list_front(&bals));
- struct bond_slave *to = bond_slave_from_bal_node(ovs_list_back(&bals));
+ struct bond_member *from
+ = bond_member_from_bal_node(ovs_list_front(&bals));
+ struct bond_member *to
+ = bond_member_from_bal_node(ovs_list_back(&bals));
uint64_t overload;
overload = from->tx_bytes - to->tx_bytes;
if (overload < to->tx_bytes >> 5 || overload < 100000) {
- /* The extra load on 'from' (and all less-loaded slaves), compared
- * to that of 'to' (the least-loaded slave), is less than ~3%, or
+ /* The extra load on 'from' (and all less-loaded members), compared
+ * to that of 'to' (the least-loaded member), is less than ~3%, or
* it is less than ~1Mbps. No point in rebalancing. */
break;
}
@@ -1253,7 +1320,7 @@ bond_rebalance(struct bond *bond)
/* Delete element from from->entries.
*
* We don't add the element to to->hashes. That would only allow
- * 'e' to be migrated to another slave in this rebalancing run, and
+ * 'e' to be migrated to another member in this rebalancing run, and
* there is no point in doing that. */
ovs_list_remove(&e->list_node);
@@ -1299,14 +1366,14 @@ bond_find(const char *name) OVS_REQ_RDLOCK(rwlock)
return NULL;
}
-static struct bond_slave *
-bond_lookup_slave(struct bond *bond, const char *slave_name)
+static struct bond_member *
+bond_lookup_member(struct bond *bond, const char *member_name)
{
- struct bond_slave *slave;
+ struct bond_member *member;
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
- if (!strcmp(slave->name, slave_name)) {
- return slave;
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ if (!strcmp(member->name, member_name)) {
+ return member;
}
}
return NULL;
@@ -1320,22 +1387,22 @@ bond_unixctl_list(struct unixctl_conn *conn,
struct ds ds = DS_EMPTY_INITIALIZER;
const struct bond *bond;
- ds_put_cstr(&ds, "bond\ttype\trecircID\tslaves\n");
+ ds_put_cstr(&ds, "bond\ttype\trecircID\tmembers\n");
ovs_rwlock_rdlock(&rwlock);
HMAP_FOR_EACH (bond, hmap_node, all_bonds) {
- const struct bond_slave *slave;
+ const struct bond_member *member;
size_t i;
ds_put_format(&ds, "%s\t%s\t%d\t", bond->name,
bond_mode_to_string(bond->balance), bond->recirc_id);
i = 0;
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
if (i++ > 0) {
ds_put_cstr(&ds, ", ");
}
- ds_put_cstr(&ds, slave->name);
+ ds_put_cstr(&ds, member->name);
}
ds_put_char(&ds, '\n');
}
@@ -1348,9 +1415,10 @@ static void
bond_print_details(struct ds *ds, const struct bond *bond)
OVS_REQ_RDLOCK(rwlock)
{
- struct shash slave_shash = SHASH_INITIALIZER(&slave_shash);
- const struct shash_node **sorted_slaves = NULL;
- const struct bond_slave *slave;
+ struct shash member_shash = SHASH_INITIALIZER(&member_shash);
+ const struct shash_node **sorted_members = NULL;
+ const struct bond_member *member;
+ bool use_lb_output_action;
bool may_recirc;
uint32_t recirc_id;
int i;
@@ -1366,6 +1434,11 @@ bond_print_details(struct ds *ds, const struct bond *bond)
ds_put_format(ds, "bond-hash-basis: %"PRIu32"\n", bond->basis);
+ use_lb_output_action = bond_use_lb_output_action(bond);
+ ds_put_format(ds, "lb_output action: %s, bond-id: %d\n",
+ use_lb_output_action ? "enabled" : "disabled",
+ use_lb_output_action ? recirc_id : -1);
+
ds_put_format(ds, "updelay: %d ms\n", bond->updelay);
ds_put_format(ds, "downdelay: %d ms\n", bond->downdelay);
@@ -1393,35 +1466,44 @@ bond_print_details(struct ds *ds, const struct bond *bond)
ds_put_format(ds, "lacp_fallback_ab: %s\n",
bond->lacp_fallback_ab ? "true" : "false");
- ds_put_cstr(ds, "active slave mac: ");
- ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(bond->active_slave_mac));
- slave = bond_find_slave_by_mac(bond, bond->active_slave_mac);
- ds_put_format(ds,"(%s)\n", slave ? slave->name : "none");
-
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
- shash_add(&slave_shash, slave->name, slave);
+ bool found_primary = false;
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ if (member->is_primary) {
+ found_primary = true;
+ }
+ shash_add(&member_shash, member->name, member);
}
- sorted_slaves = shash_sort(&slave_shash);
- for (i = 0; i < shash_count(&slave_shash); i++) {
+ ds_put_format(ds, "active-backup primary: %s%s\n",
+ bond->primary ? bond->primary : "",
+ (!found_primary && bond->primary)
+ ? " (no such member)" : "");
+
+ member = bond_find_member_by_mac(bond, bond->active_member_mac);
+ ds_put_cstr(ds, "active member mac: ");
+ ds_put_format(ds, ETH_ADDR_FMT, ETH_ADDR_ARGS(bond->active_member_mac));
+ ds_put_format(ds, "(%s)\n", member ? member->name : "none");
+
+ sorted_members = shash_sort(&member_shash);
+ for (i = 0; i < shash_count(&member_shash); i++) {
struct bond_entry *be;
- slave = sorted_slaves[i]->data;
+ member = sorted_members[i]->data;
/* Basic info. */
- ds_put_format(ds, "\nslave %s: %s\n",
- slave->name, slave->enabled ? "enabled" : "disabled");
- if (slave == bond->active_slave) {
- ds_put_cstr(ds, " active slave\n");
+ ds_put_format(ds, "\nmember %s: %s\n",
+ member->name, member->enabled ? "enabled" : "disabled");
+ if (member == bond->active_member) {
+ ds_put_cstr(ds, " active member\n");
}
- if (slave->delay_expires != LLONG_MAX) {
+ if (member->delay_expires != LLONG_MAX) {
ds_put_format(ds, " %s expires in %lld ms\n",
- slave->enabled ? "downdelay" : "updelay",
- slave->delay_expires - time_msec());
+ member->enabled ? "downdelay" : "updelay",
+ member->delay_expires - time_msec());
}
ds_put_format(ds, " may_enable: %s\n",
- slave->may_enable ? "true" : "false");
+ member->may_enable ? "true" : "false");
if (!bond_is_balanced(bond)) {
continue;
@@ -1432,7 +1514,7 @@ bond_print_details(struct ds *ds, const struct bond *bond)
int hash = be - bond->hash;
uint64_t be_tx_k;
- if (be->slave != slave) {
+ if (be->member != member) {
continue;
}
@@ -1445,8 +1527,8 @@ bond_print_details(struct ds *ds, const struct bond *bond)
/* XXX How can we list the MACs assigned to hashes of SLB bonds? */
}
}
- shash_destroy(&slave_shash);
- free(sorted_slaves);
+ shash_destroy(&member_shash);
+ free(sorted_members);
ds_put_cstr(ds, "\n");
}
@@ -1488,9 +1570,9 @@ bond_unixctl_migrate(struct unixctl_conn *conn,
{
const char *bond_s = argv[1];
const char *hash_s = argv[2];
- const char *slave_s = argv[3];
+ const char *member_s = argv[3];
struct bond *bond;
- struct bond_slave *slave;
+ struct bond_member *member;
struct bond_entry *entry;
int hash;
@@ -1513,20 +1595,21 @@ bond_unixctl_migrate(struct unixctl_conn *conn,
goto out;
}
- slave = bond_lookup_slave(bond, slave_s);
- if (!slave) {
- unixctl_command_reply_error(conn, "no such slave");
+ member = bond_lookup_member(bond, member_s);
+ if (!member) {
+ unixctl_command_reply_error(conn, "no such member");
goto out;
}
- if (!slave->enabled) {
- unixctl_command_reply_error(conn, "cannot migrate to disabled slave");
+ if (!member->enabled) {
+ unixctl_command_reply_error(conn,
+ "cannot migrate to disabled member");
goto out;
}
entry = &bond->hash[hash];
bond->bond_revalidate = true;
- entry->slave = slave;
+ entry->member = member;
unixctl_command_reply(conn, "migrated");
out:
@@ -1534,14 +1617,14 @@ out:
}
static void
-bond_unixctl_set_active_slave(struct unixctl_conn *conn,
- int argc OVS_UNUSED, const char *argv[],
- void *aux OVS_UNUSED)
+bond_unixctl_set_active_member(struct unixctl_conn *conn,
+ int argc OVS_UNUSED, const char *argv[],
+ void *aux OVS_UNUSED)
{
const char *bond_s = argv[1];
- const char *slave_s = argv[2];
+ const char *member_s = argv[2];
struct bond *bond;
- struct bond_slave *slave;
+ struct bond_member *member;
ovs_rwlock_wrlock(&rwlock);
bond = bond_find(bond_s);
@@ -1550,25 +1633,26 @@ bond_unixctl_set_active_slave(struct unixctl_conn *conn,
goto out;
}
- slave = bond_lookup_slave(bond, slave_s);
- if (!slave) {
- unixctl_command_reply_error(conn, "no such slave");
+ member = bond_lookup_member(bond, member_s);
+ if (!member) {
+ unixctl_command_reply_error(conn, "no such member");
goto out;
}
- if (!slave->enabled) {
- unixctl_command_reply_error(conn, "cannot make disabled slave active");
+ if (!member->enabled) {
+ unixctl_command_reply_error(conn,
+ "cannot make disabled member active");
goto out;
}
- if (bond->active_slave != slave) {
+ if (bond->active_member != member) {
bond->bond_revalidate = true;
- bond->active_slave = slave;
- VLOG_INFO("bond %s: active interface is now %s",
- bond->name, slave->name);
+ bond->active_member = member;
+ VLOG_INFO("bond %s: active member is now %s",
+ bond->name, member->name);
bond->send_learning_packets = true;
unixctl_command_reply(conn, "done");
- bond_active_slave_changed(bond);
+ bond_active_member_changed(bond);
} else {
unixctl_command_reply(conn, "no change");
}
@@ -1577,12 +1661,12 @@ out:
}
static void
-enable_slave(struct unixctl_conn *conn, const char *argv[], bool enable)
+enable_member(struct unixctl_conn *conn, const char *argv[], bool enable)
{
const char *bond_s = argv[1];
- const char *slave_s = argv[2];
+ const char *member_s = argv[2];
struct bond *bond;
- struct bond_slave *slave;
+ struct bond_member *member;
ovs_rwlock_wrlock(&rwlock);
bond = bond_find(bond_s);
@@ -1591,13 +1675,13 @@ enable_slave(struct unixctl_conn *conn, const char *argv[], bool enable)
goto out;
}
- slave = bond_lookup_slave(bond, slave_s);
- if (!slave) {
- unixctl_command_reply_error(conn, "no such slave");
+ member = bond_lookup_member(bond, member_s);
+ if (!member) {
+ unixctl_command_reply_error(conn, "no such member");
goto out;
}
- bond_enable_slave(slave, enable);
+ bond_enable_member(member, enable);
unixctl_command_reply(conn, enable ? "enabled" : "disabled");
out:
@@ -1605,19 +1689,19 @@ out:
}
static void
-bond_unixctl_enable_slave(struct unixctl_conn *conn,
- int argc OVS_UNUSED, const char *argv[],
- void *aux OVS_UNUSED)
+bond_unixctl_enable_member(struct unixctl_conn *conn,
+ int argc OVS_UNUSED, const char *argv[],
+ void *aux OVS_UNUSED)
{
- enable_slave(conn, argv, true);
+ enable_member(conn, argv, true);
}
static void
-bond_unixctl_disable_slave(struct unixctl_conn *conn,
- int argc OVS_UNUSED, const char *argv[],
- void *aux OVS_UNUSED)
+bond_unixctl_disable_member(struct unixctl_conn *conn,
+ int argc OVS_UNUSED, const char *argv[],
+ void *aux OVS_UNUSED)
{
- enable_slave(conn, argv, false);
+ enable_member(conn, argv, false);
}
static void
@@ -1668,16 +1752,24 @@ bond_init(void)
unixctl_command_register("bond/list", "", 0, 0, bond_unixctl_list, NULL);
unixctl_command_register("bond/show", "[port]", 0, 1, bond_unixctl_show,
NULL);
- unixctl_command_register("bond/migrate", "port hash slave", 3, 3,
+ unixctl_command_register("bond/migrate", "port hash member", 3, 3,
bond_unixctl_migrate, NULL);
- unixctl_command_register("bond/set-active-slave", "port slave", 2, 2,
- bond_unixctl_set_active_slave, NULL);
- unixctl_command_register("bond/enable-slave", "port slave", 2, 2,
- bond_unixctl_enable_slave, NULL);
- unixctl_command_register("bond/disable-slave", "port slave", 2, 2,
- bond_unixctl_disable_slave, NULL);
+ unixctl_command_register("bond/set-active-member", "port member", 2, 2,
+ bond_unixctl_set_active_member, NULL);
+ unixctl_command_register("bond/enable-member", "port member", 2, 2,
+ bond_unixctl_enable_member, NULL);
+ unixctl_command_register("bond/disable-member", "port member", 2, 2,
+ bond_unixctl_disable_member, NULL);
unixctl_command_register("bond/hash", "mac [vlan] [basis]", 1, 3,
bond_unixctl_hash, NULL);
+
+ /* Backward-compatibility command names. */
+ unixctl_command_register("bond/set-active-slave", NULL, 2, 2,
+ bond_unixctl_set_active_member, NULL);
+ unixctl_command_register("bond/enable-slave", NULL, 2, 2,
+ bond_unixctl_enable_member, NULL);
+ unixctl_command_register("bond/disable-slave", NULL, 2, 2,
+ bond_unixctl_disable_member, NULL);
}
static void
@@ -1700,15 +1792,15 @@ bond_entry_reset(struct bond *bond)
}
}
-static struct bond_slave *
-bond_slave_lookup(struct bond *bond, const void *slave_)
+static struct bond_member *
+bond_member_lookup(struct bond *bond, const void *member_)
{
- struct bond_slave *slave;
+ struct bond_member *member;
- HMAP_FOR_EACH_IN_BUCKET (slave, hmap_node, hash_pointer(slave_, 0),
- &bond->slaves) {
- if (slave->aux == slave_) {
- return slave;
+ HMAP_FOR_EACH_IN_BUCKET (member, hmap_node, hash_pointer(member_, 0),
+ &bond->members) {
+ if (member->aux == member_) {
+ return member;
}
}
@@ -1716,51 +1808,51 @@ bond_slave_lookup(struct bond *bond, const void *slave_)
}
static void
-bond_enable_slave(struct bond_slave *slave, bool enable)
+bond_enable_member(struct bond_member *member, bool enable)
{
- struct bond *bond = slave->bond;
+ struct bond *bond = member->bond;
- slave->delay_expires = LLONG_MAX;
- if (enable != slave->enabled) {
- slave->bond->bond_revalidate = true;
- slave->enabled = enable;
+ member->delay_expires = LLONG_MAX;
+ if (enable != member->enabled) {
+ member->bond->bond_revalidate = true;
+ member->enabled = enable;
- ovs_mutex_lock(&slave->bond->mutex);
+ ovs_mutex_lock(&member->bond->mutex);
if (enable) {
- ovs_list_insert(&slave->bond->enabled_slaves, &slave->list_node);
+ ovs_list_insert(&member->bond->enabled_members, &member->list_node);
} else {
bond->send_learning_packets = true;
- ovs_list_remove(&slave->list_node);
+ ovs_list_remove(&member->list_node);
}
- ovs_mutex_unlock(&slave->bond->mutex);
+ ovs_mutex_unlock(&member->bond->mutex);
- VLOG_INFO("interface %s: %s", slave->name,
- slave->enabled ? "enabled" : "disabled");
+ VLOG_INFO("member %s: %s", member->name,
+ member->enabled ? "enabled" : "disabled");
}
}
static void
-bond_link_status_update(struct bond_slave *slave)
+bond_link_status_update(struct bond_member *member)
{
- struct bond *bond = slave->bond;
+ struct bond *bond = member->bond;
bool up;
- up = netdev_get_carrier(slave->netdev) && slave->may_enable;
- if ((up == slave->enabled) != (slave->delay_expires == LLONG_MAX)) {
+ up = netdev_get_carrier(member->netdev) && member->may_enable;
+ if ((up == member->enabled) != (member->delay_expires == LLONG_MAX)) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
- VLOG_INFO_RL(&rl, "interface %s: link state %s",
- slave->name, up ? "up" : "down");
- if (up == slave->enabled) {
- slave->delay_expires = LLONG_MAX;
- VLOG_INFO_RL(&rl, "interface %s: will not be %s",
- slave->name, up ? "disabled" : "enabled");
+ VLOG_INFO_RL(&rl, "member %s: link state %s",
+ member->name, up ? "up" : "down");
+ if (up == member->enabled) {
+ member->delay_expires = LLONG_MAX;
+ VLOG_INFO_RL(&rl, "member %s: will not be %s",
+ member->name, up ? "disabled" : "enabled");
} else {
int delay = up ? bond->updelay : bond->downdelay;
- slave->delay_expires = time_msec() + delay;
+ member->delay_expires = time_msec() + delay;
if (delay) {
- VLOG_INFO_RL(&rl, "interface %s: will be %s if it stays %s "
+ VLOG_INFO_RL(&rl, "member %s: will be %s if it stays %s "
"for %d ms",
- slave->name,
+ member->name,
up ? "enabled" : "disabled",
up ? "up" : "down",
delay);
@@ -1768,8 +1860,8 @@ bond_link_status_update(struct bond_slave *slave)
}
}
- if (time_msec() >= slave->delay_expires) {
- bond_enable_slave(slave, up);
+ if (time_msec() >= member->delay_expires) {
+ bond_enable_member(member, up);
}
}
@@ -1790,29 +1882,29 @@ lookup_bond_entry(const struct bond *bond, const struct flow *flow,
return &bond->hash[bond_hash(bond, flow, vlan) & BOND_MASK];
}
-/* Selects and returns an enabled slave from the 'enabled_slaves' list
- * in a round-robin fashion. If the 'enabled_slaves' list is empty,
+/* Selects and returns an enabled member from the 'enabled_members' list
+ * in a round-robin fashion. If the 'enabled_members' list is empty,
* returns NULL. */
-static struct bond_slave *
-get_enabled_slave(struct bond *bond)
+static struct bond_member *
+get_enabled_member(struct bond *bond)
{
struct ovs_list *node;
ovs_mutex_lock(&bond->mutex);
- if (ovs_list_is_empty(&bond->enabled_slaves)) {
+ if (ovs_list_is_empty(&bond->enabled_members)) {
ovs_mutex_unlock(&bond->mutex);
return NULL;
}
- node = ovs_list_pop_front(&bond->enabled_slaves);
- ovs_list_push_back(&bond->enabled_slaves, node);
+ node = ovs_list_pop_front(&bond->enabled_members);
+ ovs_list_push_back(&bond->enabled_members, node);
ovs_mutex_unlock(&bond->mutex);
- return CONTAINER_OF(node, struct bond_slave, list_node);
+ return CONTAINER_OF(node, struct bond_member, list_node);
}
-static struct bond_slave *
-choose_output_slave(const struct bond *bond, const struct flow *flow,
+static struct bond_member *
+choose_output_member(const struct bond *bond, const struct flow *flow,
struct flow_wildcards *wc, uint16_t vlan)
{
struct bond_entry *e;
@@ -1831,7 +1923,7 @@ choose_output_slave(const struct bond *bond, const struct flow *flow,
switch (balance) {
case BM_AB:
- return bond->active_slave;
+ return bond->active_member;
case BM_TCP:
if (bond->lacp_status != LACP_NEGOTIATED) {
@@ -1847,83 +1939,90 @@ choose_output_slave(const struct bond *bond, const struct flow *flow,
flow_mask_hash_fields(flow, wc, NX_HASH_FIELDS_ETH_SRC);
}
e = lookup_bond_entry(bond, flow, vlan);
- if (!e->slave || !e->slave->enabled) {
- e->slave = get_enabled_slave(CONST_CAST(struct bond*, bond));
+ if (!e->member || !e->member->enabled) {
+ e->member = get_enabled_member(CONST_CAST(struct bond *, bond));
}
- return e->slave;
+ return e->member;
default:
OVS_NOT_REACHED();
}
}
-static struct bond_slave *
-bond_choose_slave(const struct bond *bond)
+static struct bond_member *
+bond_choose_member(const struct bond *bond)
{
- struct bond_slave *slave, *best;
+ struct bond_member *member, *best;
+
+ /* If there's a primary and it's active, return that. */
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ if (member->is_primary && member->enabled) {
+ return member;
+ }
+ }
- /* Find the last active slave. */
- slave = bond_find_slave_by_mac(bond, bond->active_slave_mac);
- if (slave && slave->enabled) {
- return slave;
+ /* Find the last active member. */
+ member = bond_find_member_by_mac(bond, bond->active_member_mac);
+ if (member && member->enabled) {
+ return member;
}
- /* Find an enabled slave. */
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
- if (slave->enabled) {
- return slave;
+ /* Find an enabled member. */
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ if (member->enabled) {
+ return member;
}
}
- /* All interfaces are disabled. Find an interface that will be enabled
+ /* All members are disabled. Find a member that will be enabled
* after its updelay expires. */
best = NULL;
- HMAP_FOR_EACH (slave, hmap_node, &bond->slaves) {
- if (slave->delay_expires != LLONG_MAX
- && slave->may_enable
- && (!best || slave->delay_expires < best->delay_expires)) {
- best = slave;
+ HMAP_FOR_EACH (member, hmap_node, &bond->members) {
+ if (member->delay_expires != LLONG_MAX
+ && member->may_enable
+ && (!best || member->delay_expires < best->delay_expires)) {
+ best = member;
}
}
return best;
}
static void
-bond_choose_active_slave(struct bond *bond)
+bond_choose_active_member(struct bond *bond)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
- struct bond_slave *old_active_slave = bond->active_slave;
+ struct bond_member *old_active_member = bond->active_member;
- bond->active_slave = bond_choose_slave(bond);
- if (bond->active_slave) {
- if (bond->active_slave->enabled) {
- VLOG_INFO_RL(&rl, "bond %s: active interface is now %s",
- bond->name, bond->active_slave->name);
+ bond->active_member = bond_choose_member(bond);
+ if (bond->active_member) {
+ if (bond->active_member->enabled) {
+ VLOG_INFO_RL(&rl, "bond %s: active member is now %s",
+ bond->name, bond->active_member->name);
} else {
- VLOG_INFO_RL(&rl, "bond %s: active interface is now %s, skipping "
- "remaining %lld ms updelay (since no interface was "
- "enabled)", bond->name, bond->active_slave->name,
- bond->active_slave->delay_expires - time_msec());
- bond_enable_slave(bond->active_slave, true);
+ VLOG_INFO_RL(&rl, "bond %s: active member is now %s, skipping "
+ "remaining %lld ms updelay (since no member was "
+ "enabled)", bond->name, bond->active_member->name,
+ bond->active_member->delay_expires - time_msec());
+ bond_enable_member(bond->active_member, true);
}
bond->send_learning_packets = true;
- if (bond->active_slave != old_active_slave) {
- bond_active_slave_changed(bond);
+ if (bond->active_member != old_active_member) {
+ bond_active_member_changed(bond);
}
- } else if (old_active_slave) {
- bond_active_slave_changed(bond);
- VLOG_INFO_RL(&rl, "bond %s: all interfaces disabled", bond->name);
+ } else if (old_active_member) {
+ bond_active_member_changed(bond);
+ VLOG_INFO_RL(&rl, "bond %s: all members disabled", bond->name);
}
}
/*
- * Return true if bond has unstored active slave change.
- * If return true, 'mac' will store the bond's current active slave's
+ * Returns true if the bond has an unstored active member change.
+ * If so, 'mac' is set to the bond's current active member's
* MAC address. */
bool
-bond_get_changed_active_slave(const char *name, struct eth_addr *mac,
+bond_get_changed_active_member(const char *name, struct eth_addr *mac,
bool force)
{
struct bond *bond;
@@ -1931,9 +2030,9 @@ bond_get_changed_active_slave(const char *name, struct eth_addr *mac,
ovs_rwlock_wrlock(&rwlock);
bond = bond_find(name);
if (bond) {
- if (bond->active_slave_changed || force) {
- *mac = bond->active_slave_mac;
- bond->active_slave_changed = false;
+ if (bond->active_member_changed || force) {
+ *mac = bond->active_member_mac;
+ bond->active_member_changed = false;
ovs_rwlock_unlock(&rwlock);
return true;
}
@@ -1942,3 +2041,34 @@ bond_get_changed_active_slave(const char *name, struct eth_addr *mac,
return false;
}
+
+bool
+bond_use_lb_output_action(const struct bond *bond)
+{
+ return bond_may_recirc(bond) && bond->use_lb_output_action;
+}
+
+static void
+bond_add_lb_output_buckets(const struct bond *bond)
+{
+ ofp_port_t member_map[BOND_BUCKETS];
+
+ for (int i = 0; i < BOND_BUCKETS; i++) {
+ struct bond_member *member = bond->hash[i].member;
+
+ if (member) {
+ member_map[i] = member->ofp_port;
+ } else {
+ member_map[i] = OFPP_NONE;
+ }
+ }
+ ofproto_dpif_add_lb_output_buckets(bond->ofproto, bond->recirc_id,
+ member_map);
+}
+
+static void
+bond_del_lb_output_buckets(const struct bond *bond)
+{
+ ofproto_dpif_delete_lb_output_buckets(bond->ofproto,
+ bond->recirc_id);
+}
diff --git a/ofproto/bond.h b/ofproto/bond.h
index e7c3d9bc35dd1aca2e79d3b29fc325af3a6d5c00..1683ec87811333a69cd972a21e684e7374999305 100644
--- a/ofproto/bond.h
+++ b/ofproto/bond.h
@@ -28,7 +28,7 @@ struct ofpbuf;
struct ofproto_dpif;
enum lacp_status;
-/* How flows are balanced among bond slaves. */
+/* How flows are balanced among bond member interfaces. */
enum bond_mode {
BM_TCP, /* Transport Layer Load Balance. */
BM_SLB, /* Source Load Balance. */
@@ -48,16 +48,20 @@ struct bond_settings {
int rebalance_interval; /* Milliseconds between rebalances.
Zero to disable rebalancing. */
+ const char *primary; /* For AB mode, primary interface name. */
+
/* Link status detection. */
- int up_delay; /* ms before enabling an up slave. */
- int down_delay; /* ms before disabling a down slave. */
+ int up_delay; /* ms before enabling an up member. */
+ int down_delay; /* ms before disabling a down member. */
bool lacp_fallback_ab_cfg; /* Fallback to active-backup on LACP failure. */
- struct eth_addr active_slave_mac;
+ struct eth_addr active_member_mac;
/* The MAC address of the interface
that was active during the last
ovs run. */
+ bool use_lb_output_action; /* Use lb_output action. Only applicable for
+ bond mode BALANCE TCP. */
};
/* Program startup. */
@@ -70,22 +74,23 @@ void bond_unref(struct bond *);
struct bond *bond_ref(const struct bond *);
bool bond_reconfigure(struct bond *, const struct bond_settings *);
-void bond_slave_register(struct bond *, void *slave_, ofp_port_t ofport, struct netdev *);
-void bond_slave_set_netdev(struct bond *, void *slave_, struct netdev *);
-void bond_slave_unregister(struct bond *, const void *slave);
+void bond_member_register(struct bond *, void *member_, ofp_port_t ofport,
+ struct netdev *);
+void bond_member_set_netdev(struct bond *, void *member_, struct netdev *);
+void bond_member_unregister(struct bond *, const void *member);
bool bond_run(struct bond *, enum lacp_status);
void bond_wait(struct bond *);
-void bond_slave_set_may_enable(struct bond *, void *slave_, bool may_enable);
+void bond_member_set_may_enable(struct bond *, void *member_, bool may_enable);
/* Special MAC learning support for SLB bonding. */
bool bond_should_send_learning_packets(struct bond *);
struct dp_packet *bond_compose_learning_packet(struct bond *,
const struct eth_addr eth_src,
uint16_t vlan, void **port_aux);
-bool bond_get_changed_active_slave(const char *name, struct eth_addr *mac,
- bool force);
+bool bond_get_changed_active_member(const char *name, struct eth_addr *mac,
+ bool force);
/* Packet processing. */
enum bond_verdict {
@@ -93,10 +98,10 @@ enum bond_verdict {
BV_DROP, /* Drop this packet. */
BV_DROP_IF_MOVED /* Drop if we've learned a different port. */
};
-enum bond_verdict bond_check_admissibility(struct bond *, const void *slave_,
+enum bond_verdict bond_check_admissibility(struct bond *, const void *member_,
const struct eth_addr dst);
-void *bond_choose_output_slave(struct bond *, const struct flow *,
- struct flow_wildcards *, uint16_t vlan);
+void *bond_choose_output_member(struct bond *, const struct flow *,
+ struct flow_wildcards *, uint16_t vlan);
/* Rebalancing. */
void bond_account(struct bond *, const struct flow *, uint16_t vlan,
@@ -115,11 +120,14 @@ void bond_rebalance(struct bond *);
*
* On handling first output packet, 256 post recirculation flows are installed:
*
- * recirc_id=<bond_recirc_id>, dp_hash=<[0..255]>/0xff, actions: output<slave>
+ * recirc_id=<bond_recirc_id>, dp_hash=<[0..255]>/0xff, actions: output<member>
*
* Bond module pulls stats from those post recirculation rules. If rebalancing
* is needed, those rules are updated with new output actions.
*/
void bond_update_post_recirc_rules(struct bond *, uint32_t *recirc_id,
uint32_t *hash_basis);
+
+bool bond_use_lb_output_action(const struct bond *bond);
+
#endif /* bond.h */
diff --git a/ofproto/connmgr.c b/ofproto/connmgr.c
index 51d656cba9605accfc59e4936203b1f507a9c760..9c5c633b4171571deccb2303dd9890a6210bc02b 100644
--- a/ofproto/connmgr.c
+++ b/ofproto/connmgr.c
@@ -190,8 +190,8 @@ struct ofservice {
static void ofservice_run(struct ofservice *);
static void ofservice_wait(struct ofservice *);
-static void ofservice_reconfigure(struct ofservice *,
- const struct ofproto_controller *)
+static int ofservice_reconfigure(struct ofservice *,
+ const struct ofproto_controller *)
OVS_REQUIRES(ofproto_mutex);
static void ofservice_create(struct connmgr *mgr, const char *target,
const struct ofproto_controller *)
@@ -212,9 +212,9 @@ struct connmgr {
* traversals from other threads can be made safe by holding the
* ofproto_mutex.*/
struct ovs_list conns; /* All ofconns. */
- uint64_t master_election_id; /* monotonically increasing sequence number
- * for master election */
- bool master_election_id_defined;
+ uint64_t primary_election_id; /* monotonically increasing sequence number
+ * for primary election */
+ bool primary_election_id_defined;
/* OpenFlow connection establishment. */
struct hmap services; /* Contains "struct ofservice"s. */
@@ -253,8 +253,8 @@ connmgr_create(struct ofproto *ofproto,
mgr->local_port_name = xstrdup(local_port_name);
ovs_list_init(&mgr->conns);
- mgr->master_election_id = 0;
- mgr->master_election_id_defined = false;
+ mgr->primary_election_id = 0;
+ mgr->primary_election_id_defined = false;
hmap_init(&mgr->services);
mgr->snoops = NULL;
@@ -602,7 +602,15 @@ connmgr_set_controllers(struct connmgr *mgr, struct shash *controllers)
target);
ofservice_destroy(ofservice);
} else {
- ofservice_reconfigure(ofservice, c);
+ if (ofservice_reconfigure(ofservice, c)) {
+ char *target_to_restore = xstrdup(target);
+ VLOG_INFO("%s: Changes to controller \"%s\" "
+ "expects re-initialization: Re-initializing now.",
+ mgr->name, target);
+ ofservice_destroy(ofservice);
+ ofservice_create(mgr, target_to_restore, c);
+ free(target_to_restore);
+ }
}
}
@@ -765,11 +773,11 @@ snoop_preference(const struct ofservice *ofservice)
}
switch (ofconn->role) {
- case OFPCR12_ROLE_MASTER:
+ case OFPCR12_ROLE_PRIMARY:
return 3;
case OFPCR12_ROLE_EQUAL:
return 2;
- case OFPCR12_ROLE_SLAVE:
+ case OFPCR12_ROLE_SECONDARY:
return 1;
case OFPCR12_ROLE_NOCHANGE:
default:
@@ -810,33 +818,33 @@ ofconn_get_type(const struct ofconn *ofconn)
return ofconn->type;
}
-/* If a master election id is defined, stores it into '*idp' and returns
+/* If a primary election id is defined, stores it into '*idp' and returns
* true. Otherwise, stores UINT64_MAX into '*idp' and returns false. */
bool
-ofconn_get_master_election_id(const struct ofconn *ofconn, uint64_t *idp)
+ofconn_get_primary_election_id(const struct ofconn *ofconn, uint64_t *idp)
{
- *idp = (ofconn->connmgr->master_election_id_defined
- ? ofconn->connmgr->master_election_id
+ *idp = (ofconn->connmgr->primary_election_id_defined
+ ? ofconn->connmgr->primary_election_id
: UINT64_MAX);
- return ofconn->connmgr->master_election_id_defined;
+ return ofconn->connmgr->primary_election_id_defined;
}
-/* Sets the master election id.
+/* Sets the primary election id.
*
* Returns true if successful, false if the id is stale
*/
bool
-ofconn_set_master_election_id(struct ofconn *ofconn, uint64_t id)
+ofconn_set_primary_election_id(struct ofconn *ofconn, uint64_t id)
{
- if (ofconn->connmgr->master_election_id_defined
+ if (ofconn->connmgr->primary_election_id_defined
&&
/* Unsigned difference interpreted as a two's complement signed
* value */
- (int64_t)(id - ofconn->connmgr->master_election_id) < 0) {
+ (int64_t)(id - ofconn->connmgr->primary_election_id) < 0) {
return false;
}
- ofconn->connmgr->master_election_id = id;
- ofconn->connmgr->master_election_id_defined = true;
+ ofconn->connmgr->primary_election_id = id;
+ ofconn->connmgr->primary_election_id_defined = true;
return true;
}
@@ -856,7 +864,7 @@ ofconn_send_role_status(struct ofconn *ofconn, uint32_t role, uint8_t reason)
struct ofputil_role_status status;
status.reason = reason;
status.role = role;
- ofconn_get_master_election_id(ofconn, &status.generation_id);
+ ofconn_get_primary_election_id(ofconn, &status.generation_id);
struct ofpbuf *buf
= ofputil_encode_role_status(&status, ofconn_get_protocol(ofconn));
@@ -865,19 +873,19 @@ ofconn_send_role_status(struct ofconn *ofconn, uint32_t role, uint8_t reason)
}
}
-/* Changes 'ofconn''s role to 'role'. If 'role' is OFPCR12_ROLE_MASTER then
- * any existing master is demoted to a slave. */
+/* Changes 'ofconn''s role to 'role'. If 'role' is OFPCR12_ROLE_PRIMARY then
+ * any existing primary is demoted to a secondary. */
void
ofconn_set_role(struct ofconn *ofconn, enum ofp12_controller_role role)
{
- if (role != ofconn->role && role == OFPCR12_ROLE_MASTER) {
+ if (role != ofconn->role && role == OFPCR12_ROLE_PRIMARY) {
struct ofconn *other;
LIST_FOR_EACH (other, connmgr_node, &ofconn->connmgr->conns) {
- if (other->role == OFPCR12_ROLE_MASTER) {
- other->role = OFPCR12_ROLE_SLAVE;
- ofconn_send_role_status(other, OFPCR12_ROLE_SLAVE,
- OFPCRR_MASTER_REQUEST);
+ if (other->role == OFPCR12_ROLE_PRIMARY) {
+ other->role = OFPCR12_ROLE_SECONDARY;
+ ofconn_send_role_status(other, OFPCR12_ROLE_SECONDARY,
+ OFPCRR_PRIMARY_REQUEST);
}
}
}
@@ -890,9 +898,9 @@ ofconn_set_invalid_ttl_to_controller(struct ofconn *ofconn, bool enable)
struct ofputil_async_cfg ac = ofconn_get_async_config(ofconn);
uint32_t bit = 1u << OFPR_INVALID_TTL;
if (enable) {
- ac.master[OAM_PACKET_IN] |= bit;
+ ac.primary[OAM_PACKET_IN] |= bit;
} else {
- ac.master[OAM_PACKET_IN] &= ~bit;
+ ac.primary[OAM_PACKET_IN] &= ~bit;
}
ofconn_set_async_config(ofconn, &ac);
}
@@ -902,7 +910,7 @@ ofconn_get_invalid_ttl_to_controller(struct ofconn *ofconn)
{
struct ofputil_async_cfg ac = ofconn_get_async_config(ofconn);
uint32_t bit = 1u << OFPR_INVALID_TTL;
- return (ac.master[OAM_PACKET_IN] & bit) != 0;
+ return (ac.primary[OAM_PACKET_IN] & bit) != 0;
}
/* Returns the currently configured protocol for 'ofconn', one of OFPUTIL_P_*.
@@ -994,11 +1002,11 @@ ofconn_set_async_config(struct ofconn *ofconn,
if (ofputil_protocol_to_ofp_version(ofconn_get_protocol(ofconn))
< OFP14_VERSION) {
- if (ofconn->async_cfg->master[OAM_PACKET_IN] & (1u << OFPR_ACTION)) {
- ofconn->async_cfg->master[OAM_PACKET_IN] |= OFPR14_ACTION_BITS;
+ if (ofconn->async_cfg->primary[OAM_PACKET_IN] & (1u << OFPR_ACTION)) {
+ ofconn->async_cfg->primary[OAM_PACKET_IN] |= OFPR14_ACTION_BITS;
}
- if (ofconn->async_cfg->slave[OAM_PACKET_IN] & (1u << OFPR_ACTION)) {
- ofconn->async_cfg->slave[OAM_PACKET_IN] |= OFPR14_ACTION_BITS;
+ if (ofconn->async_cfg->secondary[OAM_PACKET_IN] & (1u << OFPR_ACTION)) {
+ ofconn->async_cfg->secondary[OAM_PACKET_IN] |= OFPR14_ACTION_BITS;
}
}
}
@@ -1433,9 +1441,9 @@ ofconn_receives_async_msg(const struct ofconn *ofconn,
}
struct ofputil_async_cfg ac = ofconn_get_async_config(ofconn);
- uint32_t *masks = (ofconn->role == OFPCR12_ROLE_SLAVE
- ? ac.slave
- : ac.master);
+ uint32_t *masks = (ofconn->role == OFPCR12_ROLE_SECONDARY
+ ? ac.secondary
+ : ac.primary);
return (masks[type] & (1u << reason)) != 0;
}
@@ -2011,16 +2019,15 @@ ofservice_wait(struct ofservice *ofservice)
}
}
-static void
+static int
ofservice_reconfigure(struct ofservice *ofservice,
const struct ofproto_controller *settings)
OVS_REQUIRES(ofproto_mutex)
{
- /* If the allowed OpenFlow versions change, close all of the existing
- * connections to allow them to reconnect and possibly negotiate a new
- * version. */
+ /* If the allowed OpenFlow versions change, a full cleanup is needed
+ * for the ofservice and connections. */
if (ofservice->s.allowed_versions != settings->allowed_versions) {
- ofservice_close_all(ofservice);
+ return -EINVAL;
}
ofservice->s = *settings;
@@ -2029,6 +2036,8 @@ ofservice_reconfigure(struct ofservice *ofservice,
LIST_FOR_EACH (ofconn, ofservice_node, &ofservice->conns) {
ofconn_reconfigure(ofconn, settings);
}
+
+ return 0;
}
/* Finds and returns the ofservice within 'mgr' that has the given
diff --git a/ofproto/connmgr.h b/ofproto/connmgr.h
index 079c8437c3ef410aeebb6ba41623c000db5fb515..e299386c7a2e8c4c23fae82b592e1144eb660426 100644
--- a/ofproto/connmgr.h
+++ b/ofproto/connmgr.h
@@ -84,8 +84,8 @@ void connmgr_get_snoops(const struct connmgr *, struct sset *snoops);
/* Individual connections to OpenFlow controllers. */
enum ofconn_type ofconn_get_type(const struct ofconn *);
-bool ofconn_get_master_election_id(const struct ofconn *, uint64_t *idp);
-bool ofconn_set_master_election_id(struct ofconn *, uint64_t);
+bool ofconn_get_primary_election_id(const struct ofconn *, uint64_t *idp);
+bool ofconn_set_primary_election_id(struct ofconn *, uint64_t);
enum ofp12_controller_role ofconn_get_role(const struct ofconn *);
void ofconn_set_role(struct ofconn *, enum ofp12_controller_role);
diff --git a/ofproto/ipfix-gen-entities b/ofproto/ipfix-gen-entities
index 0be719967d17a53a8abb1b44833e143fa39045cd..d5abe9c2edae1a9e455a30f3966438dedd0203c1 100755
--- a/ofproto/ipfix-gen-entities
+++ b/ofproto/ipfix-gen-entities
@@ -1,6 +1,6 @@
-#! /usr/bin/env python
+#!/usr/bin/env python3
#
-# Copyright (C) 2012 Nicira, Inc.
+# Copyright (C) 2012, 2020 Nicira, Inc.
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
index b413768ef0fc566c3af56da1f2177d3ee02cb01c..796eb6f881f9355ec1e9d1843a1d008b9db04f48 100644
--- a/ofproto/ofproto-dpif-ipfix.c
+++ b/ofproto/ofproto-dpif-ipfix.c
@@ -2979,6 +2979,7 @@ dpif_ipfix_read_actions(const struct flow *flow,
enum ovs_action_attr type = nl_attr_type(a);
switch (type) {
case OVS_ACTION_ATTR_OUTPUT:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
ipfix_actions->output_action = true;
break;
case OVS_ACTION_ATTR_SAMPLE:
diff --git a/ofproto/ofproto-dpif-rid.h b/ofproto/ofproto-dpif-rid.h
index 147ef9c33348b50a511bac4c12f76d8822fd2444..4df630c62bd3d5d6133e8f4cdc67f8af61f60c65 100644
--- a/ofproto/ofproto-dpif-rid.h
+++ b/ofproto/ofproto-dpif-rid.h
@@ -22,6 +22,7 @@
#include "cmap.h"
#include "ofproto-dpif-mirror.h"
+#include "ofproto/ofproto-provider.h"
#include "openvswitch/list.h"
#include "openvswitch/ofp-actions.h"
#include "ovs-thread.h"
@@ -40,8 +41,8 @@ struct rule;
*
* Recirculation is the use of freezing to allow a frame to re-enter the
* datapath packet processing path to achieve more flexible packet processing,
- * such as modifying header fields after MPLS POP action and selecting a slave
- * port for bond ports.
+ * such as modifying header fields after MPLS POP action and selecting a
+ * member interface for bond ports.
*
*
* Data path and user space interface
@@ -99,7 +100,7 @@ struct rule;
/* Metadata for restoring pipeline context after recirculation. Helpers
* are inlined below to keep them together with the definition for easier
* updates. */
-BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
struct frozen_metadata {
/* Metadata in struct flow. */
@@ -115,16 +116,25 @@ frozen_metadata_from_flow(struct frozen_metadata *md,
{
memset(md, 0, sizeof *md);
md->tunnel = flow->tunnel;
+ /* It is unsafe for frozen_state to reference tun_table because
+ * tun_table is protected by RCU while the lifecycle of frozen_state
+ * can span several RCU quiesce states.
+ *
+ * The latest valid tun_table can be found by ofproto_get_tun_tab()
+ * efficiently. */
+ md->tunnel.metadata.tab = NULL;
md->metadata = flow->metadata;
memcpy(md->regs, flow->regs, sizeof md->regs);
md->in_port = flow->in_port.ofp_port;
}
static inline void
-frozen_metadata_to_flow(const struct frozen_metadata *md,
+frozen_metadata_to_flow(struct ofproto *ofproto,
+ const struct frozen_metadata *md,
struct flow *flow)
{
flow->tunnel = md->tunnel;
+ flow->tunnel.metadata.tab = ofproto_get_tun_tab(ofproto);
flow->metadata = md->metadata;
memcpy(flow->regs, md->regs, sizeof flow->regs);
flow->in_port.ofp_port = md->in_port;
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index f9ea47a2f17b6128bcf78e833ab5f45421acaeb7..fdcb9eabbfd0d089fd1659501499c63716d4c055 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -305,7 +305,7 @@ sflow_agent_get_counters(void *ds_, SFLPoller *poller,
SFLEthernet_counters* eth_counters;
struct netdev_stats stats;
enum netdev_flags flags;
- struct lacp_slave_stats lacp_stats;
+ struct lacp_member_stats lacp_stats;
const char *ifName;
dsp = dpif_sflow_find_port(ds, u32_to_odp(poller->bridgePort));
@@ -1175,8 +1175,9 @@ dpif_sflow_read_actions(const struct flow *flow,
case OVS_ACTION_ATTR_RECIRC:
case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_CT:
- case OVS_ACTION_ATTR_CT_CLEAR:
+ case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_METER:
+ case OVS_ACTION_ATTR_LB_OUTPUT:
break;
case OVS_ACTION_ATTR_SET_MASKED:
diff --git a/ofproto/ofproto-dpif-trace.c b/ofproto/ofproto-dpif-trace.c
index 8ae8a221a897083c0b9eabe079cc63c1ad39bf1c..78a54c715dc731b3e28568b48ad42a9f95453d08 100644
--- a/ofproto/ofproto-dpif-trace.c
+++ b/ofproto/ofproto-dpif-trace.c
@@ -86,6 +86,7 @@ oftrace_node_destroy(struct oftrace_node *node)
bool
oftrace_add_recirc_node(struct ovs_list *recirc_queue,
enum oftrace_recirc_type type, const struct flow *flow,
+ const struct ofpact_nat *ofn,
const struct dp_packet *packet, uint32_t recirc_id,
const uint16_t zone)
{
@@ -101,6 +102,7 @@ oftrace_add_recirc_node(struct ovs_list *recirc_queue,
node->flow = *flow;
node->flow.recirc_id = recirc_id;
node->flow.ct_zone = zone;
+ node->nat_act = ofn;
node->packet = packet ? dp_packet_clone(packet) : NULL;
return true;
@@ -179,6 +181,25 @@ oftrace_node_print_details(struct ds *output,
}
}
+static void
+oftrace_print_ip_flow(const struct flow *flow, int af, struct ds *output)
+{
+ if (af == AF_INET) {
+ ds_put_format(output, "nw_src="IP_FMT",tp_src=%"PRIu16","
+ "nw_dst="IP_FMT",tp_dst=%"PRIu16,
+ IP_ARGS(flow->nw_src), ntohs(flow->tp_src),
+ IP_ARGS(flow->nw_dst), ntohs(flow->tp_dst));
+ } else if (af == AF_INET6) {
+ ds_put_cstr(output, "ipv6_src=");
+ ipv6_format_addr_bracket(&flow->ipv6_src, output, true);
+ ds_put_format(output, ",tp_src=%"PRIu16, ntohs(flow->tp_src));
+ ds_put_cstr(output, ",ipv6_dst=");
+ ipv6_format_addr_bracket(&flow->ipv6_dst, output, true);
+ ds_put_format(output, ",tp_dst=%"PRIu16, ntohs(flow->tp_dst));
+ }
+ ds_put_char(output, '\n');
+}
+
/* Parses the 'argc' elements of 'argv', ignoring argv[0]. The following
* forms are supported:
*
@@ -637,6 +658,73 @@ execute_actions_except_outputs(struct dpif *dpif,
ofpbuf_uninit(&pruned_actions);
}
+/* Appends to 'output' the banner that introduces the trace of recirculation
+ * 'node', and adjusts 'node->flow' before that trace is resumed:
+ *
+ * - If 'next_ct_states' is nonnull and 'node' is a conntrack recirculation,
+ * sets 'node->flow.ct_state' to the next state popped from
+ * 'next_ct_states', or to trk|new if that list is empty.
+ *
+ * - If 'node->nat_act' is set, overwrites the source and/or destination
+ * address of 'node->flow' (per the action's SRC/DST flags) with the
+ * minimum address of the NAT range and, when the range has a nonzero
+ * minimum port, the matching transport port too. The flow's addresses
+ * and ports are printed before and after the rewrite. */
+static void
+ofproto_trace_recirc_node(struct oftrace_recirc_node *node,
+ struct ovs_list *next_ct_states,
+ struct ds *output)
+{
+ /* Opening rule of the banner, then the recirculation ID in hex. */
+ ds_put_cstr(output, "\n\n");
+ ds_put_char_multiple(output, '=', 79);
+ ds_put_format(output, "\nrecirc(%#"PRIx32")", node->recirc_id);
+
+ if (next_ct_states && node->type == OFT_RECIRC_CONNTRACK) {
+ uint32_t ct_state;
+ if (ovs_list_is_empty(next_ct_states)) {
+ /* No state queued for this recirculation: use the default. */
+ ct_state = CS_TRACKED | CS_NEW;
+ ds_put_cstr(output, " - resume conntrack with default "
+ "ct_state=trk|new (use --ct-next to customize)");
+ } else {
+ /* Consume the next queued state and show it in symbolic form. */
+ ct_state = oftrace_pop_ct_state(next_ct_states);
+ struct ds s = DS_EMPTY_INITIALIZER;
+ format_flags(&s, ct_state_to_string, ct_state, '|');
+ ds_put_format(output, " - resume conntrack with ct_state=%s",
+ ds_cstr(&s));
+ ds_destroy(&s);
+ }
+ node->flow.ct_state = ct_state;
+ }
+ ds_put_char(output, '\n');
+
+ /* If there's any snat/dnat information assume we always translate to
+ * the first IP/port to make sure we don't match on incorrect flows later
+ * on.
+ */
+ if (node->nat_act) {
+ const struct ofpact_nat *ofn = node->nat_act;
+
+ ds_put_cstr(output, "Replacing src/dst IP/ports to simulate NAT:\n");
+ ds_put_cstr(output, " Initial flow: ");
+ oftrace_print_ip_flow(&node->flow, ofn->range_af, output);
+
+ /* Source NAT: take the low end of the configured range. */
+ if (ofn->flags & NX_NAT_F_SRC) {
+ if (ofn->range_af == AF_INET) {
+ node->flow.nw_src = ofn->range.addr.ipv4.min;
+ } else if (ofn->range_af == AF_INET6) {
+ node->flow.ipv6_src = ofn->range.addr.ipv6.min;
+ }
+
+ /* A zero minimum port leaves the flow's port as it was. */
+ if (ofn->range_af != AF_UNSPEC && ofn->range.proto.min) {
+ node->flow.tp_src = htons(ofn->range.proto.min);
+ }
+ }
+ /* Destination NAT: same treatment for the destination fields. */
+ if (ofn->flags & NX_NAT_F_DST) {
+ if (ofn->range_af == AF_INET) {
+ node->flow.nw_dst = ofn->range.addr.ipv4.min;
+ } else if (ofn->range_af == AF_INET6) {
+ node->flow.ipv6_dst = ofn->range.addr.ipv6.min;
+ }
+
+ if (ofn->range_af != AF_UNSPEC && ofn->range.proto.min) {
+ node->flow.tp_dst = htons(ofn->range.proto.min);
+ }
+ }
+ ds_put_cstr(output, " Modified flow: ");
+ oftrace_print_ip_flow(&node->flow, ofn->range_af, output);
+ }
+ /* Closing rule of the banner. */
+ ds_put_char_multiple(output, '=', 79);
+ ds_put_cstr(output, "\n\n");
+}
+
static void
ofproto_trace__(struct ofproto_dpif *ofproto, const struct flow *flow,
const struct dp_packet *packet, struct ovs_list *recirc_queue,
@@ -729,31 +817,7 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct flow *flow,
struct oftrace_recirc_node *recirc_node;
LIST_FOR_EACH_POP (recirc_node, node, &recirc_queue) {
- ds_put_cstr(output, "\n\n");
- ds_put_char_multiple(output, '=', 79);
- ds_put_format(output, "\nrecirc(%#"PRIx32")",
- recirc_node->recirc_id);
-
- if (next_ct_states && recirc_node->type == OFT_RECIRC_CONNTRACK) {
- uint32_t ct_state;
- if (ovs_list_is_empty(next_ct_states)) {
- ct_state = CS_TRACKED | CS_NEW;
- ds_put_cstr(output, " - resume conntrack with default "
- "ct_state=trk|new (use --ct-next to customize)");
- } else {
- ct_state = oftrace_pop_ct_state(next_ct_states);
- struct ds s = DS_EMPTY_INITIALIZER;
- format_flags(&s, ct_state_to_string, ct_state, '|');
- ds_put_format(output, " - resume conntrack with ct_state=%s",
- ds_cstr(&s));
- ds_destroy(&s);
- }
- recirc_node->flow.ct_state = ct_state;
- }
- ds_put_char(output, '\n');
- ds_put_char_multiple(output, '=', 79);
- ds_put_cstr(output, "\n\n");
-
+ ofproto_trace_recirc_node(recirc_node, next_ct_states, output);
ofproto_trace__(ofproto, &recirc_node->flow, recirc_node->packet,
&recirc_queue, ofpacts, ofpacts_len, output);
oftrace_recirc_node_destroy(recirc_node);
diff --git a/ofproto/ofproto-dpif-trace.h b/ofproto/ofproto-dpif-trace.h
index 63dbb50bad5718145fd972c7d9276fdbfd692781..4b04f1756f0f0dcdb9245e6820c7599a43374b98 100644
--- a/ofproto/ofproto-dpif-trace.h
+++ b/ofproto/ofproto-dpif-trace.h
@@ -73,6 +73,7 @@ struct oftrace_recirc_node {
uint32_t recirc_id;
struct flow flow;
struct dp_packet *packet;
+ const struct ofpact_nat *nat_act;
};
/* A node within a next_ct_states list. */
@@ -91,6 +92,7 @@ struct oftrace_node *oftrace_report(struct ovs_list *, enum oftrace_node_type,
const char *text);
bool oftrace_add_recirc_node(struct ovs_list *recirc_queue,
enum oftrace_recirc_type, const struct flow *,
+ const struct ofpact_nat *,
const struct dp_packet *, uint32_t recirc_id,
const uint16_t zone);
diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 409286ab15879fec4da89d6273bb6157d03c939d..5fae46adfc25b89a0a5836d1928c51912c96d8d1 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -56,6 +56,8 @@ COVERAGE_DEFINE(handler_duplicate_upcall);
COVERAGE_DEFINE(upcall_ukey_contention);
COVERAGE_DEFINE(upcall_ukey_replace);
COVERAGE_DEFINE(revalidate_missed_dp_flow);
+COVERAGE_DEFINE(upcall_flow_limit_hit);
+COVERAGE_DEFINE(upcall_flow_limit_kill);
/* A thread that reads upcalls from dpif, forwards each upcall's packet,
* and possibly sets up a kernel flow as a cache. */
@@ -332,7 +334,7 @@ static size_t recv_upcalls(struct handler *);
static int process_upcall(struct udpif *, struct upcall *,
struct ofpbuf *odp_actions, struct flow_wildcards *);
static void handle_upcalls(struct udpif *, struct upcall *, size_t n_upcalls);
-static void udpif_stop_threads(struct udpif *);
+static void udpif_stop_threads(struct udpif *, bool delete_flows);
static void udpif_start_threads(struct udpif *, size_t n_handlers,
size_t n_revalidators);
static void udpif_pause_revalidators(struct udpif *);
@@ -483,7 +485,7 @@ udpif_run(struct udpif *udpif)
void
udpif_destroy(struct udpif *udpif)
{
- udpif_stop_threads(udpif);
+ udpif_stop_threads(udpif, false);
dpif_register_dp_purge_cb(udpif->dpif, NULL, udpif);
dpif_register_upcall_cb(udpif->dpif, NULL, udpif);
@@ -504,9 +506,15 @@ udpif_destroy(struct udpif *udpif)
free(udpif);
}
-/* Stops the handler and revalidator threads. */
+/* Stops the handler and revalidator threads.
+ *
+ * If 'delete_flows' is true, we delete ukeys and delete all flows from the
+ * datapath; otherwise we would end up double-counting stats for flows that
+ * remain in the datapath. If 'delete_flows' is false, we skip this step. This
+ * is appropriate if OVS is about to exit anyway and it is desirable to let
+ * existing network connections continue being forwarded afterward. */
static void
-udpif_stop_threads(struct udpif *udpif)
+udpif_stop_threads(struct udpif *udpif, bool delete_flows)
{
if (udpif && (udpif->n_handlers != 0 || udpif->n_revalidators != 0)) {
size_t i;
@@ -526,10 +534,10 @@ udpif_stop_threads(struct udpif *udpif)
dpif_disable_upcall(udpif->dpif);
ovsrcu_quiesce_end();
- /* Delete ukeys, and delete all flows from the datapath to prevent
- * double-counting stats. */
- for (i = 0; i < udpif->n_revalidators; i++) {
- revalidator_purge(&udpif->revalidators[i]);
+ if (delete_flows) {
+ for (i = 0; i < udpif->n_revalidators; i++) {
+ revalidator_purge(&udpif->revalidators[i]);
+ }
}
latch_poll(&udpif->exit_latch);
@@ -627,7 +635,7 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers_,
if (udpif->n_handlers != n_handlers_
|| udpif->n_revalidators != n_revalidators_) {
- udpif_stop_threads(udpif);
+ udpif_stop_threads(udpif, true);
}
if (!udpif->handlers && !udpif->revalidators) {
@@ -644,23 +652,6 @@ udpif_set_threads(struct udpif *udpif, size_t n_handlers_,
}
}
-/* Waits for all ongoing upcall translations to complete. This ensures that
- * there are no transient references to any removed ofprotos (or other
- * objects). In particular, this should be called after an ofproto is removed
- * (e.g. via xlate_remove_ofproto()) but before it is destroyed. */
-void
-udpif_synchronize(struct udpif *udpif)
-{
- /* This is stronger than necessary. It would be sufficient to ensure
- * (somehow) that each handler and revalidator thread had passed through
- * its main loop once. */
- size_t n_handlers_ = udpif->n_handlers;
- size_t n_revalidators_ = udpif->n_revalidators;
-
- udpif_stop_threads(udpif);
- udpif_start_threads(udpif, n_handlers_, n_revalidators_);
-}
-
/* Notifies 'udpif' that something changed which may render previous
* xlate_actions() results invalid. */
void
@@ -698,7 +689,7 @@ udpif_flush(struct udpif *udpif)
size_t n_handlers_ = udpif->n_handlers;
size_t n_revalidators_ = udpif->n_revalidators;
- udpif_stop_threads(udpif);
+ udpif_stop_threads(udpif, true);
dpif_flow_flush(udpif->dpif);
udpif_start_threads(udpif, n_handlers_, n_revalidators_);
}
@@ -1093,7 +1084,7 @@ compose_slow_path(struct udpif *udpif, struct xlate_out *xout,
}
odp_put_userspace_action(pid, &cookie, sizeof cookie,
- ODPP_NONE, false, buf);
+ ODPP_NONE, false, buf, NULL);
if (meter_id != UINT32_MAX) {
nl_msg_end_nested(buf, ac_offset);
@@ -1292,7 +1283,10 @@ should_install_flow(struct udpif *udpif, struct upcall *upcall)
atomic_read_relaxed(&udpif->flow_limit, &flow_limit);
if (udpif_get_n_flows(udpif) >= flow_limit) {
- VLOG_WARN_RL(&rl, "upcall: datapath flow limit reached");
+ COVERAGE_INC(upcall_flow_limit_hit);
+ VLOG_WARN_RL(&rl,
+ "upcall: datapath reached the dynamic limit of %u flows.",
+ flow_limit);
return false;
}
@@ -1545,7 +1539,8 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
flow_clear_conntrack(&frozen_flow);
}
- frozen_metadata_to_flow(&state->metadata, &frozen_flow);
+ frozen_metadata_to_flow(&upcall->ofproto->up, &state->metadata,
+ &frozen_flow);
flow_get_metadata(&frozen_flow, &am->pin.up.base.flow_metadata);
ofproto_dpif_send_async_msg(upcall->ofproto, am);
@@ -2511,8 +2506,7 @@ ukey_netdev_unref(struct udpif_key *ukey)
static void
ukey_to_flow_netdev(struct udpif *udpif, struct udpif_key *ukey)
{
- const struct dpif *dpif = udpif->dpif;
- const struct dpif_class *dpif_class = dpif->dpif_class;
+ const char *dpif_type_str = dpif_normalize_type(dpif_type(udpif->dpif));
const struct nlattr *k;
unsigned int left;
@@ -2525,7 +2519,7 @@ ukey_to_flow_netdev(struct udpif *udpif, struct udpif_key *ukey)
if (type == OVS_KEY_ATTR_IN_PORT) {
ukey->in_netdev = netdev_ports_get(nl_attr_get_odp_port(k),
- dpif_class);
+ dpif_type_str);
} else if (type == OVS_KEY_ATTR_TUNNEL) {
struct flow_tnl tnl;
enum odp_key_fitness res;
@@ -2586,6 +2580,25 @@ udpif_update_flow_pps(struct udpif *udpif, struct udpif_key *ukey,
ukey->flow_time = udpif->dpif->current_ms;
}
+/* Chooses a "last used" time for 'ukey' when the dumped flow carried none
+ * (revalidate() calls this only when the dumped 'used' value is zero).
+ *
+ * If the current dump is not terse, returns 'ukey->created' and leaves
+ * 'stats' untouched.  Otherwise stores the chosen time in 'stats->used' and
+ * returns it:
+ *
+ *   - 'udpif->dpif->current_ms', if 'stats' counts more packets than the
+ *     stats recorded in the ukey;
+ *   - else the 'used' time recorded in the ukey, if nonzero;
+ *   - else the ukey's creation time. */
+static long long int
+udpif_update_used(struct udpif *udpif, struct udpif_key *ukey,
+                  struct dpif_flow_stats *stats)
+    OVS_REQUIRES(ukey->mutex)
+{
+    if (udpif->dump->terse) {
+        if (stats->n_packets > ukey->stats.n_packets) {
+            /* The packet count moved since the ukey's stats were taken. */
+            stats->used = udpif->dpif->current_ms;
+        } else {
+            stats->used = ukey->stats.used ? ukey->stats.used
+                                           : ukey->created;
+        }
+        return stats->used;
+    }
+
+    return ukey->created;
+}
+
static void
revalidate(struct revalidator *revalidator)
{
@@ -2595,6 +2608,7 @@ revalidate(struct revalidator *revalidator)
struct udpif *udpif = revalidator->udpif;
struct dpif_flow_dump_thread *dump_thread;
uint64_t dump_seq, reval_seq;
+ bool kill_warn_print = true;
unsigned int flow_limit;
dump_seq = seq_read(udpif->dump_seq);
@@ -2611,6 +2625,7 @@ revalidate(struct revalidator *revalidator)
long long int max_idle;
long long int now;
+ size_t kill_all_limit;
size_t n_dp_flows;
bool kill_them_all;
@@ -2634,13 +2649,34 @@ revalidate(struct revalidator *revalidator)
* datapath flows, so we will recover before all the flows are
* gone.) */
n_dp_flows = udpif_get_n_flows(udpif);
- kill_them_all = n_dp_flows > flow_limit * 2;
+ if (n_dp_flows >= flow_limit) {
+ COVERAGE_INC(upcall_flow_limit_hit);
+ }
+
+ kill_them_all = false;
+ kill_all_limit = flow_limit * 2;
+ if (OVS_UNLIKELY(n_dp_flows > kill_all_limit)) {
+ static struct vlog_rate_limit rlem = VLOG_RATE_LIMIT_INIT(1, 1);
+
+ kill_them_all = true;
+ COVERAGE_INC(upcall_flow_limit_kill);
+ if (kill_warn_print) {
+ kill_warn_print = false;
+ VLOG_WARN_RL(&rlem,
+ "Number of datapath flows (%"PRIuSIZE") twice as high as "
+ "current dynamic flow limit (%"PRIuSIZE"). "
+ "Starting to delete flows unconditionally "
+ "as an emergency measure.", n_dp_flows, kill_all_limit);
+ }
+ }
+
max_idle = n_dp_flows > flow_limit ? 100 : ofproto_max_idle;
udpif->dpif->current_ms = time_msec();
for (f = flows; f < &flows[n_dumped]; f++) {
long long int used = f->stats.used;
struct recirc_refs recircs = RECIRC_REFS_EMPTY_INITIALIZER;
+ struct dpif_flow_stats stats = f->stats;
enum reval_result result;
struct udpif_key *ukey;
bool already_dumped;
@@ -2685,12 +2721,12 @@ revalidate(struct revalidator *revalidator)
}
if (!used) {
- used = ukey->created;
+ used = udpif_update_used(udpif, ukey, &stats);
}
if (kill_them_all || (used && used < now - max_idle)) {
result = UKEY_DELETE;
} else {
- result = revalidate_ukey(udpif, ukey, &f->stats, &odp_actions,
+ result = revalidate_ukey(udpif, ukey, &stats, &odp_actions,
reval_seq, &recircs,
f->attrs.offloaded);
}
@@ -2865,6 +2901,7 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
struct ds ds = DS_EMPTY_INITIALIZER;
+ uint64_t n_offloaded_flows;
struct udpif *udpif;
LIST_FOR_EACH (udpif, list_node, &all_udpifs) {
@@ -2879,6 +2916,10 @@ upcall_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
ds_put_format(&ds, " flows : (current %lu)"
" (avg %u) (max %u) (limit %u)\n", udpif_get_n_flows(udpif),
udpif->avg_n_flows, udpif->max_n_flows, flow_limit);
+ if (!dpif_get_n_offloaded_flows(udpif->dpif, &n_offloaded_flows)) {
+ ds_put_format(&ds, " offloaded flows : %"PRIu64"\n",
+ n_offloaded_flows);
+ }
ds_put_format(&ds, " dump duration : %lldms\n", udpif->dump_duration);
ds_put_format(&ds, " ufid enabled : ");
if (ufid_enabled) {
diff --git a/ofproto/ofproto-dpif-upcall.h b/ofproto/ofproto-dpif-upcall.h
index cef1d34198d6bded5e4f97df91dbc517afa63d68..693107ae56c1c746a054c18e5f70e8ea253557dc 100644
--- a/ofproto/ofproto-dpif-upcall.h
+++ b/ofproto/ofproto-dpif-upcall.h
@@ -33,7 +33,6 @@ struct udpif *udpif_create(struct dpif_backer *, struct dpif *);
void udpif_run(struct udpif *udpif);
void udpif_set_threads(struct udpif *, size_t n_handlers,
size_t n_revalidators);
-void udpif_synchronize(struct udpif *);
void udpif_destroy(struct udpif *);
void udpif_revalidate(struct udpif *);
void udpif_get_memory_usage(struct udpif *, struct simap *usage);
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 4407f9c97a9e4ddd19ed5efb05c21da353d3cde0..7108c8a30138e82e2a8f10982fca64dbb5dc4d2b 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2019 Nicira, Inc.
+/* Copyright (c) 2009-2017, 2019-2020 Nicira, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1171,11 +1171,15 @@ xlate_xport_copy(struct xbridge *xbridge, struct xbundle *xbundle,
*
* A sample workflow:
*
- * xlate_txn_start();
- * ...
- * edit_xlate_configuration();
- * ...
- * xlate_txn_commit(); */
+ * xlate_txn_start();
+ * ...
+ * edit_xlate_configuration();
+ * ...
+ * xlate_txn_commit();
+ *
+ * The ovsrcu_synchronize() call here also ensures that the upcall threads
+ * retain no references to anything in the previous configuration.
+ */
void
xlate_txn_commit(void)
{
@@ -1516,15 +1520,32 @@ xlate_lookup_ofproto_(const struct dpif_backer *backer,
return NULL;
}
- /* If recirculation was initiated due to bond (in_port = OFPP_NONE)
- * then frozen state is static and xport_uuid is not defined, so xport
- * cannot be restored from frozen state. */
- if (recirc_id_node->state.metadata.in_port != OFPP_NONE) {
+ ofp_port_t in_port = recirc_id_node->state.metadata.in_port;
+ if (in_port != OFPP_NONE && in_port != OFPP_CONTROLLER) {
struct uuid xport_uuid = recirc_id_node->state.xport_uuid;
xport = xport_lookup_by_uuid(xcfg, &xport_uuid);
if (xport && xport->xbridge && xport->xbridge->ofproto) {
goto out;
}
+ } else {
+ /* OFPP_NONE and OFPP_CONTROLLER are not real ports. They indicate
+ * that the packet originated from the controller via an OpenFlow
+ * "packet-out". The right thing to do is to find just the
+ * ofproto. There is no xport, which is OK.
+ *
+ * OFPP_NONE can also indicate that a bond caused recirculation. */
+ struct uuid uuid = recirc_id_node->state.ofproto_uuid;
+ const struct xbridge *bridge = xbridge_lookup_by_uuid(xcfg, &uuid);
+ if (bridge && bridge->ofproto) {
+ if (errorp) {
+ *errorp = NULL;
+ }
+ *xportp = NULL;
+ if (ofp_in_port) {
+ *ofp_in_port = in_port;
+ }
+ return bridge->ofproto;
+ }
}
}
@@ -1884,9 +1905,12 @@ bucket_is_alive(const struct xlate_ctx *ctx,
return (!ofputil_bucket_has_liveness(bucket)
|| (bucket->watch_port != OFPP_ANY
+ && bucket->watch_port != OFPP_CONTROLLER
&& odp_port_is_alive(ctx, bucket->watch_port))
|| (bucket->watch_group != OFPG_ANY
- && group_is_alive(ctx, bucket->watch_group, depth + 1)));
+ && group_is_alive(ctx, bucket->watch_group, depth + 1))
+ || (bucket->watch_port == OFPP_CONTROLLER
+ && ofproto_is_alive(&ctx->xbridge->ofproto->up)));
}
static void
@@ -2407,7 +2431,7 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
}
vid = out_xvlan.v[0].vid;
if (ovs_list_is_empty(&out_xbundle->xports)) {
- /* Partially configured bundle with no slaves. Drop the packet. */
+ /* Partially configured bundle with no members. Drop the packet. */
return;
} else if (!out_xbundle->bond) {
xport = CONTAINER_OF(ovs_list_front(&out_xbundle->xports), struct xport,
@@ -2432,12 +2456,12 @@ output_normal(struct xlate_ctx *ctx, const struct xbundle *out_xbundle,
}
}
- ofport = bond_choose_output_slave(out_xbundle->bond,
- &ctx->xin->flow, wc, vid);
+ ofport = bond_choose_output_member(out_xbundle->bond,
+ &ctx->xin->flow, wc, vid);
xport = xport_lookup(ctx->xcfg, ofport);
if (!xport) {
- /* No slaves enabled, so drop packet. */
+ /* No member interfaces enabled, so drop packet. */
return;
}
@@ -3076,6 +3100,7 @@ xlate_normal(struct xlate_ctx *ctx)
xlate_report(ctx, OFT_DETAIL, "MLD query, flooding");
xlate_normal_flood(ctx, in_xbundle, &xvlan);
}
+ return;
} else {
if (is_ip_local_multicast(flow, wc)) {
/* RFC4541: section 2.1.2, item 2: Packets with a dst IP
@@ -3198,12 +3223,11 @@ compose_sample_action(struct xlate_ctx *ctx,
odp_port_t odp_port = ofp_port_to_odp_port(
ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port);
- size_t cookie_offset = odp_put_userspace_action(pid, cookie,
- sizeof *cookie,
- tunnel_out_port,
- include_actions,
- ctx->odp_actions);
-
+ size_t cookie_offset;
+ int res = odp_put_userspace_action(pid, cookie, sizeof *cookie,
+ tunnel_out_port, include_actions,
+ ctx->odp_actions, &cookie_offset);
+ ovs_assert(res == 0);
if (is_sample) {
nl_msg_end_nested(ctx->odp_actions, actions_offset);
nl_msg_end_nested(ctx->odp_actions, sample_offset);
@@ -3355,11 +3379,11 @@ process_special(struct xlate_ctx *ctx, const struct xport *xport)
if (packet) {
lacp_may_enable = lacp_process_packet(xport->xbundle->lacp,
xport->ofport, packet);
- /* Update LACP status in bond-slave to avoid packet-drops until
- * LACP state machine is run by the main thread. */
+ /* Update LACP status in bond-member to avoid packet-drops
+ * until LACP state machine is run by the main thread. */
if (xport->xbundle->bond && lacp_may_enable) {
- bond_slave_set_may_enable(xport->xbundle->bond, xport->ofport,
- lacp_may_enable);
+ bond_member_set_may_enable(xport->xbundle->bond, xport->ofport,
+ lacp_may_enable);
}
}
slow = SLOW_LACP;
@@ -3548,6 +3572,8 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, struct eth_addr dmac,
break;
case OVS_VPORT_TYPE_VXLAN:
case OVS_VPORT_TYPE_GENEVE:
+ case OVS_VPORT_TYPE_GTPU:
+ case OVS_VPORT_TYPE_BAREUDP:
nw_proto = IPPROTO_UDP;
break;
case OVS_VPORT_TYPE_LISP:
@@ -4099,7 +4125,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
/* If 'struct flow' gets additional metadata, we'll need to zero it out
* before traversing a patch port. */
- BUILD_ASSERT_DECL(FLOW_WC_SEQ == 41);
+ BUILD_ASSERT_DECL(FLOW_WC_SEQ == 42);
memset(&flow_tnl, 0, sizeof flow_tnl);
if (!check_output_prerequisites(ctx, xport, flow, check_stp)) {
@@ -4182,7 +4208,17 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port,
/* Commit accumulated flow updates before output. */
xlate_commit_actions(ctx);
- if (xr) {
+ if (xr && bond_use_lb_output_action(xport->xbundle->bond)) {
+ /*
+ * If bond mode is balance-tcp and optimize balance tcp is enabled
+ * then use the hash directly for member selection and avoid
+ * recirculation.
+ *
+ * Currently supported for the netdev datapath only.
+ */
+ nl_msg_put_u32(ctx->odp_actions, OVS_ACTION_ATTR_LB_OUTPUT,
+ xr->recirc_id);
+ } else if (xr) {
/* Recirculate the packet. */
struct ovs_action_hash *act_hash;
@@ -4796,7 +4832,7 @@ put_controller_user_action(struct xlate_ctx *ctx,
ctx->xin->flow.in_port.ofp_port);
uint32_t pid = dpif_port_get_pid(ctx->xbridge->dpif, odp_port);
odp_put_userspace_action(pid, &cookie, sizeof cookie, ODPP_NONE,
- false, ctx->odp_actions);
+ false, ctx->odp_actions, NULL);
}
static void
@@ -4974,7 +5010,8 @@ compose_recirculate_and_fork(struct xlate_ctx *ctx, uint8_t table,
if (OVS_UNLIKELY(ctx->xin->trace) && recirc_id) {
if (oftrace_add_recirc_node(ctx->xin->recirc_queue,
OFT_RECIRC_CONNTRACK, &ctx->xin->flow,
- ctx->xin->packet, recirc_id, zone)) {
+ ctx->ct_nat_action, ctx->xin->packet,
+ recirc_id, zone)) {
xlate_report(ctx, OFT_DETAIL, "A clone of the packet is forked to "
"recirculate. The forked pipeline will be resumed at "
"table %u.", table);
@@ -5134,6 +5171,21 @@ compose_dec_mpls_ttl_action(struct xlate_ctx *ctx)
return true;
}
+/* Translates the delete_field action 'odf': removes 'odf->field' from the
+ * tunnel metadata of 'flow' and records the deletion, by field name, in the
+ * translation report of 'ctx'. */
+static void
+xlate_delete_field(struct xlate_ctx *ctx,
+                   struct flow *flow,
+                   const struct ofpact_delete_field *odf)
+{
+    /* Currently, only tun_metadata is allowed for delete_field action. */
+    tun_metadata_delete(&flow->tunnel, odf->field);
+
+    /* xlate_report() takes a printf-style format, so pass the field name
+     * straight through rather than staging the text in a temporary
+     * 'struct ds' that has to be allocated and freed on every translation. */
+    xlate_report(ctx, OFT_DETAIL, "delete %s", odf->field->name);
+}
+
/* Emits an action that outputs to 'port', within 'ctx'.
*
* 'controller_len' affects only packets sent to an OpenFlow controller. It
@@ -5340,7 +5392,7 @@ xlate_set_queue_action(struct xlate_ctx *ctx, uint32_t queue_id)
}
static bool
-slave_enabled_cb(ofp_port_t ofp_port, void *xbridge_)
+member_enabled_cb(ofp_port_t ofp_port, void *xbridge_)
{
const struct xbridge *xbridge = xbridge_;
struct xport *port;
@@ -5369,7 +5421,7 @@ xlate_bundle_action(struct xlate_ctx *ctx,
{
ofp_port_t port;
- port = bundle_execute(bundle, &ctx->xin->flow, ctx->wc, slave_enabled_cb,
+ port = bundle_execute(bundle, &ctx->xin->flow, ctx->wc, member_enabled_cb,
CONST_CAST(struct xbridge *, ctx->xbridge));
if (bundle->dst.field) {
nxm_reg_load(&bundle->dst, ofp_to_u16(port), &ctx->xin->flow, ctx->wc);
@@ -5659,6 +5711,7 @@ reversible_actions(const struct ofpact *ofpacts, size_t ofpacts_len)
case OFPACT_WRITE_ACTIONS:
case OFPACT_WRITE_METADATA:
case OFPACT_CHECK_PKT_LARGER:
+ case OFPACT_DELETE_FIELD:
break;
case OFPACT_CT:
@@ -5968,6 +6021,7 @@ freeze_unroll_actions(const struct ofpact *a, const struct ofpact *end,
case OFPACT_CT_CLEAR:
case OFPACT_NAT:
case OFPACT_CHECK_PKT_LARGER:
+ case OFPACT_DELETE_FIELD:
/* These may not generate PACKET INs. */
break;
@@ -6163,7 +6217,6 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc,
put_ct_label(&ctx->xin->flow, ctx->odp_actions, ctx->wc);
put_ct_helper(ctx, ctx->odp_actions, ofc);
put_ct_nat(ctx);
- ctx->ct_nat_action = NULL;
nl_msg_end_nested(ctx->odp_actions, ct_offset);
ctx->wc->masks.ct_mark = old_ct_mark_mask;
@@ -6174,6 +6227,8 @@ compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc,
compose_recirculate_and_fork(ctx, ofc->recirc_table, zone);
}
+ ctx->ct_nat_action = NULL;
+
/* The ct_* fields are only available in the scope of the 'recirc_table'
* call chain. */
flow_clear_conntrack(&ctx->xin->flow);
@@ -6628,6 +6683,7 @@ recirc_for_mpls(const struct ofpact *a, struct xlate_ctx *ctx)
case OFPACT_WRITE_METADATA:
case OFPACT_GOTO_TABLE:
case OFPACT_CHECK_PKT_LARGER:
+ case OFPACT_DELETE_FIELD:
default:
break;
}
@@ -7005,6 +7061,10 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len,
xlate_fin_timeout(ctx, ofpact_get_FIN_TIMEOUT(a));
break;
+ case OFPACT_DELETE_FIELD:
+ xlate_delete_field(ctx, flow, ofpact_get_DELETE_FIELD(a));
+ break;
+
case OFPACT_CLEAR_ACTIONS:
xlate_report_action_set(ctx, "was");
ofpbuf_clear(&ctx->action_set);
@@ -7261,7 +7321,8 @@ count_output_actions(const struct ofpbuf *odp_actions)
int n = 0;
NL_ATTR_FOR_EACH_UNSAFE (a, left, odp_actions->data, odp_actions->size) {
- if (a->nla_type == OVS_ACTION_ATTR_OUTPUT) {
+ if ((a->nla_type == OVS_ACTION_ATTR_OUTPUT) ||
+ (a->nla_type == OVS_ACTION_ATTR_LB_OUTPUT)) {
n++;
}
}
@@ -7519,7 +7580,8 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
/* Restore pipeline metadata. May change flow's in_port and other
* metadata to the values that existed when freezing was triggered. */
- frozen_metadata_to_flow(&state->metadata, flow);
+ frozen_metadata_to_flow(&ctx.xbridge->ofproto->up,
+ &state->metadata, flow);
/* Restore stack, if any. */
if (state->stack) {
@@ -7571,14 +7633,10 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout)
ctx.error = XLATE_INVALID_TUNNEL_METADATA;
goto exit;
}
- } else if (!flow->tunnel.metadata.tab || xin->frozen_state) {
+ } else if (!flow->tunnel.metadata.tab) {
/* If the original flow did not come in on a tunnel, then it won't have
* FLOW_TNL_F_UDPIF set. However, we still need to have a metadata
* table in case we generate tunnel actions. */
- /* If the translation is from a frozen state, we use the latest
- * TLV map to avoid segmentation fault in case the old TLV map is
- * replaced by a new one.
- * XXX: It is better to abort translation if the table is changed. */
flow->tunnel.metadata.tab = ofproto_get_tun_tab(
&ctx.xbridge->ofproto->up);
}
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index d3cb392077df1dbf2a22fd6d6298d7981577b3e2..fd0b2fdea0b5e695a4d2b8626b89187b32a92ec7 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -698,8 +698,10 @@ close_dpif_backer(struct dpif_backer *backer, bool del)
udpif_destroy(backer->udpif);
- SIMAP_FOR_EACH (node, &backer->tnl_backers) {
- dpif_port_del(backer->dpif, u32_to_odp(node->data), false);
+ if (del) {
+ SIMAP_FOR_EACH (node, &backer->tnl_backers) {
+ dpif_port_del(backer->dpif, u32_to_odp(node->data), false);
+ }
}
simap_destroy(&backer->tnl_backers);
ovs_rwlock_destroy(&backer->odp_to_ofport_lock);
@@ -866,6 +868,12 @@ ovs_explicit_drop_action_supported(struct ofproto_dpif *ofproto)
return ofproto->backer->rt_support.explicit_drop_action;
}
+/* Returns the 'lb_output_action' flag from the run-time support record of
+ * the backer that 'ofproto' belongs to. */
+bool
+ovs_lb_output_action_supported(struct ofproto_dpif *ofproto)
+{
+    const struct dpif_backer *backer = ofproto->backer;
+
+    return backer->rt_support.lb_output_action;
+}
+
/* Tests whether 'backer''s datapath supports recirculation. Only newer
* datapaths support OVS_KEY_ATTR_RECIRC_ID in keys. We need to disable some
* features on older datapaths that don't support this feature.
@@ -1580,6 +1588,8 @@ check_support(struct dpif_backer *backer)
backer->rt_support.ct_timeout = check_ct_timeout_policy(backer);
backer->rt_support.explicit_drop_action =
dpif_supports_explicit_drop_action(backer->dpif);
+ backer->rt_support.lb_output_action=
+ dpif_supports_lb_output_action(backer->dpif);
/* Flow fields. */
backer->rt_support.odp.ct_state = check_ct_state(backer);
@@ -1751,10 +1761,6 @@ destruct(struct ofproto *ofproto_, bool del)
xlate_remove_ofproto(ofproto);
xlate_txn_commit();
- /* Ensure that the upcall processing threads have no remaining references
- * to the ofproto or anything in it. */
- udpif_synchronize(ofproto->backer->udpif);
-
hmap_remove(&all_ofproto_dpifs_by_name,
&ofproto->all_ofproto_dpifs_by_name_node);
hmap_remove(&all_ofproto_dpifs_by_uuid,
@@ -1802,6 +1808,7 @@ run(struct ofproto *ofproto_)
{
struct ofproto_dpif *ofproto = ofproto_dpif_cast(ofproto_);
uint64_t new_seq, new_dump_seq;
+ bool is_connected;
if (mbridge_need_revalidate(ofproto->mbridge)) {
ofproto->backer->need_revalidate = REV_RECONFIGURE;
@@ -1870,6 +1877,15 @@ run(struct ofproto *ofproto_)
ofproto->backer->need_revalidate = REV_MCAST_SNOOPING;
}
+ /* Check if controller connection is toggled. */
+ is_connected = ofproto_is_alive(&ofproto->up);
+ if (ofproto->is_controller_connected != is_connected) {
+ ofproto->is_controller_connected = is_connected;
+ /* Trigger revalidation as fast failover group monitoring
+ * controller port may need to check liveness again. */
+ ofproto->backer->need_revalidate = REV_RECONFIGURE;
+ }
+
new_dump_seq = seq_read(udpif_dump_seq(ofproto->backer->udpif));
if (ofproto->dump_seq != new_dump_seq) {
struct rule *rule, *next_rule;
@@ -2183,7 +2199,7 @@ port_modified(struct ofport *port_)
struct netdev *netdev = port->up.netdev;
if (port->bundle && port->bundle->bond) {
- bond_slave_set_netdev(port->bundle->bond, port, netdev);
+ bond_member_set_netdev(port->bundle->bond, port, netdev);
}
if (port->cfm) {
@@ -3124,10 +3140,10 @@ bundle_del_port(struct ofport_dpif *port)
port->bundle = NULL;
if (bundle->lacp) {
- lacp_slave_unregister(bundle->lacp, port);
+ lacp_member_unregister(bundle->lacp, port);
}
if (bundle->bond) {
- bond_slave_unregister(bundle->bond, port);
+ bond_member_unregister(bundle->bond, port);
}
bundle_update(bundle);
@@ -3135,7 +3151,7 @@ bundle_del_port(struct ofport_dpif *port)
static bool
bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port,
- struct lacp_slave_settings *lacp)
+ struct lacp_member_settings *lacp)
{
struct ofport_dpif *port;
@@ -3161,7 +3177,7 @@ bundle_add_port(struct ofbundle *bundle, ofp_port_t ofp_port,
}
if (lacp) {
bundle->ofproto->backer->need_revalidate = REV_RECONFIGURE;
- lacp_slave_register(bundle->lacp, port, lacp);
+ lacp_member_register(bundle->lacp, port, lacp);
}
return true;
@@ -3220,8 +3236,8 @@ bundle_set(struct ofproto *ofproto_, void *aux,
return 0;
}
- ovs_assert(s->n_slaves == 1 || s->bond != NULL);
- ovs_assert((s->lacp != NULL) == (s->lacp_slaves != NULL));
+ ovs_assert(s->n_members == 1 || s->bond != NULL);
+ ovs_assert((s->lacp != NULL) == (s->lacp_members != NULL));
if (!bundle) {
bundle = xmalloc(sizeof *bundle);
@@ -3267,18 +3283,18 @@ bundle_set(struct ofproto *ofproto_, void *aux,
/* Update set of ports. */
ok = true;
- for (i = 0; i < s->n_slaves; i++) {
- if (!bundle_add_port(bundle, s->slaves[i],
- s->lacp ? &s->lacp_slaves[i] : NULL)) {
+ for (i = 0; i < s->n_members; i++) {
+ if (!bundle_add_port(bundle, s->members[i],
+ s->lacp ? &s->lacp_members[i] : NULL)) {
ok = false;
}
}
- if (!ok || ovs_list_size(&bundle->ports) != s->n_slaves) {
+ if (!ok || ovs_list_size(&bundle->ports) != s->n_members) {
struct ofport_dpif *next_port;
LIST_FOR_EACH_SAFE (port, next_port, bundle_node, &bundle->ports) {
- for (i = 0; i < s->n_slaves; i++) {
- if (s->slaves[i] == port->up.ofp_port) {
+ for (i = 0; i < s->n_members; i++) {
+ if (s->members[i] == port->up.ofp_port) {
goto found;
}
}
@@ -3287,7 +3303,7 @@ bundle_set(struct ofproto *ofproto_, void *aux,
found: ;
}
}
- ovs_assert(ovs_list_size(&bundle->ports) <= s->n_slaves);
+ ovs_assert(ovs_list_size(&bundle->ports) <= s->n_members);
if (ovs_list_is_empty(&bundle->ports)) {
bundle_destroy(bundle);
@@ -3392,8 +3408,8 @@ bundle_set(struct ofproto *ofproto_, void *aux,
}
LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
- bond_slave_register(bundle->bond, port,
- port->up.ofp_port, port->up.netdev);
+ bond_member_register(bundle->bond, port,
+ port->up.ofp_port, port->up.netdev);
}
} else {
bond_unref(bundle->bond);
@@ -3433,6 +3449,35 @@ bundle_remove(struct ofport *port_)
}
}
+/* Translates the BOND_BUCKETS OpenFlow port numbers in 'member_map' to
+ * datapath port numbers on 'ofproto' and passes the resulting map, together
+ * with 'bond_id', to dpif_bond_add() on the backer's datapath.
+ *
+ * 'member_map' must have at least BOND_BUCKETS elements. Returns whatever
+ * dpif_bond_add() returns. */
+int
+ofproto_dpif_add_lb_output_buckets(struct ofproto_dpif *ofproto,
+ uint32_t bond_id,
+ const ofp_port_t *member_map)
+{
+ odp_port_t odp_map[BOND_BUCKETS];
+
+ for (int bucket = 0; bucket < BOND_BUCKETS; bucket++) {
+ /* Convert ofp_port to odp_port. */
+ odp_map[bucket] = ofp_port_to_odp_port(ofproto, member_map[bucket]);
+ }
+ return dpif_bond_add(ofproto->backer->dpif, bond_id, odp_map);
+}
+
+/* Calls dpif_bond_del() for 'bond_id' on the datapath backing 'ofproto' and
+ * returns its result. */
+int
+ofproto_dpif_delete_lb_output_buckets(struct ofproto_dpif *ofproto,
+ uint32_t bond_id)
+{
+ return dpif_bond_del(ofproto->backer->dpif, bond_id);
+}
+
static void
send_pdu_cb(void *port_, const void *pdu, size_t pdu_size)
{
@@ -3525,7 +3562,7 @@ bundle_run(struct ofbundle *bundle)
struct ofport_dpif *port;
LIST_FOR_EACH (port, bundle_node, &bundle->ports) {
- bond_slave_set_may_enable(bundle->bond, port, port->up.may_enable);
+ bond_member_set_may_enable(bundle->bond, port, port->up.may_enable);
}
if (bond_run(bundle->bond, lacp_status(bundle->lacp))) {
@@ -3771,7 +3808,7 @@ may_enable_port(struct ofport_dpif *ofport)
/* If LACP is enabled, it must report that the link is enabled. */
if (ofport->bundle
- && !lacp_slave_may_enable(ofport->bundle->lacp, ofport)) {
+ && !lacp_member_may_enable(ofport->bundle->lacp, ofport)) {
return false;
}
@@ -3787,7 +3824,7 @@ port_run(struct ofport_dpif *ofport)
ofport->carrier_seq = carrier_seq;
if (carrier_changed && ofport->bundle) {
- lacp_slave_carrier_changed(ofport->bundle->lacp, ofport, enable);
+ lacp_member_carrier_changed(ofport->bundle->lacp, ofport, enable);
}
if (enable) {
@@ -3899,7 +3936,7 @@ port_del(struct ofproto *ofproto_, ofp_port_t ofp_port)
/* The caller is going to close ofport->up.netdev. If this is a
* bonded port, then the bond is using that netdev, so remove it
* from the bond. The client will need to reconfigure everything
- * after deleting ports, so then the slave will get re-added. */
+ * after deleting ports, so then the member will get re-added. */
bundle_remove(&ofport->up);
}
}
@@ -3983,11 +4020,12 @@ vport_get_status(const struct ofport *ofport_, char **errp)
}
static int
-port_get_lacp_stats(const struct ofport *ofport_, struct lacp_slave_stats *stats)
+port_get_lacp_stats(const struct ofport *ofport_,
+ struct lacp_member_stats *stats)
{
struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
if (ofport->bundle && ofport->bundle->lacp) {
- if (lacp_get_slave_stats(ofport->bundle->lacp, ofport, stats)) {
+ if (lacp_get_member_stats(ofport->bundle->lacp, ofport, stats)) {
return 0;
}
}
@@ -4088,7 +4126,7 @@ port_is_lacp_current(const struct ofport *ofport_)
{
const struct ofport_dpif *ofport = ofport_dpif_cast(ofport_);
return (ofport->bundle && ofport->bundle->lacp
- ? lacp_slave_is_current(ofport->bundle->lacp, ofport)
+ ? lacp_member_is_current(ofport->bundle->lacp, ofport)
: -1);
}
@@ -5418,7 +5456,8 @@ clear_existing_ct_timeout_policies(struct dpif_backer *backer)
static void
ct_zone_config_init(struct dpif_backer *backer)
{
- backer->tp_ids = id_pool_create(0, MAX_TIMEOUT_POLICY_ID);
+ backer->tp_ids = id_pool_create(DEFAULT_TP_ID + 1,
+ MAX_TIMEOUT_POLICY_ID - 1);
cmap_init(&backer->ct_zones);
hmap_init(&backer->ct_tps);
ovs_list_init(&backer->ct_tp_kill_list);
@@ -5563,6 +5602,7 @@ get_datapath_cap(const char *datapath_type, struct smap *cap)
smap_add(cap, "ct_timeout", s.ct_timeout ? "true" : "false");
smap_add(cap, "explicit_drop_action",
s.explicit_drop_action ? "true" :"false");
+ smap_add(cap, "lb_output_action", s.lb_output_action ? "true" : "false");
}
/* Gets timeout policy name in 'backer' based on 'zone', 'dl_type' and
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index c9d5df34b0e40dd502988f3a2181f265c0982a39..b41c3d82adf25017f15d0bfad28cf96fe7727f2c 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -54,7 +54,6 @@
#include "ovs-thread.h"
#include "ofproto-provider.h"
#include "util.h"
-#include "ovs-thread.h"
struct dpif_flow_stats;
struct ofproto_async_msg;
@@ -202,7 +201,10 @@ struct group_dpif *group_dpif_lookup(struct ofproto_dpif *,
DPIF_SUPPORT_FIELD(bool, ct_timeout, "Conntrack timeout policy") \
\
/* True if the datapath supports explicit drop action. */ \
- DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action")
+ DPIF_SUPPORT_FIELD(bool, explicit_drop_action, "Explicit Drop action") \
+ \
+ /* True if the datapath supports balance_tcp optimization */ \
+ DPIF_SUPPORT_FIELD(bool, lb_output_action, "Optimized Balance TCP mode")
/* Stores the various features which the corresponding backer supports. */
@@ -342,6 +344,9 @@ struct ofproto_dpif {
struct guarded_list ams; /* Contains "struct ofproto_async_msgs"s. */
struct seq *ams_seq; /* For notifying 'ams' reception. */
uint64_t ams_seqno;
+
+ bool is_controller_connected; /* True if any controller admitted this
+ * switch connection. */
};
struct ofproto_dpif *ofproto_dpif_lookup_by_name(const char *name);
@@ -379,6 +384,11 @@ int ofproto_dpif_add_internal_flow(struct ofproto_dpif *,
struct rule **rulep);
int ofproto_dpif_delete_internal_flow(struct ofproto_dpif *, struct match *,
int priority);
+int ofproto_dpif_add_lb_output_buckets(struct ofproto_dpif *, uint32_t bond_id,
+ const ofp_port_t *member_map);
+int ofproto_dpif_delete_lb_output_buckets(struct ofproto_dpif *,
+ uint32_t bond_id);
+bool ovs_lb_output_action_supported(struct ofproto_dpif *);
bool ovs_native_tunneling_is_on(struct ofproto_dpif *);
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index afecb24cba090543a56f7d699f560fa702e759df..9ad2b71d23eb9e2f2084a1de06cb78a5bc11c28d 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -1225,7 +1225,7 @@ struct ofproto_class {
* not support LACP.
*/
int (*port_get_lacp_stats)(const struct ofport *port,
- struct lacp_slave_stats *stats);
+ struct lacp_member_stats *stats);
/* ## ----------------------- ## */
/* ## OpenFlow Rule Functions ## */
@@ -1707,11 +1707,11 @@ struct ofproto_class {
/* If 's' is nonnull, this function registers a "bundle" associated with
* client data pointer 'aux' in 'ofproto'. A bundle is the same concept as
- * a Port in OVSDB, that is, it consists of one or more "slave" devices
- * (Interfaces, in OVSDB) along with VLAN and LACP configuration and, if
- * there is more than one slave, a bonding configuration. If 'aux' is
- * already registered then this function updates its configuration to 's'.
- * Otherwise, this function registers a new bundle.
+ * a Port in OVSDB, that is, it consists of one or more "member"
+ * devices (Interfaces, in OVSDB) along with VLAN and LACP configuration
+ * and, if there is more than one member, a bonding configuration. If 'aux'
+ * is already registered then this function updates its configuration to
+ * 's'. Otherwise, this function registers a new bundle.
*
* If 's' is NULL, this function unregisters the bundle registered on
* 'ofproto' associated with client data pointer 'aux'. If no such bundle
diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man
index 925752343e87831c2f7124277983690a1d78fa8b..095afd57cc55e234832510384d4fbe6eb3883938 100644
--- a/ofproto/ofproto-unixctl.man
+++ b/ofproto/ofproto-unixctl.man
@@ -9,7 +9,7 @@ that may be used on \fBofproto/trace\fR.
.IP "\fBofproto/trace\fR [\fIoptions\fR] [\fIdpname\fR] \fIodp_flow\fR [\fIpacket\fR]
.IQ "\fBofproto/trace\fR [\fIoptions\fR] \fIbridge\fR \fIbr_flow\fR [\fIpacket\fR]]
.IQ "\fBofproto/trace\-packet\-out\fR [\fIoptions\fR] [\fIdpname\fR] \fIodp_flow\fR [\fIpacket\fR] \fIactions\fR"
-.IQ "\fBofproto/trace\-packet\-out\fR [\fIoptions\fR \fIbridge\fR \fIbr_flow\fR [\fIpacket\fR] \fIactions\fR"
+.IQ "\fBofproto/trace\-packet\-out\fR [\fIoptions\fR] \fIbridge\fR \fIbr_flow\fR [\fIpacket\fR] \fIactions\fR"
Traces the path of an imaginary packet through \fIswitch\fR and
reports the path that it took. The initial treatment of the packet
varies based on the command:
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 08830d837164e7e622f56754e1166a9a0acf324c..b91517cd250dba01a5f6a85eb66be05cc54f1f6f 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -815,6 +815,13 @@ ofproto_set_dp_desc(struct ofproto *p, const char *dp_desc)
p->dp_desc = nullable_xstrdup(dp_desc);
}
+void
+ofproto_set_serial_desc(struct ofproto *p, const char *serial_desc)
+{
+ free(p->serial_desc);
+ p->serial_desc = nullable_xstrdup(serial_desc);
+}
+
int
ofproto_set_snoops(struct ofproto *ofproto, const struct sset *snoops)
{
@@ -1384,7 +1391,8 @@ ofproto_port_is_lacp_current(struct ofproto *ofproto, ofp_port_t ofp_port)
}
int
-ofproto_port_get_lacp_stats(const struct ofport *port, struct lacp_slave_stats *stats)
+ofproto_port_get_lacp_stats(const struct ofport *port,
+ struct lacp_member_stats *stats)
{
struct ofproto *ofproto = port->ofproto;
int error;
@@ -1402,8 +1410,8 @@ ofproto_port_get_lacp_stats(const struct ofport *port, struct lacp_slave_stats *
/* Registers a "bundle" associated with client data pointer 'aux' in 'ofproto'.
* A bundle is the same concept as a Port in OVSDB, that is, it consists of one
- * or more "slave" devices (Interfaces, in OVSDB) along with a VLAN
- * configuration plus, if there is more than one slave, a bonding
+ * or more "member" devices (Interfaces, in OVSDB) along with a VLAN
+ * configuration plus, if there is more than one member, a bonding
* configuration.
*
* If 'aux' is already registered then this function updates its configuration
@@ -1601,13 +1609,13 @@ ofproto_rule_delete(struct ofproto *ofproto, struct rule *rule)
}
static void
-ofproto_flush__(struct ofproto *ofproto)
+ofproto_flush__(struct ofproto *ofproto, bool del)
OVS_EXCLUDED(ofproto_mutex)
{
struct oftable *table;
/* This will flush all datapath flows. */
- if (ofproto->ofproto_class->flush) {
+ if (del && ofproto->ofproto_class->flush) {
ofproto->ofproto_class->flush(ofproto);
}
@@ -1710,7 +1718,7 @@ ofproto_destroy(struct ofproto *p, bool del)
return;
}
- ofproto_flush__(p);
+ ofproto_flush__(p, del);
HMAP_FOR_EACH_SAFE (ofport, next_ofport, hmap_node, &p->ports) {
ofport_destroy(ofport, del);
}
@@ -1899,7 +1907,8 @@ ofproto_wait(struct ofproto *p)
bool
ofproto_is_alive(const struct ofproto *p)
{
- return connmgr_has_controllers(p->connmgr);
+ return (connmgr_has_controllers(p->connmgr)
+ && connmgr_is_any_controller_admitted(p->connmgr));
}
/* Adds some memory usage statistics for 'ofproto' into 'usage', for use with
@@ -2288,7 +2297,7 @@ void
ofproto_flush_flows(struct ofproto *ofproto)
{
COVERAGE_INC(ofproto_flush);
- ofproto_flush__(ofproto);
+ ofproto_flush__(ofproto, false);
connmgr_flushed(ofproto->connmgr);
}
@@ -3463,7 +3472,7 @@ handle_set_config(struct ofconn *ofconn, const struct ofp_header *oh)
}
if (ofconn_get_type(ofconn) != OFCONN_PRIMARY
- || ofconn_get_role(ofconn) != OFPCR12_ROLE_SLAVE) {
+ || ofconn_get_role(ofconn) != OFPCR12_ROLE_SECONDARY) {
enum ofputil_frag_handling cur = ofproto->frag_handling;
enum ofputil_frag_handling next = config.frag;
@@ -3488,16 +3497,16 @@ handle_set_config(struct ofconn *ofconn, const struct ofp_header *oh)
return 0;
}
-/* Checks whether 'ofconn' is a slave controller. If so, returns an OpenFlow
- * error message code for the caller to propagate upward. Otherwise, returns
- * 0.
+/* Checks whether 'ofconn' is a secondary controller. If so, returns an
+ * OpenFlow error message code for the caller to propagate upward. Otherwise,
+ * returns 0.
*
* The log message mentions 'msg_type'. */
static enum ofperr
-reject_slave_controller(struct ofconn *ofconn)
+reject_secondary_controller(struct ofconn *ofconn)
{
- if (ofconn_get_role(ofconn) == OFPCR12_ROLE_SLAVE) {
- return OFPERR_OFPBRC_IS_SLAVE;
+ if (ofconn_get_role(ofconn) == OFPCR12_ROLE_SECONDARY) {
+ return OFPERR_OFPBRC_IS_SECONDARY;
} else {
return 0;
}
@@ -3678,7 +3687,7 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh)
COVERAGE_INC(ofproto_packet_out);
- error = reject_slave_controller(ofconn);
+ error = reject_secondary_controller(ofconn);
if (error) {
return error;
}
@@ -3800,7 +3809,7 @@ handle_port_mod(struct ofconn *ofconn, const struct ofp_header *oh)
struct ofport *port;
enum ofperr error;
- error = reject_slave_controller(ofconn);
+ error = reject_secondary_controller(ofconn);
if (error) {
return error;
}
@@ -6077,8 +6086,8 @@ ofproto_rule_send_removed(struct rule *rule)
fr.hard_timeout = rule->hard_timeout;
ovs_mutex_unlock(&rule->mutex);
rule->ofproto->ofproto_class->rule_get_stats(rule, &stats, &used);
- fr.packet_count += stats.n_packets;
- fr.byte_count += stats.n_bytes;
+ fr.packet_count = stats.n_packets;
+ fr.byte_count = stats.n_bytes;
connmgr_send_flow_removed(connmgr, &fr);
ovs_mutex_unlock(&ofproto_mutex);
}
@@ -6166,7 +6175,7 @@ handle_flow_mod(struct ofconn *ofconn, const struct ofp_header *oh)
struct ofpbuf ofpacts;
enum ofperr error;
- error = reject_slave_controller(ofconn);
+ error = reject_secondary_controller(ofconn);
if (error) {
return error;
}
@@ -6229,7 +6238,7 @@ handle_role_request(struct ofconn *ofconn, const struct ofp_header *oh)
if (request.role != OFPCR12_ROLE_NOCHANGE) {
if (request.role != OFPCR12_ROLE_EQUAL
&& request.have_generation_id
- && !ofconn_set_master_election_id(ofconn, request.generation_id)) {
+ && !ofconn_set_primary_election_id(ofconn, request.generation_id)) {
return OFPERR_OFPRRFC_STALE;
}
@@ -6237,7 +6246,7 @@ handle_role_request(struct ofconn *ofconn, const struct ofp_header *oh)
}
reply.role = ofconn_get_role(ofconn);
- reply.have_generation_id = ofconn_get_master_election_id(
+ reply.have_generation_id = ofconn_get_primary_election_id(
ofconn, &reply.generation_id);
buf = ofputil_encode_role_reply(oh, &reply);
ofconn_send_reply(ofconn, buf);
@@ -6857,7 +6866,7 @@ handle_meter_mod(struct ofconn *ofconn, const struct ofp_header *oh)
uint32_t meter_id;
enum ofperr error;
- error = reject_slave_controller(ofconn);
+ error = reject_secondary_controller(ofconn);
if (error) {
return error;
}
@@ -7793,7 +7802,7 @@ handle_group_mod(struct ofconn *ofconn, const struct ofp_header *oh)
struct ofproto_group_mod ogm;
enum ofperr error;
- error = reject_slave_controller(ofconn);
+ error = reject_secondary_controller(ofconn);
if (error) {
return error;
}
@@ -7914,7 +7923,7 @@ handle_table_mod(struct ofconn *ofconn, const struct ofp_header *oh)
struct ofputil_table_mod tm;
enum ofperr error;
- error = reject_slave_controller(ofconn);
+ error = reject_secondary_controller(ofconn);
if (error) {
return error;
}
@@ -8287,7 +8296,7 @@ handle_bundle_control(struct ofconn *ofconn, const struct ofp_header *oh)
struct ofpbuf *buf;
enum ofperr error;
- error = reject_slave_controller(ofconn);
+ error = reject_secondary_controller(ofconn);
if (error) {
return error;
}
@@ -8341,7 +8350,7 @@ handle_bundle_add(struct ofconn *ofconn, const struct ofp_header *oh)
struct ofputil_bundle_add_msg badd;
enum ofptype type;
- error = reject_slave_controller(ofconn);
+ error = reject_secondary_controller(ofconn);
if (error) {
return error;
}
@@ -8419,7 +8428,7 @@ handle_tlv_table_mod(struct ofconn *ofconn, const struct ofp_header *oh)
struct ofputil_tlv_table_mod ttm;
enum ofperr error;
- error = reject_slave_controller(ofconn);
+ error = reject_secondary_controller(ofconn);
if (error) {
return error;
}
diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index bac4a1c21f2e8b6fb85d156b333320915876de0f..b0262da2dff4f50f8daf67aa286547f4d7ae1778 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -351,6 +351,7 @@ void ofproto_set_threads(int n_handlers, int n_revalidators);
void ofproto_type_set_config(const char *type,
const struct smap *other_config);
void ofproto_set_dp_desc(struct ofproto *, const char *dp_desc);
+void ofproto_set_serial_desc(struct ofproto *p, const char *serial_desc);
int ofproto_set_snoops(struct ofproto *, const struct sset *snoops);
int ofproto_set_netflow(struct ofproto *,
const struct netflow_options *nf_options);
@@ -387,7 +388,8 @@ bool ofproto_port_bfd_status_changed(struct ofproto *, ofp_port_t ofp_port);
int ofproto_port_get_bfd_status(struct ofproto *, ofp_port_t ofp_port,
struct smap *);
int ofproto_port_is_lacp_current(struct ofproto *, ofp_port_t ofp_port);
-int ofproto_port_get_lacp_stats(const struct ofport *, struct lacp_slave_stats *);
+int ofproto_port_get_lacp_stats(const struct ofport *,
+ struct lacp_member_stats *);
int ofproto_port_set_stp(struct ofproto *, ofp_port_t ofp_port,
const struct ofproto_port_stp_settings *);
int ofproto_port_get_stp_status(struct ofproto *, ofp_port_t ofp_port,
@@ -440,8 +442,8 @@ enum port_priority_tags_mode {
struct ofproto_bundle_settings {
char *name; /* For use in log messages. */
- ofp_port_t *slaves; /* OpenFlow port numbers for slaves. */
- size_t n_slaves;
+ ofp_port_t *members; /* OpenFlow port numbers for members. */
+ size_t n_members;
enum port_vlan_mode vlan_mode; /* Selects mode for vlan and trunks */
uint16_t qinq_ethtype;
@@ -451,10 +453,10 @@ struct ofproto_bundle_settings {
enum port_priority_tags_mode use_priority_tags;
/* Use 802.1p tag for frames in VLAN 0? */
- struct bond_settings *bond; /* Must be nonnull iff if n_slaves > 1. */
+ struct bond_settings *bond; /* Must be nonnull iff n_members > 1. */
struct lacp_settings *lacp; /* Nonnull to enable LACP. */
- struct lacp_slave_settings *lacp_slaves; /* Array of n_slaves elements. */
+ struct lacp_member_settings *lacp_members; /* Array of n_members elements. */
bool protected; /* Protected port mode */
};
diff --git a/ofproto/tunnel.c b/ofproto/tunnel.c
index 03f0ab76562ac0f15241cbad7e40c4b7606930a9..3455ed233b282927ef13451f119ad6c170eb62df 100644
--- a/ofproto/tunnel.c
+++ b/ofproto/tunnel.c
@@ -13,8 +13,6 @@
* limitations under the License. */
#include
-#include "tunnel.h"
-
#include
#include "byte-order.h"
diff --git a/ovsdb/TODO.rst b/ovsdb/TODO.rst
index fb4a50fa67814b0e6dc354d3dd8d31695a7571bb..fd0163f22515175591e330891bb3e07c9063306c 100644
--- a/ovsdb/TODO.rst
+++ b/ovsdb/TODO.rst
@@ -27,12 +27,8 @@ OVSDB Clustering To-do List
* Ephemeral columns.
-* Unit test snapshotting.
-
* Locks.
-* Investigate 100% CPU for long-running triggers
-
* Tons of unit tests.
* Increase exponential backoff cap. Introduce randomization.
@@ -47,10 +43,6 @@ OVSDB Clustering To-do List
* ACID (and CAP?) explanation.
- * Upgrading OVN to a clustered database
-
- * Installing OVN with a clustered database
-
* Overall diagram explaining the cluster and ovsdb protocol pieces
* Future work:
diff --git a/ovsdb/automake.mk b/ovsdb/automake.mk
index b895f42925ef4cb0f6245b9fa3eac1f0482fb62a..d60f3f4ec8cba0a8c7ffd2f093c63a58c3f19015 100644
--- a/ovsdb/automake.mk
+++ b/ovsdb/automake.mk
@@ -106,7 +106,7 @@ CLEANFILES += $(OVSIDL_BUILT)
# However, current versions of Automake seem to output all variable
# assignments before any targets, so it doesn't seem to be a problem,
# at least for now.
-$(OVSIDL_BUILT): ovsdb/ovsdb-idlc.in
+$(OVSIDL_BUILT): ovsdb/ovsdb-idlc.in python/ovs/dirs.py
# ovsdb-doc
EXTRA_DIST += ovsdb/ovsdb-doc
diff --git a/ovsdb/condition.c b/ovsdb/condition.c
index 692c0932864c6fc670350ec852dffb2a366a7694..388dd54a16cfd8027892defb9a55a24101a6d768 100644
--- a/ovsdb/condition.c
+++ b/ovsdb/condition.c
@@ -29,33 +29,6 @@
#include "table.h"
#include "util.h"
-struct ovsdb_error *
-ovsdb_function_from_string(const char *name, enum ovsdb_function *function)
-{
-#define OVSDB_FUNCTION(ENUM, NAME) \
- if (!strcmp(name, NAME)) { \
- *function = ENUM; \
- return NULL; \
- }
- OVSDB_FUNCTIONS;
-#undef OVSDB_FUNCTION
-
- return ovsdb_syntax_error(NULL, "unknown function",
- "No function named %s.", name);
-}
-
-const char *
-ovsdb_function_to_string(enum ovsdb_function function)
-{
- switch (function) {
-#define OVSDB_FUNCTION(ENUM, NAME) case ENUM: return NAME;
- OVSDB_FUNCTIONS;
-#undef OVSDB_FUNCTION
- }
-
- return NULL;
-}
-
static struct ovsdb_error *
ovsdb_clause_from_json(const struct ovsdb_table_schema *ts,
const struct json *json,
diff --git a/ovsdb/dot2pic b/ovsdb/dot2pic
index de67261ac621d07dcb89bd1aaaa5a6c88025439e..2f858e19d5b6300a2c22360e29a033a79e2990fa 100755
--- a/ovsdb/dot2pic
+++ b/ovsdb/dot2pic
@@ -1,6 +1,6 @@
-#! /usr/bin/env python
+#!/usr/bin/env python3
-# Copyright (c) 2009, 2010, 2011, 2013, 2017 Nicira, Inc.
+# Copyright (c) 2009, 2010, 2011, 2013, 2017, 2020 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/ovsdb/execution.c b/ovsdb/execution.c
index e45f3d6796a7110f2bd0bcae1ce4dce81ba84de9..3a0dad5d0a7bc7ed55e723deff4f8fcf694ae36b 100644
--- a/ovsdb/execution.c
+++ b/ovsdb/execution.c
@@ -712,7 +712,7 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser,
long long int timeout_msec = 0;
size_t i;
- timeout = ovsdb_parser_member(parser, "timeout", OP_NUMBER | OP_OPTIONAL);
+ timeout = ovsdb_parser_member(parser, "timeout", OP_INTEGER | OP_OPTIONAL);
where = ovsdb_parser_member(parser, "where", OP_ARRAY);
columns_json = ovsdb_parser_member(parser, "columns",
OP_ARRAY | OP_OPTIONAL);
@@ -730,7 +730,7 @@ ovsdb_execute_wait(struct ovsdb_execution *x, struct ovsdb_parser *parser,
}
if (!error) {
if (timeout) {
- timeout_msec = MIN(LLONG_MAX, json_real(timeout));
+ timeout_msec = json_integer(timeout);
if (timeout_msec < 0) {
error = ovsdb_syntax_error(timeout, NULL,
"timeout must be nonnegative");
diff --git a/ovsdb/log.c b/ovsdb/log.c
index c82a79c9ffeede9e9048e2c7cbe6f50f8743169d..4a28fa3db6dab4b6a53aa54f2516cae4ec200c71 100644
--- a/ovsdb/log.c
+++ b/ovsdb/log.c
@@ -212,7 +212,7 @@ ovsdb_log_open(const char *name, const char *magic,
if (!strcmp(name, "/dev/stdin") && open_mode == OVSDB_LOG_READ_ONLY) {
fd = dup(STDIN_FILENO);
} else {
- fd = open(name, flags, 0666);
+ fd = open(name, flags, 0660);
}
if (fd < 0) {
const char *op = (open_mode == OVSDB_LOG_CREATE_EXCL ? "create"
@@ -658,7 +658,16 @@ ovsdb_log_write_and_free(struct ovsdb_log *log, struct json *json)
struct ovsdb_error *
ovsdb_log_commit_block(struct ovsdb_log *file)
{
+#if (_POSIX_C_SOURCE >= 199309L || _XOPEN_SOURCE >= 500)
+ /* We never check file metadata (mtime, atime) anywhere, so there is
+ * no need to update it every time the log is synced. If the system
+ * supports it, use fdatasync() so that the log update syncs the
+ * data only.
+ */
+ if (file->stream && fdatasync(fileno(file->stream))) {
+#else
if (file->stream && fsync(fileno(file->stream))) {
+#endif
return ovsdb_io_error(errno, "%s: fsync failed", file->display_name);
}
return NULL;
diff --git a/ovsdb/monitor.c b/ovsdb/monitor.c
index 1c66b428e7e48044d45780519715167f97a659e4..532dedcb645ccee794f254c0321f3232430bebc1 100644
--- a/ovsdb/monitor.c
+++ b/ovsdb/monitor.c
@@ -31,7 +31,6 @@
#include "simap.h"
#include "hash.h"
#include "table.h"
-#include "hash.h"
#include "timeval.h"
#include "transaction.h"
#include "jsonrpc-server.h"
diff --git a/ovsdb/ovsdb-doc b/ovsdb/ovsdb-doc
index 406c293114657617a47ff10c7d47f7c41b24ad14..10d0c0c1343005d29d223bb42c4aba34f5f37efb 100755
--- a/ovsdb/ovsdb-doc
+++ b/ovsdb/ovsdb-doc
@@ -1,6 +1,6 @@
-#! /usr/bin/python
+#!/usr/bin/python3
-# Copyright (c) 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
+# Copyright (c) 2010, 2011, 2012, 2013, 2014, 2015, 2020 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
index c285ee4b3c100b28ce4fa20aa9697ac5ade0d6bf..5914e08789e6a0c7b41b910b2dbf11404ca033dc 100755
--- a/ovsdb/ovsdb-idlc.in
+++ b/ovsdb/ovsdb-idlc.in
@@ -279,13 +279,21 @@ const struct %(s)s *%(s)s_table_track_get_first(const struct %(s)s_table *);
(ROW) = %(s)s_track_get_next(ROW))
-/* Returns true if 'row' was inserted since the last change tracking reset. */
+/* Returns true if 'row' was inserted since the last change tracking reset.
+ *
+ * Note: This can only be used to test rows of tracked changes. This cannot be
+ * used to test if an uncommitted row that has been added locally is new or it
+ * may give unexpected results. */
static inline bool %(s)s_is_new(const struct %(s)s *row)
{
- return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_MODIFY) == 0;
+ return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_INSERT) > 0;
}
-/* Returns true if 'row' was deleted since the last change tracking reset. */
+/* Returns true if 'row' was deleted since the last change tracking reset.
+ *
+ * Note: This can only be used to test rows of tracked changes. This cannot be
+ * used to test if an uncommitted row that has been added locally has been
+ * deleted or it may give unexpected results. */
static inline bool %(s)s_is_deleted(const struct %(s)s *row)
{
return %(s)s_row_get_seqno(row, OVSDB_IDL_CHANGE_DELETE) > 0;
@@ -333,6 +341,14 @@ struct %(s)s *%(s)s_cursor_data(struct ovsdb_idl_cursor *);
void %(s)s_init(struct %(s)s *);
void %(s)s_delete(const struct %(s)s *);
struct %(s)s *%(s)s_insert(struct ovsdb_idl_txn *);
+
+/* Returns true if the tracked column referenced by 'enum %(s)s_column_id' of
+ * the row referenced by 'struct %(s)s *' was updated since the last change
+ * tracking reset.
+ *
+ * Note: This can only be used to test rows of tracked changes. This cannot be
+ * used to test if an uncommitted row that has been added locally has been
+ * updated or it may give unexpected results. */
bool %(s)s_is_updated(const struct %(s)s *, enum %(s)s_column_id);
''' % {'s': structName, 'S': structName.upper()})
@@ -389,7 +405,7 @@ bool %(s)s_is_updated(const struct %(s)s *, enum %(s)s_column_id);
args = ['%(type)s%(name)s' % member for member in members]
print('%s);' % ', '.join(args))
- print('void %(s)s_set_condition(struct ovsdb_idl *, struct ovsdb_idl_condition *);' % {'s': structName})
+ print('unsigned int %(s)s_set_condition(struct ovsdb_idl *, struct ovsdb_idl_condition *);' % {'s': structName})
print("")
@@ -1306,6 +1322,7 @@ struct %(s)s *
&%(s)s_columns[%(S)s_COL_%(C)s],
datum,
&%(p)stable_classes[%(P)sTABLE_%(T)s]);
+ free(datum);
}
""" % {'t': tableName,
'p': prefix,
@@ -1351,9 +1368,10 @@ struct %(s)s *
print(" datum.values = NULL;")
txn_write_func = "ovsdb_idl_index_write"
elif type.is_optional_pointer():
- print(" union ovsdb_atom *key = xmalloc(sizeof (union ovsdb_atom));")
+ print(" union ovsdb_atom *key;")
print()
print(" if (%s) {" % keyVar)
+ print(" key = xmalloc(sizeof (union ovsdb_atom));")
print(" datum.n = 1;")
print(" datum.keys = key;")
print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), keyVar))
@@ -1364,9 +1382,10 @@ struct %(s)s *
print(" datum.values = NULL;")
txn_write_func = "ovsdb_idl_index_write"
elif type.n_max == 1:
- print(" union ovsdb_atom *key = xmalloc(sizeof(union ovsdb_atom));")
+ print(" union ovsdb_atom *key;")
print()
print(" if (%s) {" % nVar)
+ print(" key = xmalloc(sizeof(union ovsdb_atom));")
print(" datum.n = 1;")
print(" datum.keys = key;")
print(" " + type.key.assign_c_value_casting_away_const("key->%s" % type.key.type.to_string(), "*" + keyVar))
@@ -1413,10 +1432,10 @@ struct %(s)s *
print("\nstruct ovsdb_idl_column %s_columns[%s_N_COLUMNS];" % (
structName, structName.upper()))
print("""
-void
+unsigned int
%(s)s_set_condition(struct ovsdb_idl *idl, struct ovsdb_idl_condition *condition)
{
- ovsdb_idl_set_condition(idl, &%(p)stable_%(tl)s, condition);
+ return ovsdb_idl_set_condition(idl, &%(p)stable_%(tl)s, condition);
}""" % {'p': prefix,
's': structName,
'tl': tableName.lower()})
diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in
index 338f3bc299d2c7a1dab7d7ecf8b373e8f2f311a6..5a7f3ba1301d7ffbd1aa002e81177a9fcf1fd2fe 100644
--- a/ovsdb/ovsdb-server.1.in
+++ b/ovsdb/ovsdb-server.1.in
@@ -206,6 +206,10 @@ but not before 100 commits have been added or 10 minutes have elapsed
since the last compaction. It will also be compacted automatically
after 24 hours since the last compaction if 100 commits were added
regardless of its size.
+.IP "\fBovsdb\-server/memory\-trim\-on\-compaction\fR \fIon\fR|\fIoff\fR"
+If this option is \fIon\fR, \fBovsdb\-server\fR will try to reclaim all unused
+heap memory back to the system after each successful database compaction
+to reduce the memory consumption of the process. \fIoff\fR by default.
.
.IP "\fBovsdb\-server/reconnect\fR"
Makes \fBovsdb\-server\fR drop all of the JSON\-RPC
@@ -347,6 +351,11 @@ until the server has left the cluster.
.IP
Once a server leaves a cluster, it may never rejoin it. Instead,
create a new server and join it to the cluster.
+.IP
+Note that removing the server from the cluster alters the total size
+of the cluster. For example, if you remove two servers from a three
+server cluster, then the "cluster" becomes a single functioning server.
+This does not result in a three server cluster that lacks quorum.
.
.IP "\fBcluster/kick \fIdb server\fR"
Start graceful removal of \fIserver\fR from \fIdb\fR's cluster, like
@@ -372,6 +381,11 @@ This command must be executed on the leader. It initiates the change to the
cluster. To see if the change takes effect (committed), use
\fBcluster/status\fR to show the current setting. Once a change is committed,
it persists at server restarts.
+.IP "\fBcluster/set\-backlog\-threshold \fIdb\fR \fIn_msgs\fR \fIn_bytes\fR"
+Sets the backlog limits for \fIdb\fR's RAFT connections to a maximum of
+\fIn_msgs\fR messages or \fIn_bytes\fR bytes. If the backlog on one of the
+connections reaches the limit, it will be disconnected (and re-established).
+Values are checked only if the backlog contains more than 50 messages.
.
.so lib/vlog-unixctl.man
.so lib/memory-unixctl.man
diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c
index b6957d7300922893aafbe1f52e68a3a448059d92..0e60e2b87cdb99bec61eec678dc6fc6b2bdfa3a2 100644
--- a/ovsdb/ovsdb-server.c
+++ b/ovsdb/ovsdb-server.c
@@ -76,8 +76,12 @@ static char *ssl_protocols;
static char *ssl_ciphers;
static bool bootstrap_ca_cert;
+/* Try to reclaim heap memory back to system after DB compaction. */
+static bool trim_memory = false;
+
static unixctl_cb_func ovsdb_server_exit;
static unixctl_cb_func ovsdb_server_compact;
+static unixctl_cb_func ovsdb_server_memory_trim_on_compaction;
static unixctl_cb_func ovsdb_server_reconnect;
static unixctl_cb_func ovsdb_server_perf_counters_clear;
static unixctl_cb_func ovsdb_server_perf_counters_show;
@@ -90,6 +94,7 @@ static unixctl_cb_func ovsdb_server_set_active_ovsdb_server_probe_interval;
static unixctl_cb_func ovsdb_server_set_sync_exclude_tables;
static unixctl_cb_func ovsdb_server_get_sync_exclude_tables;
static unixctl_cb_func ovsdb_server_get_sync_status;
+static unixctl_cb_func ovsdb_server_get_db_storage_status;
struct server_config {
struct sset *remotes;
@@ -242,7 +247,7 @@ main_loop(struct server_config *config,
xasprintf("removing database %s because storage "
"disconnected permanently", node->name));
} else if (ovsdb_storage_should_snapshot(db->db->storage)) {
- log_and_free_error(ovsdb_snapshot(db->db));
+ log_and_free_error(ovsdb_snapshot(db->db, trim_memory));
}
}
if (run_process) {
@@ -409,6 +414,9 @@ main(int argc, char *argv[])
unixctl_command_register("exit", "", 0, 0, ovsdb_server_exit, &exiting);
unixctl_command_register("ovsdb-server/compact", "", 0, 1,
ovsdb_server_compact, &all_dbs);
+ unixctl_command_register("ovsdb-server/memory-trim-on-compaction",
+ "on|off", 1, 1,
+ ovsdb_server_memory_trim_on_compaction, NULL);
unixctl_command_register("ovsdb-server/reconnect", "", 0, 0,
ovsdb_server_reconnect, jsonrpc);
@@ -453,6 +461,9 @@ main(int argc, char *argv[])
unixctl_command_register("ovsdb-server/sync-status", "",
0, 0, ovsdb_server_get_sync_status,
&server_config);
+ unixctl_command_register("ovsdb-server/get-db-storage-status", "DB", 1, 1,
+ ovsdb_server_get_db_storage_status,
+ &server_config);
/* Simulate the behavior of OVS release prior to version 2.5 that
* does not support the monitor_cond method. */
@@ -540,7 +551,7 @@ close_db(struct server_config *config, struct db *db, char *comment)
static struct ovsdb_error * OVS_WARN_UNUSED_RESULT
parse_txn(struct server_config *config, struct db *db,
- struct ovsdb_schema *schema, const struct json *txn_json,
+ const struct ovsdb_schema *schema, const struct json *txn_json,
const struct uuid *txnid)
{
if (schema) {
@@ -548,7 +559,9 @@ parse_txn(struct server_config *config, struct db *db,
* (first grabbing its storage), then replace it with the new schema.
* The transaction must also include the replacement data.
*
- * Only clustered database schema changes go through this path. */
+ * Only clustered database schema changes and snapshot installs
+ * go through this path.
+ */
ovs_assert(txn_json);
ovs_assert(ovsdb_storage_is_clustered(db->db->storage));
@@ -558,13 +571,17 @@ parse_txn(struct server_config *config, struct db *db,
return error;
}
- ovsdb_jsonrpc_server_reconnect(
- config->jsonrpc, false,
- (db->db->schema
- ? xasprintf("database %s schema changed", db->db->name)
- : xasprintf("database %s connected to storage", db->db->name)));
+ if (!db->db->schema ||
+ strcmp(schema->version, db->db->schema->version)) {
+ ovsdb_jsonrpc_server_reconnect(
+ config->jsonrpc, false,
+ (db->db->schema
+ ? xasprintf("database %s schema changed", db->db->name)
+ : xasprintf("database %s connected to storage",
+ db->db->name)));
+ }
- ovsdb_replace(db->db, ovsdb_create(schema, NULL));
+ ovsdb_replace(db->db, ovsdb_create(ovsdb_schema_clone(schema), NULL));
/* Force update to schema in _Server database. */
db->row_uuid = UUID_ZERO;
@@ -613,6 +630,7 @@ read_db(struct server_config *config, struct db *db)
} else {
error = parse_txn(config, db, schema, txn_json, &txnid);
json_destroy(txn_json);
+ ovsdb_schema_destroy(schema);
if (error) {
break;
}
@@ -1380,7 +1398,7 @@ ovsdb_server_set_sync_exclude_tables(struct unixctl_conn *conn,
{
struct server_config *config = config_;
- char *err = set_blacklist_tables(argv[1], true);
+ char *err = set_excluded_tables(argv[1], true);
if (!err) {
free(*config->sync_exclude);
*config->sync_exclude = xstrdup(argv[1]);
@@ -1392,7 +1410,7 @@ ovsdb_server_set_sync_exclude_tables(struct unixctl_conn *conn,
config->all_dbs, server_uuid,
*config->replication_probe_interval);
}
- err = set_blacklist_tables(argv[1], false);
+ err = set_excluded_tables(argv[1], false);
}
unixctl_command_reply(conn, err);
free(err);
@@ -1404,7 +1422,7 @@ ovsdb_server_get_sync_exclude_tables(struct unixctl_conn *conn,
const char *argv[] OVS_UNUSED,
void *arg_ OVS_UNUSED)
{
- char *reply = get_blacklist_tables();
+ char *reply = get_excluded_tables();
unixctl_command_reply(conn, reply);
free(reply);
}
@@ -1481,7 +1499,8 @@ ovsdb_server_compact(struct unixctl_conn *conn, int argc,
VLOG_INFO("compacting %s database by user request",
node->name);
- struct ovsdb_error *error = ovsdb_snapshot(db->db);
+ struct ovsdb_error *error = ovsdb_snapshot(db->db,
+ trim_memory);
if (error) {
char *s = ovsdb_error_to_string(error);
ds_put_format(&reply, "%s\n", s);
@@ -1504,6 +1523,35 @@ ovsdb_server_compact(struct unixctl_conn *conn, int argc,
ds_destroy(&reply);
}
+/* "ovsdb-server/memory-trim-on-compaction on|off": turns on or off the
+ * 'trim_memory' flag that is handed to ovsdb_snapshot(), i.e. whether
+ * ovsdb-server tries to give heap memory back to the system with
+ * malloc_trim() once a compaction is done.
+ *
+ * Replies with an error if malloc_trim() is not available in this build or
+ * if the argument is neither "on" nor "off"; in both cases the flag is left
+ * unchanged. */
+static void
+ovsdb_server_memory_trim_on_compaction(struct unixctl_conn *conn,
+                                       int argc OVS_UNUSED,
+                                       const char *argv[],
+                                       void *arg OVS_UNUSED)
+{
+    const char *setting = argv[1];
+
+#if !HAVE_DECL_MALLOC_TRIM
+    unixctl_command_reply_error(conn, "memory trimming is not supported");
+    return;
+#endif
+
+    /* Anything that is not exactly "on" must be exactly "off". */
+    bool enable = !strcmp(setting, "on");
+
+    if (!enable && strcmp(setting, "off")) {
+        unixctl_command_reply_error(conn, "invalid argument");
+        return;
+    }
+
+    trim_memory = enable;
+    VLOG_INFO("memory trimming after compaction %s.",
+              enable ? "enabled" : "disabled");
+    unixctl_command_reply(conn, NULL);
+}
+
/* "ovsdb-server/reconnect": makes ovsdb-server drop all of its JSON-RPC
* connections and reconnect. */
static void
@@ -1694,6 +1742,41 @@ ovsdb_server_get_sync_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
ds_destroy(&ds);
}
+/* "ovsdb-server/get-db-storage-status DB": replies with "status: ok" when
+ * ovsdb_storage_get_error() reports no error for the storage of database DB,
+ * and with "status: <error text>" otherwise.
+ *
+ * Replies with an error if DB is not present in the server's database map or
+ * if its entry currently has no database attached. */
+static void
+ovsdb_server_get_db_storage_status(struct unixctl_conn *conn,
+                                   int argc OVS_UNUSED,
+                                   const char *argv[],
+                                   void *config_)
+{
+    struct server_config *config = config_;
+    struct shash_node *node = shash_find(config->all_dbs, argv[1]);
+    struct db *db = node ? node->data : NULL;
+
+    /* 'db' is only dereferenced once 'node' is known to exist. */
+    if (!node || !db->db) {
+        unixctl_command_reply_error(conn, "Failed to find the database.");
+        return;
+    }
+
+    char *error = ovsdb_storage_get_error(db->db->storage);
+    char *reply = (error
+                   ? xasprintf("status: %s", error)
+                   : xstrdup("status: ok"));
+
+    unixctl_command_reply(conn, reply);
+    free(reply);
+    free(error);
+}
+
static void
parse_options(int argc, char *argv[],
struct sset *db_filenames, struct sset *remotes,
@@ -1807,7 +1890,7 @@ parse_options(int argc, char *argv[],
break;
case OPT_SYNC_EXCLUDE: {
- char *err = set_blacklist_tables(optarg, false);
+ char *err = set_excluded_tables(optarg, false);
if (err) {
ovs_fatal(0, "%s", err);
}
diff --git a/ovsdb/ovsdb-tool.c b/ovsdb/ovsdb-tool.c
index 91662cab840476153c0e0dfa1920fe855b2774b3..1b49b6fc855c4b8f9e9efba153e691b975ed78fa 100644
--- a/ovsdb/ovsdb-tool.c
+++ b/ovsdb/ovsdb-tool.c
@@ -720,6 +720,7 @@ print_db_changes(struct shash *tables, struct smap *names,
ds_init(&s);
ovsdb_datum_to_string(&datum, type, &s);
value_string = ds_steal_cstr(&s);
+ ovsdb_datum_destroy(&datum, type);
} else {
ovsdb_error_destroy(error);
}
@@ -1497,6 +1498,44 @@ do_check_cluster(struct ovs_cmdl_context *ctx)
}
}
+ /* Check for db consistency:
+ * The serverid must be in the servers list.
+ */
+
+ for (struct server *s = c.servers; s < &c.servers[c.n_servers]; s++) {
+ struct shash *servers_obj = json_object(s->snap->servers);
+ char *server_id = xasprintf(SID_FMT, SID_ARGS(&s->header.sid));
+ bool found = false;
+ const struct shash_node *node;
+
+ SHASH_FOR_EACH (node, servers_obj) {
+ if (!strncmp(server_id, node->name, SID_LEN)) {
+ found = true;
+ }
+ }
+
+ if (!found) {
+ for (struct raft_entry *e = s->entries;
+ e < &s->entries[s->log_end - s->log_start]; e++) {
+ if (e->servers == NULL) {
+ continue;
+ }
+ struct shash *log_servers_obj = json_object(e->servers);
+ SHASH_FOR_EACH (node, log_servers_obj) {
+ if (!strncmp(server_id, node->name, SID_LEN)) {
+ found = true;
+ }
+ }
+ }
+ }
+
+ if (!found) {
+ ovs_fatal(0, "%s: server %s not found in server list",
+ s->filename, server_id);
+ }
+ free(server_id);
+ }
+
/* Clean up. */
for (size_t i = 0; i < c.n_servers; i++) {
diff --git a/ovsdb/ovsdb.c b/ovsdb/ovsdb.c
index cfc96b32f8de90d7aefe05101922809c02b5110d..9042658fa8b8cd36efcd7640fddde6ebb0b1e573 100644
--- a/ovsdb/ovsdb.c
+++ b/ovsdb/ovsdb.c
@@ -17,6 +17,10 @@
#include "ovsdb.h"
+#if HAVE_DECL_MALLOC_TRIM
+#include <malloc.h> /* Declares malloc_trim(), used by ovsdb_snapshot(). */
+#endif
+
#include "column.h"
#include "file.h"
#include "monitor.h"
@@ -414,7 +418,7 @@ ovsdb_create(struct ovsdb_schema *schema, struct ovsdb_storage *storage)
db->storage = storage;
ovs_list_init(&db->monitors);
ovs_list_init(&db->triggers);
- db->run_triggers = false;
+ db->run_triggers_now = db->run_triggers = false;
shash_init(&db->tables);
if (schema) {
@@ -502,6 +506,10 @@ ovsdb_get_memory_usage(const struct ovsdb *db, struct simap *usage)
}
simap_increase(usage, "cells", cells);
+
+ if (db->storage) {
+ ovsdb_storage_get_memory_usage(db->storage, usage);
+ }
}
struct ovsdb_table *
@@ -511,7 +519,7 @@ ovsdb_get_table(const struct ovsdb *db, const char *name)
}
struct ovsdb_error * OVS_WARN_UNUSED_RESULT
-ovsdb_snapshot(struct ovsdb *db)
+ovsdb_snapshot(struct ovsdb *db, bool trim_memory OVS_UNUSED)
{
if (!db->storage) {
return NULL;
@@ -523,6 +531,12 @@ ovsdb_snapshot(struct ovsdb *db)
schema, data);
json_destroy(schema);
json_destroy(data);
+
+#if HAVE_DECL_MALLOC_TRIM
+ if (!error && trim_memory) {
+ malloc_trim(0);
+ }
+#endif
return error;
}
diff --git a/ovsdb/ovsdb.h b/ovsdb/ovsdb.h
index 32e5333163a413dd368a05ef1903bc824c71a234..72e127c8478b4b3a83f47001f5ced1a0a70c76fb 100644
--- a/ovsdb/ovsdb.h
+++ b/ovsdb/ovsdb.h
@@ -83,6 +83,7 @@ struct ovsdb {
/* Triggers. */
struct ovs_list triggers; /* Contains "struct ovsdb_trigger"s. */
bool run_triggers;
+ bool run_triggers_now;
struct ovsdb_table *rbac_role;
@@ -111,7 +112,8 @@ struct json *ovsdb_execute(struct ovsdb *, const struct ovsdb_session *,
long long int elapsed_msec,
long long int *timeout_msec);
-struct ovsdb_error *ovsdb_snapshot(struct ovsdb *) OVS_WARN_UNUSED_RESULT;
+struct ovsdb_error *ovsdb_snapshot(struct ovsdb *, bool trim_memory)
+ OVS_WARN_UNUSED_RESULT;
void ovsdb_replace(struct ovsdb *dst, struct ovsdb *src);
diff --git a/ovsdb/raft-private.h b/ovsdb/raft-private.h
index ac8656d42fa99a782dbf5af1e59e405932da68b9..a69e37e5c2f2fa1ce14b369ef54a992153095e65 100644
--- a/ovsdb/raft-private.h
+++ b/ovsdb/raft-private.h
@@ -27,6 +27,7 @@
struct ds;
struct ovsdb_parser;
+struct raft_install_snapshot_request;
/* Formatting server IDs and cluster IDs for use in human-readable logs. Do
* not use these in cases where the whole server or cluster ID is needed; use
@@ -83,9 +84,14 @@ struct raft_server {
bool replied; /* Reply to append_request was received from this
node during current election_timeout interval.
*/
+ /* install_snapshot_request has been sent, but there is no response yet. */
+ bool install_snapshot_request_in_progress;
+
/* For use in adding and removing servers: */
struct uuid requester_sid; /* Nonzero if requested via RPC. */
struct unixctl_conn *requester_conn; /* Only if requested via unixctl. */
+
+ long long int last_msg_ts; /* Last received msg timestamp in ms. */
};
void raft_server_destroy(struct raft_server *);
diff --git a/ovsdb/raft-rpc.c b/ovsdb/raft-rpc.c
index 18c83fe9c24e1f6e331c5ed584ad86b12464cde2..dd14d81091fc51da04de242c9835c9bd556d067d 100644
--- a/ovsdb/raft-rpc.c
+++ b/ovsdb/raft-rpc.c
@@ -544,8 +544,8 @@ raft_format_install_snapshot_request(
ds_put_format(s, " last_index=%"PRIu64, rq->last_index);
ds_put_format(s, " last_term=%"PRIu64, rq->last_term);
ds_put_format(s, " last_eid="UUID_FMT, UUID_ARGS(&rq->last_eid));
- ds_put_cstr(s, " last_servers=");
ds_put_format(s, " election_timer=%"PRIu64, rq->election_timer);
+ ds_put_cstr(s, " last_servers=");
struct hmap servers;
struct ovsdb_error *error =
diff --git a/ovsdb/raft.c b/ovsdb/raft.c
index 4789bc4f22e83fa0088e227cbd0d2321d974df6c..ea91d1fdbafbd6d2927aebf76e8481a631a646b1 100644
--- a/ovsdb/raft.c
+++ b/ovsdb/raft.c
@@ -36,6 +36,7 @@
#include "ovsdb/log.h"
#include "raft-rpc.h"
#include "random.h"
+#include "simap.h"
#include "socket-util.h"
#include "stream.h"
#include "timeval.h"
@@ -73,7 +74,8 @@ enum raft_failure_test {
FT_CRASH_BEFORE_SEND_EXEC_REQ,
FT_CRASH_AFTER_SEND_EXEC_REQ,
FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE,
- FT_DELAY_ELECTION
+ FT_DELAY_ELECTION,
+ FT_DONT_SEND_VOTE_REQUEST
};
static enum raft_failure_test failure_test;
@@ -262,6 +264,12 @@ struct raft {
long long int election_base; /* Time of last heartbeat from leader. */
long long int election_timeout; /* Time at which we start an election. */
+ long long int election_start; /* Start election time. */
+ long long int election_won; /* Time of election completion. */
+ bool leadership_transfer; /* Was the leadership transferred? */
+
+ unsigned int n_disconnections;
+
/* Used for joining a cluster. */
bool joining; /* Attempting to join the cluster? */
struct sset remote_addresses; /* Addresses to try to find other servers. */
@@ -298,6 +306,17 @@ struct raft {
bool had_leader; /* There has been leader elected since last
election initiated. This is to help setting
candidate_retrying. */
+
+ /* For all. */
+ bool ever_had_leader; /* A leader has been elected at least once since
+ this raft was initialized, meaning it has
+ been connected at some point. */
+
+ /* Connection backlog limits. */
+#define DEFAULT_MAX_BACKLOG_N_MSGS 500
+#define DEFAULT_MAX_BACKLOG_N_BYTES UINT32_MAX
+ size_t conn_backlog_max_n_msgs; /* Number of messages. */
+ size_t conn_backlog_max_n_bytes; /* Number of bytes. */
};
/* All Raft structures. */
@@ -405,6 +424,9 @@ raft_alloc(void)
raft->election_timer = ELECTION_BASE_MSEC;
+ raft->conn_backlog_max_n_msgs = DEFAULT_MAX_BACKLOG_N_MSGS;
+ raft->conn_backlog_max_n_bytes = DEFAULT_MAX_BACKLOG_N_BYTES;
+
return raft;
}
@@ -932,6 +954,9 @@ raft_add_conn(struct raft *raft, struct jsonrpc_session *js,
&conn->sid);
conn->incoming = incoming;
conn->js_seqno = jsonrpc_session_get_seqno(conn->js);
+ jsonrpc_session_set_probe_interval(js, 0);
+ jsonrpc_session_set_backlog_threshold(js, raft->conn_backlog_max_n_msgs,
+ raft->conn_backlog_max_n_bytes);
}
/* Starts the local server in an existing Raft cluster, using the local copy of
@@ -1007,6 +1032,23 @@ raft_get_sid(const struct raft *raft)
return &raft->sid;
}
+/* Adds memory consumption info for 'raft' to 'usage' for later use by
+ * memory_report():
+ *
+ *   - "raft-backlog-kB": the jsonrpc_session_get_backlog() values of all
+ *     connections, summed and divided by 1000.
+ *   - "raft-connections": the number of connections in 'raft->conns'.
+ *   - "raft-log": 'raft->log_end - raft->log_start'. */
+void
+raft_get_memory_usage(const struct raft *raft, struct simap *usage)
+{
+    uint64_t total_backlog = 0;
+    int n_conns = 0;
+    struct raft_conn *c;
+
+    LIST_FOR_EACH (c, list_node, &raft->conns) {
+        total_backlog += jsonrpc_session_get_backlog(c->js);
+        n_conns++;
+    }
+
+    simap_increase(usage, "raft-backlog-kB", total_backlog / 1000);
+    simap_increase(usage, "raft-connections", n_conns);
+    simap_increase(usage, "raft-log", raft->log_end - raft->log_start);
+}
+
/* Returns true if 'raft' has completed joining its cluster, has not left or
* initiated leaving the cluster, does not have failed disk storage, and is
* apparently connected to the leader in a healthy way (or is itself the
@@ -1020,12 +1062,22 @@ raft_get_sid(const struct raft *raft)
bool
raft_is_connected(const struct raft *raft)
{
+ static bool last_state = false;
bool ret = (!raft->candidate_retrying
&& !raft->joining
&& !raft->leaving
&& !raft->left
- && !raft->failed);
- VLOG_DBG("raft_is_connected: %s\n", ret? "true": "false");
+ && !raft->failed
+ && raft->ever_had_leader);
+
+ if (!ret) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+ VLOG_DBG_RL(&rl, "raft_is_connected: false");
+ } else if (!last_state) {
+ VLOG_DBG("raft_is_connected: true");
+ }
+ last_state = ret;
+
return ret;
}
@@ -1397,8 +1449,19 @@ raft_conn_run(struct raft *raft, struct raft_conn *conn)
jsonrpc_session_run(conn->js);
unsigned int new_seqno = jsonrpc_session_get_seqno(conn->js);
- bool just_connected = (new_seqno != conn->js_seqno
+ bool reconnected = new_seqno != conn->js_seqno;
+ bool just_connected = (reconnected
&& jsonrpc_session_is_connected(conn->js));
+
+ if (reconnected) {
+ /* Clear 'install_snapshot_request_in_progress' since the request might
+ * not have reached the destination or the server was restarted. */
+ struct raft_server *server = raft_find_server(raft, &conn->sid);
+ if (server) {
+ server->install_snapshot_request_in_progress = false;
+ }
+ }
+
conn->js_seqno = new_seqno;
if (just_connected) {
if (raft->joining) {
@@ -1641,6 +1704,8 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
}
ovs_assert(raft->role != RAFT_LEADER);
+
+ raft->leader_sid = UUID_ZERO;
raft->role = RAFT_CANDIDATE;
/* If there was no leader elected since last election, we know we are
* retrying now. */
@@ -1649,6 +1714,10 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
raft->n_votes = 0;
+ raft->election_start = time_msec();
+ raft->election_won = 0;
+ raft->leadership_transfer = leadership_transfer;
+
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
if (!VLOG_DROP_INFO(&rl)) {
long long int now = time_msec();
@@ -1684,7 +1753,9 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
.leadership_transfer = leadership_transfer,
},
};
- raft_send(raft, &rq);
+ if (failure_test != FT_DONT_SEND_VOTE_REQUEST) {
+ raft_send(raft, &rq);
+ }
}
/* Vote for ourselves. */
@@ -1796,6 +1867,7 @@ raft_run(struct raft *raft)
struct raft_conn *next;
LIST_FOR_EACH_SAFE (conn, next, list_node, &raft->conns) {
if (!raft_conn_should_stay_open(raft, conn)) {
+ raft->n_disconnections++;
raft_conn_close(conn);
}
}
@@ -2513,13 +2585,14 @@ raft_server_init_leader(struct raft *raft, struct raft_server *s)
s->match_index = 0;
s->phase = RAFT_PHASE_STABLE;
s->replied = false;
+ s->install_snapshot_request_in_progress = false;
}
static void
raft_set_leader(struct raft *raft, const struct uuid *sid)
{
raft->leader_sid = *sid;
- raft->had_leader = true;
+ raft->ever_had_leader = raft->had_leader = true;
raft->candidate_retrying = false;
}
@@ -2535,6 +2608,7 @@ raft_become_leader(struct raft *raft)
ovs_assert(raft->role != RAFT_LEADER);
raft->role = RAFT_LEADER;
+ raft->election_won = time_msec();
raft_set_leader(raft, &raft->sid);
raft_reset_election_timer(raft);
raft_reset_ping_timer(raft);
@@ -2547,7 +2621,6 @@ raft_become_leader(struct raft *raft)
raft->election_timer_new = 0;
raft_update_our_match_index(raft, raft->log_end - 1);
- raft_send_heartbeats(raft);
/* Write the fact that we are leader to the log. This is not used by the
* algorithm (although it could be, for quick restart), but it is used for
@@ -2960,6 +3033,15 @@ raft_update_leader(struct raft *raft, const struct uuid *sid)
};
ignore(ovsdb_log_write_and_free(raft->log, raft_record_to_json(&r)));
}
+ if (raft->role == RAFT_CANDIDATE) {
+ /* Section 3.4: While waiting for votes, a candidate may
+ * receive an AppendEntries RPC from another server claiming to
+ * be leader. If the leader’s term (included in its RPC) is at
+ * least as large as the candidate’s current term, then the
+ * candidate recognizes the leader as legitimate and returns to
+ * follower state. */
+ raft->role = RAFT_FOLLOWER;
+ }
return true;
}
@@ -3260,7 +3342,20 @@ raft_send_install_snapshot_request(struct raft *raft,
.election_timer = raft->election_timer, /* use latest value */
}
};
- raft_send(raft, &rpc);
+
+ if (s->install_snapshot_request_in_progress) {
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+
+ VLOG_INFO_RL(&rl, "not sending snapshot to server %s, "
+ "already in progress", s->nickname);
+ return;
+ }
+
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+ VLOG_INFO_RL(&rl, "sending snapshot to server %s, %"PRIu64":%"PRIu64".",
+ s->nickname, raft->term, raft->log_start - 1);
+ CONST_CAST(struct raft_server *, s)->install_snapshot_request_in_progress
+ = raft_send(raft, &rpc);
}
static void
@@ -3339,7 +3434,7 @@ raft_handle_append_reply(struct raft *raft,
raft_send_install_snapshot_request(raft, s, NULL);
} else if (s->next_index < raft->log_end) {
/* Case 2. */
- raft_send_append_request(raft, s, 1, NULL);
+ raft_send_append_request(raft, s, raft->log_end - s->next_index, NULL);
} else {
/* Case 3. */
if (s->phase == RAFT_PHASE_CATCHUP) {
@@ -3631,6 +3726,7 @@ raft_handle_add_server_request(struct raft *raft,
s->requester_sid = rq->common.sid;
s->requester_conn = NULL;
s->phase = RAFT_PHASE_CATCHUP;
+ s->last_msg_ts = time_msec();
/* Start sending the log. If this is the first time we've tried to add
* this server, then this will quickly degenerate into an InstallSnapshot
@@ -3913,7 +4009,7 @@ raft_handle_install_snapshot_request__(
struct ovsdb_error *error = raft_save_snapshot(raft, new_log_start,
&new_snapshot);
if (error) {
- char *error_s = ovsdb_error_to_string(error);
+ char *error_s = ovsdb_error_to_string_free(error);
VLOG_WARN("could not save snapshot: %s", error_s);
free(error_s);
return false;
@@ -3977,6 +4073,8 @@ raft_handle_install_snapshot_reply(
}
}
+ s->install_snapshot_request_in_progress = false;
+
if (rpy->last_index != raft->log_start - 1 ||
rpy->last_term != raft->snap.term) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
@@ -3992,8 +4090,9 @@ raft_handle_install_snapshot_reply(
VLOG_INFO_RL(&rl, "cluster "CID_FMT": installed snapshot on server %s "
" up to %"PRIu64":%"PRIu64, CID_ARGS(&raft->cid),
s->nickname, rpy->last_term, rpy->last_index);
- s->next_index = raft->log_end;
- raft_send_append_request(raft, s, 0, "snapshot installed");
+ s->next_index = raft->log_start;
+ raft_send_append_request(raft, s, raft->log_end - s->next_index,
+ "snapshot installed");
}
/* Returns true if 'raft' has grown enough since the last snapshot that
@@ -4143,9 +4242,7 @@ raft_handle_execute_command_request__(
cmd->sid = rq->common.sid;
enum raft_command_status status = cmd->status;
- if (status != RAFT_CMD_INCOMPLETE) {
- raft_command_unref(cmd);
- }
+ raft_command_unref(cmd);
return status;
}
@@ -4189,6 +4286,11 @@ raft_handle_execute_command_reply(
static void
raft_handle_rpc(struct raft *raft, const union raft_rpc *rpc)
{
+ struct raft_server *s = raft_find_server(raft, &rpc->common.sid);
+ if (s) {
+ s->last_msg_ts = time_msec();
+ }
+
uint64_t term = raft_rpc_get_term(rpc);
if (term
&& !raft_should_suppress_disruptive_server(raft, rpc)
@@ -4401,6 +4503,17 @@ raft_unixctl_status(struct unixctl_conn *conn,
raft_put_sid("Vote", &raft->vote, raft, &s);
ds_put_char(&s, '\n');
+ if (raft->election_start) {
+ ds_put_format(&s,
+ "Last Election started %"PRIu64" ms ago, reason: %s\n",
+ (uint64_t) (time_msec() - raft->election_start),
+ raft->leadership_transfer
+ ? "leadership_transfer" : "timeout");
+ }
+ if (raft->election_won) {
+ ds_put_format(&s, "Last Election won: %"PRIu64" ms ago\n",
+ (uint64_t) (time_msec() - raft->election_won));
+ }
ds_put_format(&s, "Election timer: %"PRIu64, raft->election_timer);
if (raft->role == RAFT_LEADER && raft->election_timer_new) {
ds_put_format(&s, " (changing to %"PRIu64")",
@@ -4428,6 +4541,8 @@ raft_unixctl_status(struct unixctl_conn *conn,
}
ds_put_char(&s, '\n');
+ ds_put_format(&s, "Disconnections: %u\n", raft->n_disconnections);
+
ds_put_cstr(&s, "Servers:\n");
struct raft_server *server;
HMAP_FOR_EACH (server, hmap_node, &raft->servers) {
@@ -4452,6 +4567,10 @@ raft_unixctl_status(struct unixctl_conn *conn,
ds_put_format(&s, " next_index=%"PRIu64" match_index=%"PRIu64,
server->next_index, server->match_index);
}
+ if (server->last_msg_ts) {
+ ds_put_format(&s, " last msg %"PRIu64" ms ago",
+ (uint64_t) (time_msec() - server->last_msg_ts));
+ }
ds_put_char(&s, '\n');
}
@@ -4639,6 +4758,42 @@ raft_unixctl_change_election_timer(struct unixctl_conn *conn,
unixctl_command_reply(conn, "change of election timer initiated.");
}
+/* "cluster/set-backlog-threshold DB N_MSGS N_BYTES": stores new connection
+ * backlog limits in the raft instance named DB and applies them to each of
+ * its current connections.
+ *
+ * Replies with an error, changing nothing, if DB is unknown, if either
+ * number fails to parse as a decimal integer, or if N_MSGS is below 50 or
+ * either value exceeds SIZE_MAX. */
+static void
+raft_unixctl_set_backlog_threshold(struct unixctl_conn *conn,
+                                   int argc OVS_UNUSED, const char *argv[],
+                                   void *aux OVS_UNUSED)
+{
+    struct raft *raft = raft_lookup_by_name(argv[1]);
+    if (!raft) {
+        unixctl_command_reply_error(conn, "unknown cluster");
+        return;
+    }
+
+    unsigned long long max_msgs, max_bytes;
+    bool parsed = (str_to_ullong(argv[2], 10, &max_msgs)
+                   && str_to_ullong(argv[3], 10, &max_bytes));
+    if (!parsed) {
+        unixctl_command_reply_error(conn, "invalid argument");
+        return;
+    }
+
+    bool in_range = (max_msgs >= 50
+                     && max_msgs <= SIZE_MAX
+                     && max_bytes <= SIZE_MAX);
+    if (!in_range) {
+        unixctl_command_reply_error(conn, "values out of range");
+        return;
+    }
+
+    raft->conn_backlog_max_n_msgs = max_msgs;
+    raft->conn_backlog_max_n_bytes = max_bytes;
+
+    /* Bring the already established sessions in line with the new limits. */
+    struct raft_conn *c;
+    LIST_FOR_EACH (c, list_node, &raft->conns) {
+        jsonrpc_session_set_backlog_threshold(c->js, max_msgs, max_bytes);
+    }
+
+    unixctl_command_reply(conn, NULL);
+}
+
static void
raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
int argc OVS_UNUSED, const char *argv[],
@@ -4667,6 +4822,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
raft_reset_election_timer(raft);
}
}
+ } else if (!strcmp(test, "dont-send-vote-request")) {
+ failure_test = FT_DONT_SEND_VOTE_REQUEST;
} else if (!strcmp(test, "clear")) {
failure_test = FT_NO_TEST;
unixctl_command_reply(conn, "test dismissed");
@@ -4697,6 +4854,9 @@ raft_init(void)
raft_unixctl_kick, NULL);
unixctl_command_register("cluster/change-election-timer", "DB TIME", 2, 2,
raft_unixctl_change_election_timer, NULL);
+ unixctl_command_register("cluster/set-backlog-threshold",
+ "DB N_MSGS N_BYTES", 3, 3,
+ raft_unixctl_set_backlog_threshold, NULL);
unixctl_command_register("cluster/failure-test", "FAILURE SCENARIO", 1, 1,
raft_unixctl_failure_test, NULL);
ovsthread_once_done(&once);
diff --git a/ovsdb/raft.h b/ovsdb/raft.h
index 3d448995af21d2ec65278472f6b16e31797f68c0..99d5307e54684ee292d18f569cd15ffb154deca2 100644
--- a/ovsdb/raft.h
+++ b/ovsdb/raft.h
@@ -67,6 +67,7 @@
struct json;
struct ovsdb_log;
struct raft;
+struct simap;
struct sset;
#define RAFT_MAGIC "CLUSTER"
@@ -113,6 +114,7 @@ const struct uuid *raft_get_cid(const struct raft *);
const struct uuid *raft_get_sid(const struct raft *);
bool raft_is_connected(const struct raft *);
bool raft_is_leader(const struct raft *);
+void raft_get_memory_usage(const struct raft *, struct simap *usage);
/* Joining a cluster. */
bool raft_is_joining(const struct raft *);
diff --git a/ovsdb/rbac.c b/ovsdb/rbac.c
index b85ca9a93969f381285101af087cf893540830d5..2986027c90cc6a95634eb2d8eb2a32cbee2885ce 100644
--- a/ovsdb/rbac.c
+++ b/ovsdb/rbac.c
@@ -21,7 +21,6 @@
#include "column.h"
#include "condition.h"
-#include "condition.h"
#include "file.h"
#include "mutation.h"
#include "openvswitch/vlog.h"
diff --git a/ovsdb/replication.c b/ovsdb/replication.c
index cbbce64dfbf529421960ebf36587d1bb6aba4ef5..bb1bd4250bd541170e5a9bd3ad1d7e352f7040af 100644
--- a/ovsdb/replication.c
+++ b/ovsdb/replication.c
@@ -68,11 +68,11 @@ static struct ovsdb_error *execute_update(struct ovsdb_txn *txn,
struct json *new);
/* Maps from db name to sset of table names. */
-static struct shash blacklist_tables = SHASH_INITIALIZER(&blacklist_tables);
+static struct shash excluded_tables = SHASH_INITIALIZER(&excluded_tables);
-static void blacklist_tables_clear(void);
-static void blacklist_tables_add(const char *database, const char *table);
-static bool blacklist_tables_find(const char *database, const char* table);
+static void excluded_tables_clear(void);
+static void excluded_tables_add(const char *database, const char *table);
+static bool excluded_tables_find(const char *database, const char *table);
/* Keep track of request IDs of all outstanding OVSDB requests. */
@@ -131,7 +131,7 @@ replication_init(const char *sync_from_, const char *exclude_tables,
sync_from = xstrdup(sync_from_);
/* Caller should have verified that the 'exclude_tables' is
* parseable. An error here is unexpected. */
- ovs_assert(!set_blacklist_tables(exclude_tables, false));
+ ovs_assert(!set_excluded_tables(exclude_tables, false));
replication_dbs_destroy();
@@ -407,38 +407,38 @@ replication_wait(void)
}
}
-/* Parse 'blacklist' to rebuild 'blacklist_tables'. If 'dryrun' is false, the
- * current black list tables will be wiped out, regardless of whether
- * 'blacklist' can be parsed. If 'dryrun' is true, only parses 'blacklist' and
- * reports any errors, without modifying the blacklist.
+/* Parse 'excluded' to rebuild 'excluded_tables'. If 'dryrun' is false, the
+ * current set of excluded tables will be wiped out, regardless of whether
+ * 'excluded' can be parsed. If 'dryrun' is true, only parses 'excluded' and
+ * reports any errors, without modifying the list of exclusions.
*
* On error, returns the error string, which the caller is
* responsible for freeing. Returns NULL otherwise. */
char * OVS_WARN_UNUSED_RESULT
-set_blacklist_tables(const char *blacklist, bool dryrun)
+set_excluded_tables(const char *excluded, bool dryrun)
{
struct sset set = SSET_INITIALIZER(&set);
char *err = NULL;
- if (blacklist) {
+ if (excluded) {
const char *longname;
if (!dryrun) {
/* Can only add to an empty shash. */
- blacklist_tables_clear();
+ excluded_tables_clear();
}
- sset_from_delimited_string(&set, blacklist, " ,");
+ sset_from_delimited_string(&set, excluded, " ,");
SSET_FOR_EACH (longname, &set) {
char *database = xstrdup(longname), *table = NULL;
strtok_r(database, ":", &table);
if (table && !dryrun) {
- blacklist_tables_add(database, table);
+ excluded_tables_add(database, table);
}
free(database);
if (!table) {
- err = xasprintf("Can't parse black list table: %s", longname);
+ err = xasprintf("Can't parse excluded table: %s", longname);
goto done;
}
}
@@ -447,19 +447,19 @@ set_blacklist_tables(const char *blacklist, bool dryrun)
done:
sset_destroy(&set);
if (err && !dryrun) {
- /* On error, destroy the partially built 'blacklist_tables'. */
- blacklist_tables_clear();
+ /* On error, destroy the partially built 'excluded_tables'. */
+ excluded_tables_clear();
}
return err;
}
char * OVS_WARN_UNUSED_RESULT
-get_blacklist_tables(void)
+get_excluded_tables(void)
{
struct shash_node *node;
struct sset set = SSET_INITIALIZER(&set);
- SHASH_FOR_EACH (node, &blacklist_tables) {
+ SHASH_FOR_EACH (node, &excluded_tables) {
const char *database = node->name;
const char *table;
struct sset *tables = node->data;
@@ -489,35 +489,35 @@ get_blacklist_tables(void)
}
static void
-blacklist_tables_clear(void)
+excluded_tables_clear(void)
{
struct shash_node *node;
- SHASH_FOR_EACH (node, &blacklist_tables) {
+ SHASH_FOR_EACH (node, &excluded_tables) {
struct sset *tables = node->data;
sset_destroy(tables);
}
- shash_clear_free_data(&blacklist_tables);
+ shash_clear_free_data(&excluded_tables);
}
static void
-blacklist_tables_add(const char *database, const char *table)
+excluded_tables_add(const char *database, const char *table)
{
- struct sset *tables = shash_find_data(&blacklist_tables, database);
+ struct sset *tables = shash_find_data(&excluded_tables, database);
if (!tables) {
tables = xmalloc(sizeof *tables);
sset_init(tables);
- shash_add(&blacklist_tables, database, tables);
+ shash_add(&excluded_tables, database, tables);
}
sset_add(tables, table);
}
static bool
-blacklist_tables_find(const char *database, const char *table)
+excluded_tables_find(const char *database, const char *table)
{
- struct sset *tables = shash_find_data(&blacklist_tables, database);
+ struct sset *tables = shash_find_data(&excluded_tables, database);
return tables && sset_contains(tables, table);
}
@@ -531,8 +531,8 @@ disconnect_active_server(void)
void
replication_destroy(void)
{
- blacklist_tables_clear();
- shash_destroy(&blacklist_tables);
+ excluded_tables_clear();
+ shash_destroy(&excluded_tables);
if (sync_from) {
free(sync_from);
@@ -558,8 +558,8 @@ reset_database(struct ovsdb *db)
struct shash_node *table_node;
SHASH_FOR_EACH (table_node, &db->tables) {
- /* Delete all rows if the table is not blacklisted. */
- if (!blacklist_tables_find(db->schema->name, table_node->name)) {
+ /* Delete all rows if the table is not excluded. */
+ if (!excluded_tables_find(db->schema->name, table_node->name)) {
struct ovsdb_table *table = table_node->data;
struct ovsdb_row *row, *next;
HMAP_FOR_EACH_SAFE (row, next, hmap_node, &table->rows) {
@@ -572,7 +572,7 @@ reset_database(struct ovsdb *db)
}
/* Create a monitor request for 'db'. The monitor request will include
- * any tables from 'blacklisted_tables'
+ * any tables from 'excluded_tables'
*
* Caller is responsible for disposing 'request'.
*/
@@ -590,8 +590,8 @@ create_monitor_request(struct ovsdb_schema *schema)
for (int j = 0; j < n; j++) {
struct ovsdb_table_schema *table = nodes[j]->data;
- /* Monitor all tables not blacklisted. */
- if (!blacklist_tables_find(db_name, table->name)) {
+ /* Monitor all tables not excluded. */
+ if (!excluded_tables_find(db_name, table->name)) {
add_monitored_table(table, monitor_request);
}
}
@@ -914,10 +914,10 @@ replication_status(void)
}
ds_chomp(&ds, ',');
- if (!shash_is_empty(&blacklist_tables)) {
+ if (!shash_is_empty(&excluded_tables)) {
ds_put_char(&ds, '\n');
ds_put_cstr(&ds, "exclude: ");
- ds_put_and_free_cstr(&ds, get_blacklist_tables());
+ ds_put_and_free_cstr(&ds, get_excluded_tables());
}
break;
}
diff --git a/ovsdb/replication.h b/ovsdb/replication.h
index c45f33e2629fcbbf9e1ad0889f27b376633ac4fd..6d1be820f3d18d712aa842c955315623b18e5107 100644
--- a/ovsdb/replication.h
+++ b/ovsdb/replication.h
@@ -39,9 +39,9 @@ struct ovsdb;
* replication_get_last_error() should be call within the main loop
* whenever OVSDB server runs in the backup mode.
*
- * - set_blacklist_tables(), get_blacklist_tables(),
- * disconnect_active_server() and replication_usage() are support functions
- * used mainly by uinxctl commands.
+ * - set_excluded_tables(), get_excluded_tables(), disconnect_active_server()
+ * and replication_usage() are support functions used mainly by unixctl
+ * commands.
*/
#define REPLICATION_DEFAULT_PROBE_INTERVAL 60000
@@ -58,9 +58,9 @@ int replication_get_last_error(void);
char *replication_status(void);
void replication_set_probe_interval(int);
-char *set_blacklist_tables(const char *blacklist, bool dryrun)
+char *set_excluded_tables(const char *excluded, bool dryrun)
OVS_WARN_UNUSED_RESULT;
-char *get_blacklist_tables(void) OVS_WARN_UNUSED_RESULT;
+char *get_excluded_tables(void) OVS_WARN_UNUSED_RESULT;
void disconnect_active_server(void);
#endif /* ovsdb/replication.h */
diff --git a/ovsdb/storage.c b/ovsdb/storage.c
index e26252b066f59088a01e90da061f819b5416f94f..f662e9056691ebc5fb74fc83281bb0a6edda834b 100644
--- a/ovsdb/storage.c
+++ b/ovsdb/storage.c
@@ -26,6 +26,7 @@
#include "ovsdb.h"
#include "raft.h"
#include "random.h"
+#include "simap.h"
#include "timeval.h"
#include "util.h"
@@ -188,6 +189,25 @@ ovsdb_storage_get_applied_index(const struct ovsdb_storage *storage)
return storage->raft ? raft_get_applied_index(storage->raft) : 0;
}
+void
+ovsdb_storage_get_memory_usage(const struct ovsdb_storage *storage,
+ struct simap *usage)
+{
+ if (storage->raft) {
+ raft_get_memory_usage(storage->raft, usage);
+ }
+}
+
+char *
+ovsdb_storage_get_error(const struct ovsdb_storage *storage)
+{
+ if (storage->error) {
+ return ovsdb_error_to_string(storage->error);
+ }
+
+ return NULL;
+}
+
void
ovsdb_storage_run(struct ovsdb_storage *storage)
{
diff --git a/ovsdb/storage.h b/ovsdb/storage.h
index 8a9bbab709e35280fb34755d002677052f38b4c6..02b6e7e6c55eaf56bbec7758ddb2695de55308cd 100644
--- a/ovsdb/storage.h
+++ b/ovsdb/storage.h
@@ -23,6 +23,7 @@
struct json;
struct ovsdb_schema;
struct ovsdb_storage;
+struct simap;
struct uuid;
struct ovsdb_error *ovsdb_storage_open(const char *filename, bool rw,
@@ -39,6 +40,9 @@ bool ovsdb_storage_is_leader(const struct ovsdb_storage *);
const struct uuid *ovsdb_storage_get_cid(const struct ovsdb_storage *);
const struct uuid *ovsdb_storage_get_sid(const struct ovsdb_storage *);
uint64_t ovsdb_storage_get_applied_index(const struct ovsdb_storage *);
+void ovsdb_storage_get_memory_usage(const struct ovsdb_storage *,
+ struct simap *usage);
+char *ovsdb_storage_get_error(const struct ovsdb_storage *);
void ovsdb_storage_run(struct ovsdb_storage *);
void ovsdb_storage_wait(struct ovsdb_storage *);
diff --git a/ovsdb/transaction.c b/ovsdb/transaction.c
index 369436bffbf5c917152b39bddf93583ebd2a62b8..8ffefcf7c9d0aa44baa90eb91460555e5ed695df 100644
--- a/ovsdb/transaction.c
+++ b/ovsdb/transaction.c
@@ -967,7 +967,7 @@ ovsdb_txn_complete(struct ovsdb_txn *txn)
{
if (!ovsdb_txn_is_empty(txn)) {
- txn->db->run_triggers = true;
+ txn->db->run_triggers_now = txn->db->run_triggers = true;
ovsdb_monitors_commit(txn->db, txn);
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_update_weak_refs));
ovsdb_error_assert(for_each_txn_row(txn, ovsdb_txn_row_commit));
diff --git a/ovsdb/trigger.c b/ovsdb/trigger.c
index 7e62e90ae381014b6caea4890ee406031126f9e6..0372302af42d1aaef75ff755c3bf65dbfa1cc2c4 100644
--- a/ovsdb/trigger.c
+++ b/ovsdb/trigger.c
@@ -141,7 +141,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now)
struct ovsdb_trigger *t, *next;
bool run_triggers = db->run_triggers;
- db->run_triggers = false;
+ db->run_triggers_now = db->run_triggers = false;
bool disconnect_all = false;
@@ -160,7 +160,7 @@ ovsdb_trigger_run(struct ovsdb *db, long long int now)
void
ovsdb_trigger_wait(struct ovsdb *db, long long int now)
{
- if (db->run_triggers) {
+ if (db->run_triggers_now) {
poll_immediate_wake();
} else {
long long int deadline = LLONG_MAX;
@@ -319,9 +319,16 @@ ovsdb_trigger_try(struct ovsdb_trigger *t, long long int now)
if (!strcmp(ovsdb_error_get_tag(error), "cluster error")) {
/* Temporary error. Transition back to "initialized" state to
* try again. */
+ char *err_s = ovsdb_error_to_string(error);
+ VLOG_DBG("cluster error %s", err_s);
+
jsonrpc_msg_destroy(t->reply);
t->reply = NULL;
t->db->run_triggers = true;
+ if (!strstr(err_s, "not leader")) {
+ t->db->run_triggers_now = true;
+ }
+ free(err_s);
ovsdb_error_destroy(error);
} else {
/* Permanent error. Transition to "completed" state to report
diff --git a/python/automake.mk b/python/automake.mk
index 2f08c77014845ed27a6dd8f9e9d775b26bea41dd..767512f1757f5d5ff4e75fd8df2d4844ee52c86b 100644
--- a/python/automake.mk
+++ b/python/automake.mk
@@ -74,12 +74,12 @@ ovs-install-data-local:
$(MKDIR_P) python/ovs
sed \
-e '/^##/d' \
- -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \
- -e 's,[@]RUNDIR[@],$(RUNDIR),g' \
- -e 's,[@]LOGDIR[@],$(LOGDIR),g' \
- -e 's,[@]bindir[@],$(bindir),g' \
- -e 's,[@]sysconfdir[@],$(sysconfdir),g' \
- -e 's,[@]DBDIR[@],$(DBDIR),g' \
+ -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \
+ -e 's,[@]RUNDIR[@],$(RUNDIR),g' \
+ -e 's,[@]LOGDIR[@],$(LOGDIR),g' \
+ -e 's,[@]bindir[@],$(bindir),g' \
+ -e 's,[@]sysconfdir[@],$(sysconfdir),g' \
+ -e 's,[@]DBDIR[@],$(DBDIR),g' \
< $(srcdir)/python/ovs/dirs.py.template \
> python/ovs/dirs.py.tmp
$(MKDIR_P) $(DESTDIR)$(pkgdatadir)/python/ovs
@@ -107,12 +107,13 @@ ALL_LOCAL += $(srcdir)/python/ovs/dirs.py
$(srcdir)/python/ovs/dirs.py: python/ovs/dirs.py.template
$(AM_V_GEN)sed \
-e '/^##/d' \
- -e 's,[@]pkgdatadir[@],/usr/local/share/openvswitch,g' \
- -e 's,[@]RUNDIR[@],/var/run,g' \
- -e 's,[@]LOGDIR[@],/usr/local/var/log,g' \
- -e 's,[@]bindir[@],/usr/local/bin,g' \
- -e 's,[@]sysconfdir[@],/usr/local/etc,g' \
- -e 's,[@]DBDIR[@],/usr/local/etc/openvswitch,g' \
+ -e 's,[@]pkgdatadir[@],$(pkgdatadir),g' \
+ -e 's,[@]RUNDIR[@],$(RUNDIR),g' \
+ -e 's,[@]LOGDIR[@],$(LOGDIR),g' \
+ -e 's,[@]bindir[@],$(bindir),g' \
+ -e 's,[@]sysconfdir[@],$(sysconfdir),g' \
+ -e 's,[@]DBDIR[@],$(sysconfdir)/openvswitch,g' \
< $? > $@.tmp && \
mv $@.tmp $@
EXTRA_DIST += python/ovs/dirs.py.template
+CLEANFILES += python/ovs/dirs.py
diff --git a/python/build/nroff.py b/python/build/nroff.py
index a949077570e96ee928157cd46cfca38e57e8412c..09795ab52bece9c9301b706ecff2e0838a921576 100644
--- a/python/build/nroff.py
+++ b/python/build/nroff.py
@@ -290,6 +290,11 @@ fillval = .2
\\}"""
+def flatten_header(s):
+ s = s.strip()
+ return re.sub(r'\s+', ' ', s)
+
+
def block_xml_to_nroff(nodes, para='.PP'):
HEADER_TAGS = ('h1', 'h2', 'h3', 'h4')
s = ''
@@ -373,7 +378,9 @@ def block_xml_to_nroff(nodes, para='.PP'):
to_upper = node.tagName == 'h1'
s += ".%s \"" % nroffTag
for child_node in node.childNodes:
- s += inline_xml_to_nroff(child_node, font, to_upper)
+ s += flatten_header(
+ inline_xml_to_nroff(child_node, font, to_upper)
+ )
s += "\"\n"
elif node.tagName == 'pre':
fixed = node.getAttribute('fixed')
diff --git a/python/build/soutil.py b/python/build/soutil.py
index b8027af8634d42db6d1d17c6b4e764c44b98dcb0..a658823028fa21b2bbaf8f02bba5f692ed171fa4 100755
--- a/python/build/soutil.py
+++ b/python/build/soutil.py
@@ -1,6 +1,6 @@
-#! /usr/bin/env python
+#!/usr/bin/env python3
-# Copyright (c) 2008, 2017 Nicira, Inc.
+# Copyright (c) 2008, 2017, 2020 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/python/ovs/.gitignore b/python/ovs/.gitignore
index 98527864664d32f798edc06a53131e8d5a068295..8bbcd824f472e290b194e49a0c9022aef0ef5ffb 100644
--- a/python/ovs/.gitignore
+++ b/python/ovs/.gitignore
@@ -1 +1,2 @@
version.py
+dirs.py
diff --git a/python/ovs/db/idl.py b/python/ovs/db/idl.py
index 020291d486be764154a7173301f4daac42f21625..5850ac7abfc3a65077d1262a81ac058a75e872af 100644
--- a/python/ovs/db/idl.py
+++ b/python/ovs/db/idl.py
@@ -1567,10 +1567,9 @@ class Transaction(object):
for col, val in row._mutations['_inserts'].items():
column = row._table.columns[col]
if column.type.is_map():
- opdat = ["map"]
datum = data.Datum.from_python(column.type, val,
_row_to_uuid)
- opdat.append(datum.as_list())
+ opdat = self._substitute_uuids(datum.to_json())
else:
opdat = ["set"]
inner_opdat = []
diff --git a/python/ovs/db/types.py b/python/ovs/db/types.py
index 3b47b9b30ba0f4b34624253a804e2f3ad0a29b05..626ae8fc4466e645dee30febe1e84cd54ff11be7 100644
--- a/python/ovs/db/types.py
+++ b/python/ovs/db/types.py
@@ -591,7 +591,16 @@ class Type(object):
if self.value:
return "map of %s%s-%s pairs" % (quantity, keyName, valueName)
else:
- if keyName.endswith('s'):
+ # Extract the last word from 'keyName' so we can make it
+ # plural. For linguistic analysis, turn it into English
+ # without formatting so that we don't consider any prefix or
+ # suffix added by escapeLiteral.
+ plainKeyName = (self.key.toEnglish(returnUnchanged)
+ .rpartition(' ')[2].lower())
+
+ if plainKeyName == 'chassis':
+ plural = keyName
+ elif plainKeyName.endswith('s'):
plural = keyName + "es"
else:
plural = keyName + "s"
diff --git a/python/ovs/dirs.py b/python/ovs/dirs.py
deleted file mode 100644
index c67aecbb46dac3e444695afa5ad3853d204f91e1..0000000000000000000000000000000000000000
--- a/python/ovs/dirs.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# The @variables@ in this file are replaced by default directories for
-# use in python/ovs/dirs.py in the source directory and replaced by the
-# configured directories for use in the installed python/ovs/dirs.py.
-#
-import os
-
-# Note that the use of """ is to aid in dealing with paths with quotes in them.
-PKGDATADIR = os.environ.get("OVS_PKGDATADIR", """/usr/local/share/openvswitch""")
-RUNDIR = os.environ.get("OVS_RUNDIR", """/var/run""")
-LOGDIR = os.environ.get("OVS_LOGDIR", """/usr/local/var/log""")
-BINDIR = os.environ.get("OVS_BINDIR", """/usr/local/bin""")
-
-DBDIR = os.environ.get("OVS_DBDIR")
-if not DBDIR:
- sysconfdir = os.environ.get("OVS_SYSCONFDIR")
- if sysconfdir:
- DBDIR = "%s/openvswitch" % sysconfdir
- else:
- DBDIR = """/usr/local/etc/openvswitch"""
diff --git a/python/ovs/stream.py b/python/ovs/stream.py
index e9bb0c85486eb092e1d6494ac7caea6237370d59..f5a520862c0230a328eb26f20bdd0b76c5540601 100644
--- a/python/ovs/stream.py
+++ b/python/ovs/stream.py
@@ -132,6 +132,10 @@ class Stream(object):
IPTOS_PREC_INTERNETCONTROL = 0xc0
DSCP_DEFAULT = IPTOS_PREC_INTERNETCONTROL >> 2
+ @staticmethod
+ def check_connection_completion(sock):
+ return ovs.socket_util.check_connection_completion(sock)
+
@staticmethod
def open(name, dscp=DSCP_DEFAULT):
"""Attempts to connect a stream to a remote peer. 'name' is a
@@ -189,7 +193,7 @@ class Stream(object):
if error:
return error, None
else:
- err = ovs.socket_util.check_connection_completion(sock)
+ err = cls.check_connection_completion(sock)
if err == errno.EAGAIN or err == errno.EINPROGRESS:
status = errno.EAGAIN
err = 0
@@ -261,7 +265,7 @@ class Stream(object):
def __scs_connecting(self):
if self.socket is not None:
- retval = ovs.socket_util.check_connection_completion(self.socket)
+ retval = self.check_connection_completion(self.socket)
assert retval != errno.EINPROGRESS
elif sys.platform == 'win32':
if self.retry_connect:
@@ -761,6 +765,13 @@ Stream.register_method("tcp", TCPStream)
class SSLStream(Stream):
+ @staticmethod
+ def check_connection_completion(sock):
+ try:
+ return Stream.check_connection_completion(sock)
+ except SSL.SysCallError as e:
+ return ovs.socket_util.get_exception_errno(e)
+
@staticmethod
def needs_probes():
return True
diff --git a/python/setup.py b/python/setup.py
index b7252800c1c11e8d2087c9c3d4f6c891ed44b1a3..d385d837223914e5759c516faefa0a2123c6e820 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -30,6 +30,15 @@ except IOError:
file=sys.stderr)
sys.exit(-1)
+try:
+ # Try to open generated ovs/dirs.py. However, in this case we
+ # don't need to exec()
+ open("ovs/dirs.py")
+except IOError:
+ print("Ensure dirs.py is created by running make python/ovs/dirs.py",
+ file=sys.stderr)
+ sys.exit(-1)
+
ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError)
if sys.platform == 'win32':
ext_errors += (IOError, ValueError)
diff --git a/rhel/README.RHEL.rst b/rhel/README.RHEL.rst
index 1cd2065eff96ce240d35d12eeecef1f0021bc826..98175dfd38af17a5e7ffb7bcfc6168bd22732ee3 100644
--- a/rhel/README.RHEL.rst
+++ b/rhel/README.RHEL.rst
@@ -36,9 +36,6 @@ TYPE
* ``OVSDPDKPort``, if ```` is a physical DPDK NIC port (name must start
with ``dpdk`` and end with portid, eg ``dpdk0``)
- * ``OVSDPDKRPort``, if ```` is a DPDK ring port (name must start with
- ``dpdkr`` and end with portid, e.g. ``dpdkr0``)
-
* ``OVSDPDKVhostUserPort`` if ```` is a DPDK vhost-user port
* ``OVSDPDKBond`` if ```` is an OVS DPDK bond.
diff --git a/rhel/etc_sysconfig_network-scripts_ifdown-ovs b/rhel/etc_sysconfig_network-scripts_ifdown-ovs
index 63d048b22f9d31320bf9a9a0830ad0e9d713f205..343ac094522ffee0f44604aa98114495b4fbd4da 100755
--- a/rhel/etc_sysconfig_network-scripts_ifdown-ovs
+++ b/rhel/etc_sysconfig_network-scripts_ifdown-ovs
@@ -59,7 +59,7 @@ case "$TYPE" in
OVSPatchPort|OVSTunnel)
ovs-vsctl -t ${TIMEOUT} -- --if-exists del-port "$OVS_BRIDGE" "$DEVICE"
;;
- OVSDPDKPort|OVSDPDKRPort|OVSDPDKVhostUserPort|OVSDPDKBond)
+ OVSDPDKPort|OVSDPDKVhostUserPort|OVSDPDKBond)
ovs-vsctl -t ${TIMEOUT} -- --if-exists del-port "$OVS_BRIDGE" "$DEVICE"
;;
*)
diff --git a/rhel/etc_sysconfig_network-scripts_ifup-ovs b/rhel/etc_sysconfig_network-scripts_ifup-ovs
index b01461cc42e2037d215059c781a63fec0fb6dfea..0955c0e1fb0f49840c039bbc72f8cf3acdf4df4a 100755
--- a/rhel/etc_sysconfig_network-scripts_ifup-ovs
+++ b/rhel/etc_sysconfig_network-scripts_ifup-ovs
@@ -180,13 +180,6 @@ case "$TYPE" in
${OTHERSCRIPT} "$OVS_BRIDGE"
fi
;;
- OVSDPDKRPort)
- ifup_ovs_bridge
- ovs-vsctl -t ${TIMEOUT} \
- -- --if-exists del-port "$OVS_BRIDGE" "$DEVICE" \
- -- add-port "$OVS_BRIDGE" "$DEVICE" $OVS_OPTIONS \
- -- set Interface "$DEVICE" type=dpdkr ${OVS_EXTRA+-- $OVS_EXTRA}
- ;;
OVSDPDKVhostUserPort)
ifup_ovs_bridge
PORT_TYPE="dpdkvhostuser"
diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in
index 7bc8c34b80afbbad3d9c3d7186978c018f10bbcd..2c0c4fa186a37da092843e0ea4c88ee86c89507b 100644
--- a/rhel/openvswitch-fedora.spec.in
+++ b/rhel/openvswitch-fedora.spec.in
@@ -318,18 +318,19 @@ exit 0
%post
%if %{with libcapng}
if [ $1 -eq 1 ]; then
- sed -i 's:^#OVS_USER_ID=:OVS_USER_ID=:' /etc/sysconfig/openvswitch
- sed -i 's:\(.*su\).*:\1 openvswitch openvswitch:' %{_sysconfdir}/logrotate.d/openvswitch
-
%if %{with dpdk}
- sed -i \
- 's@OVS_USER_ID="openvswitch:openvswitch"@OVS_USER_ID="openvswitch:hugetlbfs"@'\
- /etc/sysconfig/openvswitch
+ %define gname hugetlbfs
+%else
+ %define gname openvswitch
%endif
+ sed -i \
+ 's@^#OVS_USER_ID="openvswitch:openvswitch"@OVS_USER_ID="openvswitch:%{gname}"@'\
+ %{_sysconfdir}/sysconfig/openvswitch
+ sed -i 's:\(.*su\).*:\1 openvswitch %{gname}:' %{_sysconfdir}/logrotate.d/openvswitch
- # In the case of upgrade, this is not needed.
- chown -R openvswitch:openvswitch /etc/openvswitch
- chown -R openvswitch:openvswitch /var/log/openvswitch
+ # In the case of upgrade, this is not needed
+ chown -R openvswitch:openvswitch %{_sysconfdir}/openvswitch
+ chown -R openvswitch:%{gname} %{_localstatedir}/log/openvswitch
fi
%endif
@@ -402,6 +403,7 @@ fi
%{_includedir}/openvswitch/*
%{_includedir}/openflow/*
%exclude %{_libdir}/*.la
+%exclude %{_libdir}/*.a
%if 0%{?rhel} > 7 || 0%{?fedora} > 28
%files -n network-scripts-%{name}
diff --git a/rhel/openvswitch-kmod-fedora.spec.in b/rhel/openvswitch-kmod-fedora.spec.in
index c94f2f5358a7ccfbe0edf4ea39b1bcf9749f9811..ff190064f92878f9aaca6e658a429bbbd9a8f256 100644
--- a/rhel/openvswitch-kmod-fedora.spec.in
+++ b/rhel/openvswitch-kmod-fedora.spec.in
@@ -17,7 +17,9 @@
# - 3.10.0 major revision 693 (RHEL 7.4)
# - 3.10.0 major revision 957 (RHEL 7.6)
# - 3.10.0 major revision 1062 (RHEL 7.7)
-# - 3.10.0 major revision 1101 (RHEL 7.8)
+# - 3.10.0 major revision 1101 (RHEL 7.8 Beta)
+# - 3.10.0 major revision 1127 (RHEL 7.8 GA)
+# - 3.10.0 major revision 1160 (RHEL 7.9 GA)
# By default, build against the current running kernel version
#%define kernel 3.1.5-1.fc16.x86_64
#define kernel %{kernel_source}
@@ -97,8 +99,9 @@ if grep -qs "suse" /etc/os-release; then
elif [ "$mainline_major" = "3" ] && [ "$mainline_minor" = "10" ] &&
{ [ "$major_rev" = "327" ] || [ "$major_rev" = "693" ] || \
[ "$major_rev" = "957" ] || [ "$major_rev" == "1062" ] || \
- [ "$major_rev" = "1101" ]; }; then
- # For RHEL 7.2, 7.4, 7.6, 7.7, and 7.8
+ [ "$major_rev" = "1101" ] || [ "$major_rev" = "1127" ] || \
+ [ "$major_rev" = "1160" ] ; }; then
+ # For RHEL 7.2, 7.4, 7.6, 7.7, 7.8 and 7.9
if [ -x "%{_datadir}/openvswitch/scripts/ovs-kmod-manage.sh" ]; then
%{_datadir}/openvswitch/scripts/ovs-kmod-manage.sh
fi
diff --git a/rhel/usr_lib_systemd_system_ovsdb-server.service b/rhel/usr_lib_systemd_system_ovsdb-server.service
index 4c170c09b48a9e0fb6cc87cf1fbba5d489a1aeac..98338b9dfbc9e9440d5ef2f1e3082eb1fde87f0d 100644
--- a/rhel/usr_lib_systemd_system_ovsdb-server.service
+++ b/rhel/usr_lib_systemd_system_ovsdb-server.service
@@ -11,10 +11,16 @@ PIDFile=/var/run/openvswitch/ovsdb-server.pid
Restart=on-failure
EnvironmentFile=/etc/openvswitch/default.conf
EnvironmentFile=-/etc/sysconfig/openvswitch
+EnvironmentFile=-/run/openvswitch.useropts
+
+# The environment is reloaded for each Exec* line, so remove
+# openvswitch.useropts first in order to load a fresh
+# OVS_USER_ID from default.conf or sysconfig.
+ExecStartPre=/usr/bin/rm -f /run/openvswitch.useropts
+
ExecStartPre=-/usr/bin/chown ${OVS_USER_ID} /var/run/openvswitch /var/log/openvswitch
-ExecStartPre=/bin/sh -c 'rm -f /run/openvswitch.useropts; /usr/bin/echo "OVS_USER_ID=${OVS_USER_ID}" > /run/openvswitch.useropts'
+ExecStartPre=/bin/sh -c '/usr/bin/echo "OVS_USER_ID=${OVS_USER_ID}" > /run/openvswitch.useropts'
ExecStartPre=/bin/sh -c 'if [ "$${OVS_USER_ID/:*/}" != "root" ]; then /usr/bin/echo "OVS_USER_OPT=--ovs-user=${OVS_USER_ID}" >> /run/openvswitch.useropts; fi'
-EnvironmentFile=-/run/openvswitch.useropts
ExecStart=/usr/share/openvswitch/scripts/ovs-ctl \
--no-ovs-vswitchd --no-monitor --system-id=random \
${OVS_USER_OPT} \
diff --git a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh
index a9b5cdd817da554f036a675bf34e3128b7b7d3cd..9bf25a46b590616ab8b298d56d008e2fab54b6c3 100644
--- a/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh
+++ b/rhel/usr_share_openvswitch_scripts_ovs-kmod-manage.sh
@@ -19,7 +19,9 @@
# - 3.10.0 major revision 693 (RHEL 7.4)
# - 3.10.0 major revision 957 (RHEL 7.6)
# - 3.10.0 major revision 1062 (RHEL 7.7)
-# - 3.10.0 major revision 1101 (RHEL 7.8)
+# - 3.10.0 major revision 1101 (RHEL 7.8 Beta)
+# - 3.10.0 major revision 1127 (RHEL 7.8 GA)
+# - 3.10.0 major revision 1160 (RHEL 7.9 GA)
# - 4.4.x, x >= 73 (SLES 12 SP3)
# - 4.12.x, x >= 14 (SLES 12 SP4).
# It is packaged in the openvswitch kmod RPM and run in the post-install
@@ -112,6 +114,16 @@ if [ "$mainline_major" = "3" ] && [ "$mainline_minor" = "10" ]; then
comp_ver=10
ver_offset=4
installed_ver="$minor_rev"
+ elif [ "$major_rev" = "1127" ]; then
+# echo "rhel78"
+ comp_ver=10
+ ver_offset=4
+ installed_ver="$minor_rev"
+ elif [ "$major_rev" = "1160" ]; then
+# echo "rhel79"
+ comp_ver=10
+ ver_offset=4
+ installed_ver="$minor_rev"
fi
elif [ "$mainline_major" = "4" ] && [ "$mainline_minor" = "4" ]; then
if [ "$mainline_patch" -ge "73" ]; then
diff --git a/selinux/openvswitch-custom.te.in b/selinux/openvswitch-custom.te.in
index 2adaf231fe63c5c76386cebe25f249ffd18fd50c..beb0ab0d66128d6bcfbd88d0316febeb03ee3aaf 100644
--- a/selinux/openvswitch-custom.te.in
+++ b/selinux/openvswitch-custom.te.in
@@ -19,6 +19,7 @@ require {
type kernel_t;
type hostname_exec_t;
type modules_conf_t;
+ type modules_dep_t;
type modules_object_t;
type passwd_file_t;
type plymouth_exec_t;
@@ -121,6 +122,7 @@ allow openvswitch_load_module_t insmod_exec_t:file { execute execute_no_trans ge
allow openvswitch_load_module_t kernel_t:system module_request;
allow openvswitch_load_module_t modules_conf_t:dir { getattr open read search };
allow openvswitch_load_module_t modules_conf_t:file { getattr open read };
+allow openvswitch_load_module_t modules_dep_t:file { getattr map open read };
allow openvswitch_load_module_t modules_object_t:file { map getattr open read };
allow openvswitch_load_module_t modules_object_t:dir { getattr open read search };
allow openvswitch_load_module_t openvswitch_load_module_exec_t:file { entrypoint };
diff --git a/tests/.gitignore b/tests/.gitignore
index c5abb32d025a5e36fa7bc9fb4f37ab3d22b3ca33..45b4f67b2a4326c26988e36baa14d9acdd14300e 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -7,7 +7,6 @@
/idltest.h
/idltest.ovsidl
/ovstest
-/test-dpdkr
/ovs-pki.log
/ovsdb-cluster-testsuite
/ovsdb-cluster-testsuite.dir/
@@ -25,6 +24,9 @@
/system-userspace-testsuite
/system-userspace-testsuite.dir/
/system-userspace-testsuite.log
+/system-tso-testsuite
+/system-tso-testsuite.dir/
+/system-tso-testsuite.log
/system-offloads-testsuite
/system-offloads-testsuite.dir/
/system-offloads-testsuite.log
diff --git a/tests/atlocal.in b/tests/atlocal.in
index 1dc7cd5d087af9fc5ee42461365b1af11171b1ac..02e2dc57f292e3e1941fe485808abdda40a013b8 100644
--- a/tests/atlocal.in
+++ b/tests/atlocal.in
@@ -111,6 +111,16 @@ if test "$IS_WIN32" = yes; then
export PYTHONLEGACYWINDOWSSTDIO
fi
+# Check for CPU architecture
+case `uname -m` in
+aarch64)
+ IS_ARM64="yes"
+ ;;
+*)
+ IS_ARM64="no"
+ ;;
+esac
+
# Check whether to run IPv6 tests.
$PYTHON3 -c '
import errno
diff --git a/tests/automake.mk b/tests/automake.mk
index 9c7ebdce9bf07c046b0c127f446b2f40a40455b1..677b99a6b487147e5eab34ed80c9c9e89fb63501 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -4,6 +4,7 @@ EXTRA_DIST += \
$(SYSTEM_TESTSUITE_AT) \
$(SYSTEM_KMOD_TESTSUITE_AT) \
$(SYSTEM_USERSPACE_TESTSUITE_AT) \
+ $(SYSTEM_TSO_TESTSUITE_AT) \
$(SYSTEM_AFXDP_TESTSUITE_AT) \
$(SYSTEM_OFFLOADS_TESTSUITE_AT) \
$(SYSTEM_DPDK_TESTSUITE_AT) \
@@ -11,6 +12,7 @@ EXTRA_DIST += \
$(TESTSUITE) \
$(SYSTEM_KMOD_TESTSUITE) \
$(SYSTEM_USERSPACE_TESTSUITE) \
+ $(SYSTEM_TSO_TESTSUITE) \
$(SYSTEM_AFXDP_TESTSUITE) \
$(SYSTEM_OFFLOADS_TESTSUITE) \
$(SYSTEM_DPDK_TESTSUITE) \
@@ -152,7 +154,13 @@ SYSTEM_KMOD_TESTSUITE_AT = \
SYSTEM_USERSPACE_TESTSUITE_AT = \
tests/system-userspace-testsuite.at \
tests/system-userspace-macros.at \
- tests/system-userspace-packet-type-aware.at
+ tests/system-userspace-packet-type-aware.at \
+ tests/system-route.at
+
+SYSTEM_TSO_TESTSUITE_AT = \
+ tests/system-tso-testsuite.at \
+ tests/system-tap.at \
+ tests/system-tso-macros.at
SYSTEM_AFXDP_TESTSUITE_AT = \
tests/system-userspace-macros.at \
@@ -183,6 +191,7 @@ TESTSUITE = $(srcdir)/tests/testsuite
TESTSUITE_PATCH = $(srcdir)/tests/testsuite.patch
SYSTEM_KMOD_TESTSUITE = $(srcdir)/tests/system-kmod-testsuite
SYSTEM_USERSPACE_TESTSUITE = $(srcdir)/tests/system-userspace-testsuite
+SYSTEM_TSO_TESTSUITE = $(srcdir)/tests/system-tso-testsuite
SYSTEM_AFXDP_TESTSUITE = $(srcdir)/tests/system-afxdp-testsuite
SYSTEM_OFFLOADS_TESTSUITE = $(srcdir)/tests/system-offloads-testsuite
SYSTEM_DPDK_TESTSUITE = $(srcdir)/tests/system-dpdk-testsuite
@@ -296,6 +305,12 @@ check-offloads-valgrind: all $(valgrind_wrappers) $(check_DATA)
@echo '----------------------------------------------------------------------'
@echo 'Valgrind output can be found in tests/system-offloads-testsuite.dir/*/valgrind.*'
@echo '----------------------------------------------------------------------'
+check-tso-valgrind: all $(valgrind_wrappers) $(check_DATA)
+ $(SHELL) '$(SYSTEM_TSO_TESTSUITE)' -C tests VALGRIND='$(VALGRIND)' AUTOTEST_PATH='tests/valgrind:$(AUTOTEST_PATH)' -d $(TESTSUITEFLAGS) -j1
+ @echo
+ @echo '----------------------------------------------------------------------'
+ @echo 'Valgrind output can be found in tests/system-tso-testsuite.dir/*/valgrind.*'
+ @echo '----------------------------------------------------------------------'
check-helgrind: all $(valgrind_wrappers) $(check_DATA)
-$(SHELL) '$(TESTSUITE)' -C tests CHECK_VALGRIND=true VALGRIND='$(HELGRIND)' AUTOTEST_PATH='tests/valgrind:$(AUTOTEST_PATH)' -d $(TESTSUITEFLAGS)
@@ -326,6 +341,10 @@ check-system-userspace: all
set $(SHELL) '$(SYSTEM_USERSPACE_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \
"$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck)
+check-system-tso: all
+ set $(SHELL) '$(SYSTEM_TSO_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)'; \
+ "$$@" $(TESTSUITEFLAGS) -j1 || (test X'$(RECHECK)' = Xyes && "$$@" --recheck)
+
check-afxdp: all
set $(SHELL) '$(SYSTEM_AFXDP_TESTSUITE)' -C tests AUTOTEST_PATH='$(AUTOTEST_PATH)' $(TESTSUITEFLAGS) -j1; \
"$$@" || (test X'$(RECHECK)' = Xyes && "$$@" --recheck)
@@ -367,6 +386,10 @@ $(SYSTEM_USERSPACE_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_USERSP
$(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at
$(AM_V_at)mv $@.tmp $@
+$(SYSTEM_TSO_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_TSO_TESTSUITE_AT) $(COMMON_MACROS_AT)
+ $(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at
+ $(AM_V_at)mv $@.tmp $@
+
$(SYSTEM_AFXDP_TESTSUITE): package.m4 $(SYSTEM_TESTSUITE_AT) $(SYSTEM_AFXDP_TESTSUITE_AT) $(COMMON_MACROS_AT)
$(AM_V_GEN)$(AUTOTEST) -I '$(srcdir)' -o $@.tmp $@.at
$(AM_V_at)mv $@.tmp $@
@@ -414,13 +437,6 @@ tests/idltest.ovsidl: $(IDLTEST_IDL_FILES)
tests/idltest.c: tests/idltest.h
-if DPDK_NETDEV
-noinst_PROGRAMS += tests/test-dpdkr
-tests_test_dpdkr_SOURCES = \
- tests/dpdk/ring_client.c
-tests_test_dpdkr_LDADD = lib/libopenvswitch.la $(LIBS)
-endif
-
noinst_PROGRAMS += tests/ovstest
tests_ovstest_SOURCES = \
tests/ovstest.c \
diff --git a/tests/bfd.at b/tests/bfd.at
index 7af7be54aa44a6b7240d404580f6e974358cdb95..f5c6409f6c656ad6e807c85056ff0c51ca4a645a 100644
--- a/tests/bfd.at
+++ b/tests/bfd.at
@@ -266,6 +266,7 @@ AT_CLEANUP
# Tests below are for bfd decay features.
AT_SETUP([bfd - bfd decay])
+AT_SKIP_IF([test "$IS_ARM64" = "yes"])
OVS_VSWITCHD_START([add-br br1 -- set bridge br1 datapath-type=dummy -- \
add-port br1 p1 -- set Interface p1 type=patch \
options:peer=p0 ofport_request=2 -- \
@@ -1100,3 +1101,35 @@ BFD_CHECK_MULT([p1], [3], [3])
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([bfd - overlay])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \
+ options:remote_ip=2.2.2.2 ofport_request=1 -- \
+ set interface p1 bfd:enable=true bfd:bfd_src_ip=2.2.2.1 -- \
+ set bridge br0 fail-mode=standalone])
+
+# Userspace slow path handles normal BFD packets.
+AT_CHECK([ovs-appctl ofproto/trace --l7-len 0 ovs-dummy 'tunnel(tun_id=0x0,src=2.2.2.2,dst=2.2.2.1,tos=0x0,ttl=64,tp_src=0,tp_dst=0,flags()),in_port(1),skb_mark(0/0),eth(src=00:11:22:33:44:55,dst=00:23:20:00:00:01),eth_type(0x0800),ipv4(src=2.2.2.2/0.0.0.0,dst=2.2.2.1/0.0.0.0,proto=17/0xff,tos=0/0,ttl=255/0,frag=no),udp(src=49152/0,dst=3784/0xffff)' -generate], [0], [stdout])
+# Check that the packet is handled as a BFD packet.
+AT_CHECK([tail -2 stdout], [0], [dnl
+This flow is handled by the userspace slow path because it:
+ - Consists of BFD packets.
+], [])
+
+# Userspace slow path won't handle overlay BFD packets. Instead, other OVS flows, if configured, will handle them.
+AT_CHECK([ovs-appctl ofproto/trace --l7-len 0 ovs-dummy 'tunnel(tun_id=0x0,src=2.2.2.2,dst=2.2.2.1,tos=0x0,ttl=64,tp_src=0,tp_dst=0,flags()),in_port(1),skb_mark(0/0),eth(src=00:11:22:33:44:66,dst=00:23:20:00:00:77),eth_type(0x0800),ipv4(src=192.168.2.2/0.0.0.0,dst=192.168.2.1/0.0.0.0,proto=17/0xff,tos=0/0,ttl=255/0,frag=no),udp(src=49152/0,dst=3784/0xffff)' -generate], [0], [stdout])
+AT_CHECK([tail -10 stdout], [0], [dnl
+bridge("br0")
+-------------
+ 0. priority 0
+ NORMAL
+ -> learned that 00:11:22:33:44:66 is on port p1 in VLAN 0
+ -> no learned MAC for destination, flooding
+
+Final flow: unchanged
+Megaflow: recirc_id=0,eth,udp,tun_id=0,tun_src=2.2.2.2,tun_dst=2.2.2.1,tun_tos=0,tun_flags=-df-csum+key,in_port=1,dl_src=00:11:22:33:44:66,dl_dst=00:23:20:00:00:77,nw_frag=no,tp_dst=3784
+Datapath actions: 100
+], [])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/bridge.at b/tests/bridge.at
index d48463e263bce766d067f8f70fe0e8e514172311..904f1381c785c3dee89ef97efd65461c4350d2db 100644
--- a/tests/bridge.at
+++ b/tests/bridge.at
@@ -103,3 +103,20 @@ AT_CHECK([ovs-appctl -t ovs-vswitchd version], [0], [ignore])
OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
OVS_APP_EXIT_AND_WAIT([ovsdb-server])
AT_CLEANUP
+
+AT_SETUP([bridge - change ofproto versions])
+dnl Start vswitch and add a version test bridge
+OVS_VSWITCHD_START(
+ [add-br vr_test0 -- \
+ set bridge vr_test0 datapath-type=dummy \
+ protocols=OpenFlow10])
+
+dnl set the version to include, say, OpenFlow14
+AT_CHECK([ovs-vsctl set bridge vr_test0 protocols=OpenFlow10,OpenFlow14])
+
+dnl Now try to add a flow using an OpenFlow bundle (--bundle).
+AT_CHECK([ovs-ofctl add-flow vr_test0 --bundle actions=normal])
+
+OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
+OVS_APP_EXIT_AND_WAIT([ovsdb-server])
+AT_CLEANUP
diff --git a/tests/bundle.at b/tests/bundle.at
index 0a4eadc1e0f3ec3e77ea3d55a56a11fc285eb896..2c2396cb89b933d5f97c89232dcfd1ebc19ea013 100644
--- a/tests/bundle.at
+++ b/tests/bundle.at
@@ -9,7 +9,7 @@ AT_BANNER([bundle link selection])
AT_SETUP([hrw bundle link selection])
AT_KEYWORDS([bundle_action])
-AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5']],
+AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],members:1,2,3,4,5']],
[0], [ignore])
# 100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00
# 110000: disruption=0.50 (perfect=0.50) 0.50 0.50 0.00 0.00 0.00 0.00
@@ -80,7 +80,7 @@ AT_CLEANUP
AT_SETUP([active_backup bundle link selection])
AT_KEYWORDS([bundle_action])
-AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,active_backup,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5,6']],
+AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,active_backup,ofport,NXM_NX_REG0[],members:1,2,3,4,5,6']],
[0],
[100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00
110000: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00
@@ -152,7 +152,7 @@ AT_CLEANUP
AT_SETUP([hrw bundle single link selection])
AT_KEYWORDS([bundle_action])
-AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],slaves:1']],
+AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],members:1']],
[0], [ignore])
# 1: disruption=1.00 (perfect=1.00) 1.00
# 0: disruption=1.00 (perfect=1.00) 0.00
@@ -161,7 +161,7 @@ AT_CLEANUP
AT_SETUP([hrw bundle no link selection])
AT_KEYWORDS([bundle_action])
-AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],slaves:']],
+AT_CHECK([[ovstest test-bundle 'symmetric_l4,60,hrw,ofport,NXM_NX_REG0[],members:']],
[0], [ignore])
AT_CLEANUP
#: disruption=0.00 (perfect=0.00)
@@ -176,29 +176,29 @@ AT_CLEANUP
AT_SETUP([bundle action bad fields])
AT_KEYWORDS([bundle_action])
-AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(xyzzy,60,hrw,ofport,slaves:1,2))'], [1], [],
- [ovs-ofctl: xyzzy,60,hrw,ofport,slaves:1,2: unknown fields `xyzzy'
+AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(xyzzy,60,hrw,ofport,members:1,2))'], [1], [],
+ [ovs-ofctl: xyzzy,60,hrw,ofport,members:1,2: unknown fields `xyzzy'
])
AT_CLEANUP
AT_SETUP([bundle action bad algorithm])
AT_KEYWORDS([bundle_action])
-AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,fubar,ofport,slaves:1,2))'], [1], [],
- [ovs-ofctl: symmetric_l4,60,fubar,ofport,slaves:1,2: unknown algorithm `fubar'
+AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,fubar,ofport,members:1,2))'], [1], [],
+ [ovs-ofctl: symmetric_l4,60,fubar,ofport,members:1,2: unknown algorithm `fubar'
])
AT_CLEANUP
-AT_SETUP([bundle action bad slave type])
+AT_SETUP([bundle action bad member type])
AT_KEYWORDS([bundle_action])
-AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,hrw,robot,slaves:1,2))'], [1], [],
- [ovs-ofctl: symmetric_l4,60,hrw,robot,slaves:1,2: unknown slave_type `robot'
+AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,hrw,robot,members:1,2))'], [1], [],
+ [ovs-ofctl: symmetric_l4,60,hrw,robot,members:1,2: unknown member_type `robot'
])
AT_CLEANUP
-AT_SETUP([bundle action bad slave delimiter])
+AT_SETUP([bundle action bad member delimiter])
AT_KEYWORDS([bundle_action])
AT_CHECK([ovs-ofctl parse-flow 'actions=bundle(symmetric_l4,60,hrw,ofport,robot:1,2))'], [1], [],
- [ovs-ofctl: symmetric_l4,60,hrw,ofport,robot:1,2: missing slave delimiter, expected `slaves' got `robot'
+ [ovs-ofctl: symmetric_l4,60,hrw,ofport,robot:1,2: missing member delimiter, expected `members', got `robot'
])
AT_CLEANUP
@@ -211,9 +211,9 @@ dnl Valgrind warnings for use-after-free bugs.
AT_SETUP([bundle action with many ports])
AT_KEYWORDS([bundle_action])
OVS_VSWITCHD_START
-AT_CHECK([ovs-ofctl add-flow br0 'actions=set_field:0x1->metadata,set_field:0x2->metadata,set_field:0x3->metadata,set_field:0x4->metadata,bundle(symmetric_l4,0,hrw,ofport,slaves:[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]])'])
+AT_CHECK([ovs-ofctl add-flow br0 'actions=set_field:0x1->metadata,set_field:0x2->metadata,set_field:0x3->metadata,set_field:0x4->metadata,bundle(symmetric_l4,0,hrw,ofport,members:[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]])'])
AT_CHECK([ovs-ofctl dump-flows br0 --no-stats], [0], [dnl
- actions=load:0x1->OXM_OF_METADATA[[]],load:0x2->OXM_OF_METADATA[[]],load:0x3->OXM_OF_METADATA[[]],load:0x4->OXM_OF_METADATA[[]],bundle(symmetric_l4,0,hrw,ofport,slaves:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40)
+ actions=load:0x1->OXM_OF_METADATA[[]],load:0x2->OXM_OF_METADATA[[]],load:0x3->OXM_OF_METADATA[[]],load:0x4->OXM_OF_METADATA[[]],bundle(symmetric_l4,0,hrw,ofport,members:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40)
])
OVS_VSWITCHD_STOP
AT_CLEANUP
@@ -226,7 +226,7 @@ OVS_VSWITCHD_START([dnl
add-port br0 p2 -- set Interface p2 type=dummy -- \
set Interface p2 ofport_request=2
])
-AT_CHECK([ovs-ofctl add-flow br0 'actions=bundle(eth_src,50,active_backup,ofport,slaves:1,2)'])
+AT_CHECK([ovs-ofctl add-flow br0 'actions=bundle(eth_src,50,active_backup,ofport,members:1,2)'])
AT_CHECK([ovs-ofctl mod-port br0 p1 up])
AT_CHECK([ovs-ofctl mod-port br0 p2 up])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=LOCAL,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06'], [0], [stdout])
@@ -264,7 +264,7 @@ OVS_VSWITCHD_START([dnl
add-port br0 p2 -- set Interface p2 type=dummy -- \
set Interface p2 ofport_request=2
])
-AT_CHECK([ovs-ofctl add-flow br0 'actions=bundle_load(eth_src,50,hrw,ofport,OXM_OF_ETH_SRC[[0..15]],slaves:1,2)'])
+AT_CHECK([ovs-ofctl add-flow br0 'actions=bundle_load(eth_src,50,hrw,ofport,OXM_OF_ETH_SRC[[0..15]],members:1,2)'])
AT_CHECK([ovs-ofctl mod-port br0 p1 down])
AT_CHECK([ovs-ofctl mod-port br0 p2 down])
AT_CHECK([ovs-appctl ofproto/trace br0 'in_port=LOCAL,dl_src=50:54:00:00:00:05,dl_dst=50:54:00:00:00:06'], [0], [stdout])
@@ -276,7 +276,7 @@ AT_CLEANUP
AT_SETUP([hrw bundle symmetric_l3 link selection])
AT_KEYWORDS([bundle_action])
-AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5']],
+AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],members:1,2,3,4,5']],
[0], [ignore])
# 100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00
# 110000: disruption=0.50 (perfect=0.50) 0.50 0.50 0.00 0.00 0.00 0.00
@@ -347,7 +347,7 @@ AT_CLEANUP
AT_SETUP([active_backup bundle symmetric_l3 link selection])
AT_KEYWORDS([bundle_action])
-AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,active_backup,ofport,NXM_NX_REG0[],slaves:1,2,3,4,5,6']],
+AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,active_backup,ofport,NXM_NX_REG0[],members:1,2,3,4,5,6']],
[0],
[100000: disruption=1.00 (perfect=1.00) 1.00 0.00 0.00 0.00 0.00 0.00
110000: disruption=0.00 (perfect=0.00) 1.00 0.00 0.00 0.00 0.00 0.00
@@ -419,7 +419,7 @@ AT_CLEANUP
AT_SETUP([hrw bundle symmetric_l3 single link selection])
AT_KEYWORDS([bundle_action])
-AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],slaves:1']],
+AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],members:1']],
[0], [ignore])
# 1: disruption=1.00 (perfect=1.00) 1.00
# 0: disruption=1.00 (perfect=1.00) 0.00
@@ -428,7 +428,7 @@ AT_CLEANUP
AT_SETUP([hrw bundle symmetric_l3 single link selection])
AT_KEYWORDS([bundle_action])
-AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],slaves:1']],
+AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],members:1']],
[0], [ignore])
# 1: disruption=1.00 (perfect=1.00) 1.00
# 0: disruption=1.00 (perfect=1.00) 0.00
@@ -437,7 +437,7 @@ AT_CLEANUP
AT_SETUP([hrw bundle symmetric_l3 no link selection])
AT_KEYWORDS([bundle_action])
-AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],slaves:']],
+AT_CHECK([[ovstest test-bundle 'symmetric_l3,60,hrw,ofport,NXM_NX_REG0[],members:']],
[0], [ignore])
AT_CLEANUP
#: disruption=0.00 (perfect=0.00)
@@ -446,9 +446,9 @@ AT_CLEANUP
AT_SETUP([bundle symmetric_l3 action with many ports])
AT_KEYWORDS([bundle_action])
OVS_VSWITCHD_START
-AT_CHECK([ovs-ofctl add-flow br0 'actions=set_field:0x1->metadata,set_field:0x2->metadata,set_field:0x3->metadata,set_field:0x4->metadata,bundle(symmetric_l3,0,hrw,ofport,slaves:[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]])'])
+AT_CHECK([ovs-ofctl add-flow br0 'actions=set_field:0x1->metadata,set_field:0x2->metadata,set_field:0x3->metadata,set_field:0x4->metadata,bundle(symmetric_l3,0,hrw,ofport,members:[[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40]])'])
AT_CHECK([ovs-ofctl dump-flows br0 --no-stats], [0], [dnl
- actions=load:0x1->OXM_OF_METADATA[[]],load:0x2->OXM_OF_METADATA[[]],load:0x3->OXM_OF_METADATA[[]],load:0x4->OXM_OF_METADATA[[]],bundle(symmetric_l3,0,hrw,ofport,slaves:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40)
+ actions=load:0x1->OXM_OF_METADATA[[]],load:0x2->OXM_OF_METADATA[[]],load:0x3->OXM_OF_METADATA[[]],load:0x4->OXM_OF_METADATA[[]],bundle(symmetric_l3,0,hrw,ofport,members:1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40)
])
OVS_VSWITCHD_STOP
AT_CLEANUP
diff --git a/tests/checkpatch.at b/tests/checkpatch.at
index 6c739477227051146a1b4bc1849555eb13267811..a51e46e7ae3d14f71d09eadf97e607edcb0bb235 100755
--- a/tests/checkpatch.at
+++ b/tests/checkpatch.at
@@ -326,3 +326,20 @@ try_checkpatch \
"
AT_CLEANUP
+
+AT_SETUP([checkpatch - whitespace around cast])
+try_checkpatch \
+ "COMMON_PATCH_HEADER
+ + (int) a;
+ "
+
+try_checkpatch \
+ "COMMON_PATCH_HEADER
+ + (int)a;
+ " \
+ "ERROR: Inappropriate spacing around cast
+ #8 FILE: A.c:1:
+ (int)a;
+"
+
+AT_CLEANUP
diff --git a/tests/dpdk/ring_client.c b/tests/dpdk/ring_client.c
deleted file mode 100644
index 8cc3fb53364348f0b4f2750b9e2a1e44cae80364..0000000000000000000000000000000000000000
--- a/tests/dpdk/ring_client.c
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#include
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-#include "util.h"
-
-/* Number of packets to attempt to read from queue. */
-#define PKT_READ_SIZE ((uint16_t)32)
-
-/* Define common names for structures shared between ovs_dpdk and client. */
-#define MP_CLIENT_RXQ_NAME "dpdkr%u_tx"
-#define MP_CLIENT_TXQ_NAME "dpdkr%u_rx"
-
-#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
-
-/* Our client id number - tells us which rx queue to read, and tx
- * queue to write to.
- */
-static unsigned int client_id;
-
-/*
- * Given the rx queue name template above, get the queue name.
- */
-static inline const char *
-get_rx_queue_name(unsigned int id)
-{
- /* Buffer for return value. */
- static char buffer[RTE_RING_NAMESIZE];
-
- snprintf(buffer, sizeof(buffer), MP_CLIENT_RXQ_NAME, id);
- return buffer;
-}
-
-/*
- * Given the tx queue name template above, get the queue name.
- */
-static inline const char *
-get_tx_queue_name(unsigned int id)
-{
- /* Buffer for return value. */
- static char buffer[RTE_RING_NAMESIZE];
-
- snprintf(buffer, sizeof(buffer), MP_CLIENT_TXQ_NAME, id);
- return buffer;
-}
-
-/*
- * Print a usage message.
- */
-static void
-usage(const char *progname)
-{
- printf("\nUsage: %s [EAL args] -- -n \n", progname);
-}
-
-/*
- * Convert the client id number from a string to an usigned int.
- */
-static int
-parse_client_num(const char *client)
-{
- if (str_to_uint(client, 10, &client_id)) {
- return 0;
- } else {
- return -1;
- }
-}
-
-/*
- * Parse the application arguments to the client app.
- */
-static int
-parse_app_args(int argc, char *argv[])
-{
- int option_index = 0, opt = 0;
- char **argvopt = argv;
- const char *progname = NULL;
- static struct option lgopts[] = {
- {NULL, 0, NULL, 0 }
- };
- progname = argv[0];
-
- while ((opt = getopt_long(argc, argvopt, "n:", lgopts,
- &option_index)) != EOF) {
- switch (opt) {
- case 'n':
- if (parse_client_num(optarg) != 0) {
- usage(progname);
- return -1;
- }
- break;
- default:
- usage(progname);
- return -1;
- }
- }
-
- return 0;
-}
-
-/*
- * Application main function - loops through
- * receiving and processing packets. Never returns
- */
-int
-main(int argc, char *argv[])
-{
- struct rte_ring *rx_ring = NULL;
- struct rte_ring *tx_ring = NULL;
- int retval = 0;
- void *pkts[PKT_READ_SIZE];
- int rslt = 0;
-
- if ((retval = rte_eal_init(argc, argv)) < 0) {
- return -1;
- }
-
- argc -= retval;
- argv += retval;
-
- if (parse_app_args(argc, argv) < 0) {
- rte_exit(EXIT_FAILURE, "Invalid command-line arguments\n");
- }
-
- rx_ring = rte_ring_lookup(get_rx_queue_name(client_id));
- if (rx_ring == NULL) {
- rte_exit(EXIT_FAILURE,
- "Cannot get RX ring - is server process running?\n");
- }
-
- tx_ring = rte_ring_lookup(get_tx_queue_name(client_id));
- if (tx_ring == NULL) {
- rte_exit(EXIT_FAILURE,
- "Cannot get TX ring - is server process running?\n");
- }
-
- RTE_LOG(INFO, APP, "Finished Process Init.\n");
-
- printf("\nClient process %u handling packets\n", client_id);
- printf("[Press Ctrl-C to quit ...]\n");
-
- for (;;) {
- unsigned rx_pkts = PKT_READ_SIZE;
-
- /* Try dequeuing max possible packets first, if that fails, get the
- * most we can. Loop body should only execute once, maximum.
- */
- while (unlikely(rte_ring_dequeue_bulk(rx_ring, pkts,
- rx_pkts, NULL) != 0) && rx_pkts > 0) {
- rx_pkts = (uint16_t)RTE_MIN(rte_ring_count(rx_ring), PKT_READ_SIZE);
- }
-
- if (rx_pkts > 0) {
- /* blocking enqueue */
- do {
- rslt = rte_ring_enqueue_bulk(tx_ring, pkts, rx_pkts, NULL);
- } while (rslt == -ENOBUFS);
- }
- }
-}
diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at
index 0aeb4e788fa972de5998320601b53b421fb1c233..2862a3c9b96d7cd023f9774ca03d45a23b51cad7 100644
--- a/tests/dpif-netdev.at
+++ b/tests/dpif-netdev.at
@@ -13,6 +13,7 @@ strip_timers () {
strip_xout () {
sed '
+ s/mega_ufid:[-0-9a-f]* //
s/ufid:[-0-9a-f]* //
s/used:[0-9]*\.[0-9]*/used:0.0/
s/actions:.*/actions: /
@@ -23,6 +24,7 @@ strip_xout () {
strip_xout_keep_actions () {
sed '
+ s/mega_ufid:[-0-9a-f]* //
s/ufid:[-0-9a-f]* //
s/used:[0-9]*\.[0-9]*/used:0.0/
s/packets:[0-9]*/packets:0/
@@ -68,11 +70,13 @@ AT_CHECK([ovs-ofctl add-flow br0 action=normal])
ovs-appctl time/stop
ovs-appctl time/warp 5000
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:01,dst=50:54:00:00:02:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9),tcp_flags(ack)'])
+ OVS_WAIT_UNTIL([grep "miss upcall" ovs-vswitchd.log])
AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl
skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:01,dst=50:54:00:00:02:00),eth_type(0x0800),ipv4(src=10.0.0.1,dst=10.0.0.2,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9),tcp_flags(ack)
])
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:06:00),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.6,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9),tcp_flags(ack)' --len 1024])
+ OVS_WAIT_UNTIL([test `grep -c "miss upcall" ovs-vswitchd.log` -ge 2])
AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl
skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:06:00),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.6,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=9),tcp_flags(ack)
])
@@ -132,8 +136,8 @@ m4_define([DPIF_NETDEV_MISS_FLOW_INSTALL],
AT_CHECK([ovs-ofctl add-flow br0 action=normal])
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
ovs-appctl ofproto/trace 'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'
- sleep 1
+ OVS_WAIT_UNTIL([grep "miss upcall" ovs-vswitchd.log])
AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl
skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)
])
@@ -145,8 +149,8 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:
AT_CHECK([ovs-appctl upcall/disable-megaflows], [0], [megaflows disabled
])
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
- sleep 1
+ OVS_WAIT_UNTIL([test `grep -c "miss upcall" ovs-vswitchd.log` -ge 2])
AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl
skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)
])
@@ -229,11 +233,12 @@ m4_define([DPIF_NETDEV_MISS_FLOW_DUMP],
AT_CHECK([ovs-ofctl add-flow br0 action=normal])
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
- sleep 1
+ OVS_WAIT_UNTIL([grep "miss upcall" ovs-vswitchd.log])
AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl
skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)
])
+ ovs-appctl revalidator/wait
AT_CHECK([filter_flow_dump < ovs-vswitchd.log | strip_xout], [0], [dnl
skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),recirc_id(0),dp_hash(0/0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions:
])
@@ -244,11 +249,12 @@ skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label
AT_CHECK([ovs-appctl upcall/disable-ufid], [0], [Datapath dumping tersely using UFID disabled
], [])
AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
- sleep 1
+ OVS_WAIT_UNTIL([test `grep -c "miss upcall" ovs-vswitchd.log` -ge 2])
AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl
skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)
])
+ ovs-appctl revalidator/wait
AT_CHECK([filter_flow_dump < ovs-vswitchd.log | strip_xout], [0], [dnl
skb_priority(0),skb_mark(0),ct_state(0/0xff),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions:
skb_priority(0/0),skb_mark(0/0),ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),recirc_id(0),dp_hash(0/0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions:
@@ -371,7 +377,7 @@ m4_define([DPIF_NETDEV_FLOW_HW_OFFLOAD],
[AT_SETUP([dpif-netdev - partial hw offload - $1])
OVS_VSWITCHD_START(
[add-port br0 p1 -- \
- set interface p1 type=$1 ofport_request=1 options:pstream=punix:$OVS_RUNDIR/p1.sock options:ifindex=1 -- \
+ set interface p1 type=$1 ofport_request=1 options:pstream=punix:$OVS_RUNDIR/p1.sock options:ifindex=1100 -- \
set bridge br0 datapath-type=dummy \
other-config:datapath-id=1234 fail-mode=secure], [], [],
[m4_if([$1], [dummy-pmd], [--dummy-numa="0,0,0,0,1,1,1,1"], [])])
@@ -393,7 +399,7 @@ skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc
# Check that flow successfully offloaded.
OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log])
AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl
-p1: flow put[[create]]: flow match: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, mark: 0
+p1: flow put[[create]]: flow match: recirc_id=0,eth,ip,in_port=1,vlan_tci=0x0000,nw_frag=no, mark: 1
])
# Check that datapath flow installed successfully.
AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl
@@ -404,7 +410,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), a
# Check for succesfull packet matching with installed offloaded flow.
AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl
-p1: packet: ip,vlan_tci=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=64 matches with flow: recirc_id=0,eth,ip,vlan_tci=0x0000,nw_frag=no with mark: 0
+p1: packet: ip,vlan_tci=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_proto=0,nw_tos=0,nw_ecn=0,nw_ttl=64 matches with flow: recirc_id=0,eth,ip,vlan_tci=0x0000,nw_frag=no with mark: 1
])
ovs-appctl revalidator/wait
@@ -421,7 +427,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), p
# Check that flow successfully deleted from HW.
OVS_WAIT_UNTIL([grep "succeed to delete netdev flow" ovs-vswitchd.log])
AT_CHECK([filter_hw_flow_del < ovs-vswitchd.log | strip_xout], [0], [dnl
-p1: flow del: mark: 0
+p1: flow del: mark: 1
])
OVS_VSWITCHD_STOP
AT_CLEANUP])
@@ -434,7 +440,7 @@ m4_define([DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS],
[AT_SETUP([dpif-netdev - partial hw offload with packet modifications - $1])
OVS_VSWITCHD_START(
[add-port br0 p1 -- \
- set interface p1 type=$1 ofport_request=1 options:pcap=p1.pcap options:ifindex=1 -- \
+ set interface p1 type=$1 ofport_request=1 options:pcap=p1.pcap options:ifindex=1101 -- \
set bridge br0 datapath-type=dummy \
other-config:datapath-id=1234 fail-mode=secure], [], [],
[m4_if([$1], [dummy-pmd], [--dummy-numa="0,0,0,0,1,1,1,1"], [])])
@@ -460,7 +466,7 @@ packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type
# Check that flow successfully offloaded.
OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log])
AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl
-p1: flow put[[create]]: flow match: recirc_id=0,eth,udp,in_port=1,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82, mark: 0
+p1: flow put[[create]]: flow match: recirc_id=0,eth,udp,in_port=1,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82, mark: 1
])
# Check that datapath flow installed successfully.
AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl
@@ -472,7 +478,7 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=
# Check for succesfull packet matching with installed offloaded flow.
AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl
p1: packet: udp,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,nw_src=127.0.0.1,nw_dst=127.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=81,tp_dst=82 dnl
-matches with flow: recirc_id=0,eth,udp,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82 with mark: 0
+matches with flow: recirc_id=0,eth,udp,dl_vlan=99,dl_vlan_pcp=7,nw_src=127.0.0.1,nw_frag=no,tp_dst=82 with mark: 1
])
ovs-appctl revalidator/wait
@@ -490,7 +496,7 @@ packets:1, bytes:64, used:0.0s, actions:set(ipv4(src=192.168.0.7)),set(udp(dst=3
# Check that flow successfully deleted from HW.
OVS_WAIT_UNTIL([grep "succeed to delete netdev flow" ovs-vswitchd.log])
AT_CHECK([filter_hw_flow_del < ovs-vswitchd.log | strip_xout], [0], [dnl
-p1: flow del: mark: 0
+p1: flow del: mark: 1
])
# Check that ip address and udp port were correctly modified in output packets.
@@ -506,3 +512,80 @@ udp,in_port=ANY,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:
DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS([dummy])
DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS([dummy-pmd])
+
+m4_define([DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP],
+ [AT_SETUP([dpif-netdev - partial hw offload with arp vlan id packet modifications - $1])
+ OVS_VSWITCHD_START(
+ [add-port br0 p1 -- \
+ set interface p1 type=$1 ofport_request=1 options:pcap=p1.pcap options:ifindex=1102 -- \
+ set bridge br0 datapath-type=dummy \
+ other-config:datapath-id=1234 fail-mode=secure], [], [],
+ [m4_if([$1], [dummy-pmd], [--dummy-numa="0,0,0,0,1,1,1,1"], [])])
+ AT_CHECK([ovs-appctl vlog/set dpif:file:dbg dpif_netdev:file:dbg netdev_dummy:file:dbg])
+
+ AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-offload=true])
+ OVS_WAIT_UNTIL([grep "netdev: Flow API Enabled" ovs-vswitchd.log])
+
+ AT_CHECK([ovs-ofctl del-flows br0])
+
+ # Setting flow to modify vlan id with arp packet to be sure that
+ # offloaded packets have correctly initialized l3 offset.
+ AT_CHECK([ovs-ofctl add-flow br0 in_port=1,arp,dl_vlan=99,actions=mod_vlan_vid=11,output:IN_PORT])
+
+ packet="packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0806),arp(sip=127.0.0.1,tip=127.0.0.1,op=1,sha=00:0b:0c:0d:0e:0f,tha=00:00:00:00:00:00))"
+ AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet --len 64], [0])
+
+ OVS_WAIT_UNTIL([grep "miss upcall" ovs-vswitchd.log])
+ AT_CHECK([grep -A 1 'miss upcall' ovs-vswitchd.log | tail -n 1], [0], [dnl
+skb_priority(0),skb_mark(0),ct_state(0),ct_zone(0),ct_mark(0),ct_label(0),recirc_id(0),dp_hash(0),in_port(1),dnl
+packet_type(ns=0,id=0),eth(src=00:06:07:08:09:0a,dst=00:01:02:03:04:05),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0806),arp(sip=127.0.0.1,tip=127.0.0.1,op=1,sha=00:0b:0c:0d:0e:0f,tha=00:00:00:00:00:00))
+])
+ # Check that flow successfully offloaded.
+ OVS_WAIT_UNTIL([grep "succeed to add netdev flow" ovs-vswitchd.log])
+ AT_CHECK([filter_hw_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl
+p1: flow put[[create]]: flow match: recirc_id=0,eth,arp,in_port=1,dl_vlan=99,dl_vlan_pcp=7, mark: 1
+])
+ # Check that datapath flow installed successfully.
+ AT_CHECK([filter_flow_install < ovs-vswitchd.log | strip_xout], [0], [dnl
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0806)), actions:
+])
+ # Inject the same packet again.
+ AT_CHECK([ovs-appctl netdev-dummy/receive p1 $packet --len 64], [0])
+
+ # Check for successful packet matching with installed offloaded flow.
+ AT_CHECK([filter_hw_packet_netdev_dummy < ovs-vswitchd.log | strip_xout], [0], [dnl
+p1: packet: arp,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00 dnl
+matches with flow: recirc_id=0,eth,arp,dl_vlan=99,dl_vlan_pcp=7 with mark: 1
+])
+
+ ovs-appctl revalidator/wait
+ # Dump the datapath flow to see that actions were executed for a packet.
+ AT_CHECK([ovs-appctl dpif/dump-flows br0 | strip_timers], [0], [dnl
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x8100),vlan(vid=99,pcp=7),encap(eth_type(0x0806)), dnl
+packets:1, bytes:64, used:0.0s, actions:pop_vlan,push_vlan(vid=11,pcp=7),1
+])
+
+ # Wait for datapath flow expiration.
+ ovs-appctl time/stop
+ ovs-appctl time/warp 15000
+ ovs-appctl revalidator/wait
+
+ # Check that flow successfully deleted from HW.
+ OVS_WAIT_UNTIL([grep "succeed to delete netdev flow" ovs-vswitchd.log])
+ AT_CHECK([filter_hw_flow_del < ovs-vswitchd.log | strip_xout], [0], [dnl
+p1: flow del: mark: 1
+])
+
+ # Check that VLAN ID was correctly modified in output packets.
+ AT_CHECK([ovs-ofctl parse-pcap p1.pcap], [0], [dnl
+arp,in_port=ANY,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00
+arp,in_port=ANY,dl_vlan=11,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00
+arp,in_port=ANY,dl_vlan=99,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00
+arp,in_port=ANY,dl_vlan=11,dl_vlan_pcp=7,vlan_tci1=0x0000,dl_src=00:06:07:08:09:0a,dl_dst=00:01:02:03:04:05,arp_spa=127.0.0.1,arp_tpa=127.0.0.1,arp_op=1,arp_sha=00:0b:0c:0d:0e:0f,arp_tha=00:00:00:00:00:00
+])
+
+ OVS_VSWITCHD_STOP
+ AT_CLEANUP])
+
+DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP([dummy])
+DPIF_NETDEV_FLOW_HW_OFFLOAD_OFFSETS_VID_ARP([dummy-pmd])
diff --git a/tests/idltest.ovsschema b/tests/idltest.ovsschema
index bee79fc50f7723cc05f6c94cb8084b96a81baeef..3ddb612b0c459827a7c93e111d04c4ba2e24e2a2 100644
--- a/tests/idltest.ovsschema
+++ b/tests/idltest.ovsschema
@@ -54,6 +54,15 @@
},
"isRoot" : true
},
+ "indexed": {
+ "columns": {
+ "i": {
+ "type": "integer"
+ }
+ },
+ "indexes": [["i"]],
+ "isRoot" : true
+ },
"simple": {
"columns": {
"b": {
@@ -171,6 +180,36 @@
},
"isRoot" : false
},
+ "simple5": {
+ "columns" : {
+ "name": {"type": "string"},
+ "irefmap": {
+ "type": {
+ "key": {"type": "integer"},
+ "value": {"type": "uuid",
+ "refTable": "simple3"},
+ "min": 0,
+ "max": "unlimited"
+ }
+ }
+ },
+ "isRoot": true
+ },
+ "simple6": {
+ "columns" : {
+ "name": {"type": "string"},
+ "weak_ref": {
+ "type": {
+ "key": {"type": "uuid",
+ "refTable": "simple",
+ "refType": "weak"},
+ "min": 0,
+ "max": "unlimited"
+ }
+ }
+ },
+ "isRoot": true
+ },
"singleton" : {
"columns" : {
"name" : {
diff --git a/tests/lacp.at b/tests/lacp.at
index 7b460d7be35ef5212c984a4717b8763d3ec64728..f44331e8592f6e7e405f04635e8f247f7eff6f33 100644
--- a/tests/lacp.at
+++ b/tests/lacp.at
@@ -5,9 +5,9 @@ m4_define([STRIP_RECIRC_ID], [[sed '
s/Recirc-ID.*$//
' ]])
-# Strips out active slave mac address since it may change over time.
-m4_define([STRIP_ACTIVE_SLAVE_MAC], [[sed '
- s/active slave mac.*$//
+# Strips out active member mac address since it may change over time.
+m4_define([STRIP_ACTIVE_MEMBER_MAC], [[sed '
+ s/active member mac.*$//
' ]])
AT_SETUP([lacp - config])
@@ -27,7 +27,7 @@ AT_CHECK([ovs-appctl lacp/show], [0], [dnl
aggregation key: 1
lacp_time: slow
-slave: p1: expired attached
+member: p1: expired attached
port_id: 1
port_priority: 65535
may_enable: false
@@ -78,7 +78,7 @@ AT_CHECK([sed -e 's/aggregation key:.*/aggregation key: /' < stdout], [
aggregation key:
lacp_time: fast
-slave: p1: expired attached
+member: p1: expired attached
port_id: 11
port_priority: 111
may_enable: false
@@ -97,7 +97,7 @@ slave: p1: expired attached
partner key: 0
partner state: timeout
-slave: p2: expired attached
+member: p2: expired attached
port_id: 22
port_priority: 222
may_enable: false
@@ -121,16 +121,18 @@ AT_CHECK([ovs-appctl bond/show], [0], [dnl
bond_mode: active-backup
bond may use recirculation: no, Recirc-ID : -1
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
lacp_fallback_ab: false
-active slave mac: 00:00:00:00:00:00(none)
+active-backup primary:
+active member mac: 00:00:00:00:00:00(none)
-slave p1: disabled
+member p1: disabled
may_enable: false
-slave p2: disabled
+member p2: disabled
may_enable: false
])
@@ -138,8 +140,8 @@ OVS_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([lacp - negotiation])
-# Create bond0 on br0 with interfaces p0 and p1
-# and bond1 on br1 with interfaces p2 and p3
+# Create bond0 on br0 with members p0 and p1
+# and bond1 on br1 with members p2 and p3
# with p0 patched to p2 and p1 patched to p3.
OVS_VSWITCHD_START(
[add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \
@@ -191,9 +193,9 @@ done
AT_CHECK(
[ovs-appctl lacp/show bond0
ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC
-ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [stdout])
-AT_CHECK([sed '/active slave/d' stdout], [0], [dnl
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC ], [0], [stdout])
+AT_CHECK([sed '/active member/d' stdout], [0], [dnl
---- bond0 ----
status: active negotiated
sys_id: aa:55:aa:55:00:00
@@ -201,7 +203,7 @@ AT_CHECK([sed '/active slave/d' stdout], [0], [dnl
aggregation key: 2
lacp_time: fast
-slave: p0: current attached
+member: p0: current attached
port_id: 1
port_priority: 65535
may_enable: true
@@ -220,7 +222,7 @@ slave: p0: current attached
partner key: 4
partner state: activity timeout aggregation synchronized collecting distributing
-slave: p1: current attached
+member: p1: current attached
port_id: 2
port_priority: 65535
may_enable: true
@@ -245,7 +247,7 @@ slave: p1: current attached
aggregation key: 4
lacp_time: fast
-slave: p2: current attached
+member: p2: current attached
port_id: 3
port_priority: 65535
may_enable: true
@@ -264,7 +266,7 @@ slave: p2: current attached
partner key: 2
partner state: activity timeout aggregation synchronized collecting distributing
-slave: p3: current attached
+member: p3: current attached
port_id: 4
port_priority: 65535
may_enable: true
@@ -286,36 +288,40 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes,
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
lacp_fallback_ab: false
+active-backup primary:
-slave p0: enabled
+member p0: enabled
may_enable: true
-slave p1: enabled
+member p1: enabled
may_enable: true
---- bond1 ----
bond_mode: balance-tcp
bond may use recirculation: yes,
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
lacp_fallback_ab: false
+active-backup primary:
-slave p2: enabled
+member p2: enabled
may_enable: true
-slave p3: enabled
+member p3: enabled
may_enable: true
])
-AT_CHECK([grep 'active slave$' stdout], [0], [dnl
- active slave
- active slave
+AT_CHECK([grep 'active member$' stdout], [0], [dnl
+ active member
+ active member
])
# Redirect the patch link between p0 and p2 so that no packets get
@@ -329,8 +335,8 @@ ovs-appctl time/warp 4100 100
AT_CHECK(
[ovs-appctl lacp/show bond0
ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC
-ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [dnl
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC ], [0], [dnl
---- bond0 ----
status: active negotiated
sys_id: aa:55:aa:55:00:00
@@ -338,7 +344,7 @@ ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [d
aggregation key: 2
lacp_time: fast
-slave: p0: expired attached
+member: p0: expired attached
port_id: 1
port_priority: 65535
may_enable: false
@@ -357,7 +363,7 @@ slave: p0: expired attached
partner key: 4
partner state: activity timeout aggregation collecting distributing
-slave: p1: current attached
+member: p1: current attached
port_id: 2
port_priority: 65535
may_enable: true
@@ -382,7 +388,7 @@ slave: p1: current attached
aggregation key: 4
lacp_time: fast
-slave: p2: expired attached
+member: p2: expired attached
port_id: 3
port_priority: 65535
may_enable: false
@@ -401,7 +407,7 @@ slave: p2: expired attached
partner key: 2
partner state: activity timeout aggregation collecting distributing
-slave: p3: current attached
+member: p3: current attached
port_id: 4
port_priority: 65535
may_enable: true
@@ -423,34 +429,38 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes,
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
lacp_fallback_ab: false
-
+active-backup primary:
+
-slave p0: disabled
+member p0: disabled
may_enable: false
-slave p1: enabled
- active slave
+member p1: enabled
+ active member
may_enable: true
---- bond1 ----
bond_mode: balance-tcp
bond may use recirculation: yes,
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
lacp_fallback_ab: false
-
+active-backup primary:
+
-slave p2: disabled
+member p2: disabled
may_enable: false
-slave p3: enabled
- active slave
+member p3: enabled
+ active member
may_enable: true
])
@@ -461,8 +471,8 @@ ovs-appctl time/warp 4100 100
AT_CHECK(
[ovs-appctl lacp/show bond0
ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC
-ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [dnl
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC ], [0], [dnl
---- bond0 ----
status: active negotiated
sys_id: aa:55:aa:55:00:00
@@ -470,7 +480,7 @@ ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [d
aggregation key: 2
lacp_time: fast
-slave: p0: defaulted detached
+member: p0: defaulted detached
port_id: 1
port_priority: 65535
may_enable: false
@@ -489,7 +499,7 @@ slave: p0: defaulted detached
partner key: 0
partner state:
-slave: p1: current attached
+member: p1: current attached
port_id: 2
port_priority: 65535
may_enable: true
@@ -514,7 +524,7 @@ slave: p1: current attached
aggregation key: 4
lacp_time: fast
-slave: p2: defaulted detached
+member: p2: defaulted detached
port_id: 3
port_priority: 65535
may_enable: false
@@ -533,7 +543,7 @@ slave: p2: defaulted detached
partner key: 0
partner state:
-slave: p3: current attached
+member: p3: current attached
port_id: 4
port_priority: 65535
may_enable: true
@@ -555,34 +565,38 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes,
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
lacp_fallback_ab: false
-
+active-backup primary:
+
-slave p0: disabled
+member p0: disabled
may_enable: false
-slave p1: enabled
- active slave
+member p1: enabled
+ active member
may_enable: true
---- bond1 ----
bond_mode: balance-tcp
bond may use recirculation: yes,
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
lacp_fallback_ab: false
-
+active-backup primary:
+
-slave p2: disabled
+member p2: disabled
may_enable: false
-slave p3: enabled
- active slave
+member p3: enabled
+ active member
may_enable: true
])
@@ -598,8 +612,8 @@ ovs-appctl time/warp 30100 100
AT_CHECK(
[ovs-appctl lacp/show bond0
ovs-appctl lacp/show bond1
-ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC
-ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [dnl
+ovs-appctl bond/show bond0 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC
+ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC ], [0], [dnl
---- bond0 ----
status: active negotiated
sys_id: aa:55:aa:55:00:00
@@ -607,7 +621,7 @@ ovs-appctl bond/show bond1 | STRIP_RECIRC_ID | STRIP_ACTIVE_SLAVE_MAC ], [0], [d
aggregation key: 2
lacp_time: fast
-slave: p0: current attached
+member: p0: current attached
port_id: 1
port_priority: 65535
may_enable: true
@@ -626,7 +640,7 @@ slave: p0: current attached
partner key: 4
partner state: activity timeout aggregation synchronized collecting distributing
-slave: p1: current attached
+member: p1: current attached
port_id: 2
port_priority: 65535
may_enable: true
@@ -651,7 +665,7 @@ slave: p1: current attached
aggregation key: 4
lacp_time: fast
-slave: p2: current attached
+member: p2: current attached
port_id: 3
port_priority: 65535
may_enable: true
@@ -670,7 +684,7 @@ slave: p2: current attached
partner key: 2
partner state: activity timeout aggregation synchronized collecting distributing
-slave: p3: current attached
+member: p3: current attached
port_id: 4
port_priority: 65535
may_enable: true
@@ -692,34 +706,38 @@ slave: p3: current attached
bond_mode: balance-tcp
bond may use recirculation: yes,
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
lacp_fallback_ab: false
-
+active-backup primary:
+
-slave p0: enabled
+member p0: enabled
may_enable: true
-slave p1: enabled
- active slave
+member p1: enabled
+ active member
may_enable: true
---- bond1 ----
bond_mode: balance-tcp
bond may use recirculation: yes,
bond-hash-basis: 0
+lb_output action: disabled, bond-id: -1
updelay: 0 ms
downdelay: 0 ms
lacp_status: negotiated
lacp_fallback_ab: false
-
+active-backup primary:
+
-slave p2: enabled
+member p2: enabled
may_enable: true
-slave p3: enabled
- active slave
+member p3: enabled
+ active member
may_enable: true
])
@@ -753,8 +771,8 @@ ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log
# Set miss_send_len to 128, enabling port_status messages to our service connection.
ovs-appctl -t ovs-ofctl ofctl/send 0409000c0123456700000080
-# Create bond0 on br0 with interfaces p0 and p1
-# and bond1 on br1 with interfaces p2 and p3
+# Create bond0 on br0 with members p0 and p1
+# and bond1 on br1 with members p2 and p3
# with p0 patched to p2 and p1 patched to p3.
AT_CHECK([ovs-vsctl add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \
other-config:lacp-time=fast \
@@ -848,8 +866,8 @@ ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log
# Set miss_send_len to 128, enabling port_status messages to our service connection.
ovs-appctl -t ovs-ofctl ofctl/send 0509000c0123456700000080
-# Create bond0 on br0 with interfaces p0 and p1
-# and bond1 on br1 with interfaces p2 and p3
+# Create bond0 on br0 with members p0 and p1
+# and bond1 on br1 with members p2 and p3
# with p0 patched to p2 and p1 patched to p3.
AT_CHECK([ovs-vsctl add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \
other-config:lacp-time=fast \
@@ -943,8 +961,8 @@ ovs-appctl -t ovs-ofctl ofctl/set-output-file monitor.log
# Set miss_send_len to 128, enabling port_status messages to our service connection.
ovs-appctl -t ovs-ofctl ofctl/send 0609000c0123456700000080
-# Create bond0 on br0 with interfaces p0 and p1
-# and bond1 on br1 with interfaces p2 and p3
+# Create bond0 on br0 with members p0 and p1
+# and bond1 on br1 with members p2 and p3
# with p0 patched to p2 and p1 patched to p3.
AT_CHECK([ovs-vsctl add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \
other-config:lacp-time=fast \
diff --git a/tests/library.at b/tests/library.at
index ac4ea4abf28151afb5b4218650d64a82597f3a29..1702b7556bcfafd4818dc4cf3097dd45633fc440 100644
--- a/tests/library.at
+++ b/tests/library.at
@@ -53,7 +53,8 @@ AT_CHECK([ovstest test-packets])
AT_CLEANUP
AT_SETUP([SHA-1])
-AT_CHECK([ovstest test-sha1], [0], [.........
+AT_KEYWORDS([sha1])
+AT_CHECK([ovstest test-sha1], [0], [..........
])
AT_CLEANUP
diff --git a/tests/odp.at b/tests/odp.at
index 3ab9ad62dda293b78c63416f9ffed38d366a25ba..b762ebb2b97edaac6d495dc1c5eec35db995c51c 100644
--- a/tests/odp.at
+++ b/tests/odp.at
@@ -383,6 +383,7 @@ check_pkt_len(size=200,gt(4),le(5))
check_pkt_len(size=200,gt(drop),le(5))
check_pkt_len(size=200,gt(ct(nat)),le(drop))
check_pkt_len(size=200,gt(set(eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15))),le(set(eth(src=00:01:02:03:04:06,dst=10:11:12:13:14:16))))
+lb_output(1)
])
AT_CHECK_UNQUOTED([ovstest test-odp parse-actions < actions.txt], [0],
[`cat actions.txt`
@@ -397,6 +398,43 @@ odp_actions_from_string: error
])
AT_CLEANUP
+AT_SETUP([OVS datapath actions parsing and formatting - userdata overflow])
+dnl Userdata should fit in a single netlink message, i.e. it should not exceed
+dnl UINT16_MAX - NLA_HDRLEN = 65535 - 4 = 65531 bytes. OVS should not accept
+dnl larger userdata. OTOH, userdata is part of a nested netlink message, which
+dnl should not be oversized either. 'pid' takes NLA_HDRLEN + 4 = 8 bytes,
+dnl plus NLA_HDRLEN for the nested header. The 'actions' flag takes
+dnl NLA_HDRLEN = 4 and 'tunnel_out_port' takes NLA_HDRLEN + 4 = 8 bytes.
+dnl So, for the variant with 'actions', the maximum length of userdata is:
+dnl UINT16_MAX -  NLA_HDRLEN   - (NLA_HDRLEN + 4) - NLA_HDRLEN - NLA_HDRLEN
+dnl  total max   nested header        pid             actions     userdata
+dnl Result: 65515 bytes for the actual userdata.
+dnl For the case with 'tunnel_out_port': 65511 bytes.
+dnl The size of userdata is rounded up to a multiple of 4, so the highest
+dnl acceptable sizes are 65512 and 65508.
+
+dnl A hex string of length 65512 * 2 = 131024 is valid, while 131026 is not.
+data_valid=$( printf '%*s' 131024 | tr ' ' "a")
+data_invalid=$(printf '%*s' 131026 | tr ' ' "a")
+
+echo "userspace(pid=1234567,userdata(${data_valid}),actions)" > actions.txt
+echo "userspace(pid=1234567,userdata(${data_invalid}),actions)" >> actions.txt
+
+dnl A hex string of length 65508 * 2 = 131016 is valid, while 131018 is not.
+data_valid=$( printf '%*s' 131016 | tr ' ' "a")
+data_invalid=$(printf '%*s' 131018 | tr ' ' "a")
+
+echo "userspace(pid=1234567,userdata(${data_valid}),tunnel_out_port=10)" >> actions.txt
+echo "userspace(pid=1234567,userdata(${data_invalid}),tunnel_out_port=10)" >> actions.txt
+
+AT_CHECK_UNQUOTED([ovstest test-odp parse-actions < actions.txt], [0], [dnl
+`cat actions.txt | head -1`
+odp_actions_from_string: error
+`cat actions.txt | head -3 | tail -1`
+odp_actions_from_string: error
+])
+AT_CLEANUP
+
AT_SETUP([OVS datapath keys parsing and formatting - 33 nested encap ])
AT_DATA([odp-in.txt], [dnl
encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap(encap()))))))))))))))))))))))))))))))))
diff --git a/tests/ofp-actions.at b/tests/ofp-actions.at
index 4893280a998f2a1f5572c5ef79250313db18cb42..199db8ed0ff6d7321e469a4e0b2b81a43abd3697 100644
--- a/tests/ofp-actions.at
+++ b/tests/ofp-actions.at
@@ -80,11 +80,11 @@ ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffff0000ffff0000
# actions=multipath(eth_src,50,modulo_n,1,0,NXM_NX_REG0[])
ffff 0020 00002320 000a 0000 0032 0000 0000 0000 0000 0000 0000 001f 00010004
-# actions=bundle(eth_src,0,hrw,ofport,slaves:4,8)
+# actions=bundle(eth_src,0,hrw,ofport,members:4,8)
ffff 0028 00002320 000c 0001 0000 0000 00000002 0002 0000 00000000 00000000 dnl
0004 0008 00000000
-# actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],slaves:4,8)
+# actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],members:4,8)
ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl
0004 0008 00000000
@@ -316,6 +316,9 @@ ffff 0018 00002320 0031 05dc 000000010004000000000000
# actions=check_pkt_larger(1000)->NXM_NX_XXREG1[4]
ffff 0018 00002320 0031 03e8 00040001e010000000000000
+# actions=delete_field:tun_metadata10
+ffff 0018 00002320 0032 00 01 64 7c 00 00 00 00 000000000000
+
])
sed '/^[[#&]]/d' < test-data > input.txt
sed -n 's/^# //p; /^$/p' < test-data > expout
@@ -441,11 +444,11 @@ ffff 0020 00002320 0016 000000000000 fedcba9876543210 ffffffffffffffff
# actions=multipath(eth_src,50,modulo_n,1,0,NXM_NX_REG0[])
ffff 0020 00002320 000a 0000 0032 0000 0000 0000 0000 0000 0000 001f 00010004
-# actions=bundle(eth_src,0,hrw,ofport,slaves:4,8)
+# actions=bundle(eth_src,0,hrw,ofport,members:4,8)
ffff 0028 00002320 000c 0001 0000 0000 00000002 0002 0000 00000000 00000000 dnl
0004 0008 00000000
-# actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],slaves:4,8)
+# actions=bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[],members:4,8)
ffff 0028 00002320 000d 0001 0000 0000 00000002 0002 001f 00010004 00000000 dnl
0004 0008 00000000
@@ -766,6 +769,17 @@ dnl Check OpenFlow v1.3.4 Conformance Test: 430.510.
& 00000010 00 00 00 10 00 00 00 01-
0019 0010 80000807 000102030405 000000000010 00000001
+dnl Check NSH encap (experimenter extension).
+# actions=encap(nsh(md_type=1))
+ffff 0018 00002320 002e 0000 0001894f 0004 01 05 01 000000
+
+dnl NSH encap with non-zero padding.
+# actions=encap(nsh(md_type=1))
+# 21: 12 -> 00
+# 22: 34 -> 00
+# 23: 56 -> 00
+ffff 0018 00002320 002e 0000 0001894f 0004 01 05 01 123456
+
])
sed '/^[[#&]]/d' < test-data > input.txt
sed -n 's/^# //p; /^$/p' < test-data > expout
@@ -944,17 +958,17 @@ bad_action 'enqueue:asdf:123' 'asdf: enqueue to unknown port'
# bundle
bad_action 'bundle:123' '123: not enough arguments to bundle action'
bad_action 'bundle(symmetric_l4,60,hrw,ofport,ports:1,2,3,4,5)' \
- "symmetric_l4,60,hrw,ofport,ports:1,2,3,4,5: missing slave delimiter, expected \`slaves' got \`ports'"
-bad_action 'bundle(symmetric_l4,60,hrw,ofport,slaves:xyzzy,2,3,4,5)' \
+ "symmetric_l4,60,hrw,ofport,ports:1,2,3,4,5: missing member delimiter, expected \`members', got \`ports'"
+bad_action 'bundle(symmetric_l4,60,hrw,ofport,members:xyzzy,2,3,4,5)' \
'xyzzy: bad port number'
-bad_action 'bundle(asymmetric_l4,60,hrw,ofport,slaves:1,2,3,4,5)' \
- "asymmetric_l4,60,hrw,ofport,slaves:1,2,3,4,5: unknown fields \`asymmetric_l4'"
-bad_action 'bundle(symmetric_l4,60,hrt,ofport,slaves:1,2,3,4,5)' \
- "symmetric_l4,60,hrt,ofport,slaves:1,2,3,4,5: unknown algorithm \`hrt'"
-bad_action 'bundle(symmetric_l4,60,hrw,odpport,slaves:1,2,3,4,5)' \
- "symmetric_l4,60,hrw,odpport,slaves:1,2,3,4,5: unknown slave_type \`odpport'"
-bad_action 'bundle_load(symmetric_l4,60,hrw,ofport,actset_output,slaves:1,2,3,4,5)' \
- "symmetric_l4,60,hrw,ofport,actset_output,slaves:1,2,3,4,5: experimenter OXM field 'actset_output' not supported"
+bad_action 'bundle(asymmetric_l4,60,hrw,ofport,members:1,2,3,4,5)' \
+ "asymmetric_l4,60,hrw,ofport,members:1,2,3,4,5: unknown fields \`asymmetric_l4'"
+bad_action 'bundle(symmetric_l4,60,hrt,ofport,members:1,2,3,4,5)' \
+ "symmetric_l4,60,hrt,ofport,members:1,2,3,4,5: unknown algorithm \`hrt'"
+bad_action 'bundle(symmetric_l4,60,hrw,odpport,members:1,2,3,4,5)' \
+ "symmetric_l4,60,hrw,odpport,members:1,2,3,4,5: unknown member_type \`odpport'"
+bad_action 'bundle_load(symmetric_l4,60,hrw,ofport,actset_output,members:1,2,3,4,5)' \
+ "symmetric_l4,60,hrw,ofport,actset_output,members:1,2,3,4,5: experimenter OXM field 'actset_output' not supported"
# mod_vlan_vid
bad_action 'mod_vlan_vid:6000' '6000: not a valid VLAN VID'
diff --git a/tests/ofp-print.at b/tests/ofp-print.at
index dd6410b11902d9d886441bbed4e1d89fbc773db6..2c7e163bd6025f778bd637f86edcc465537494de 100644
--- a/tests/ofp-print.at
+++ b/tests/ofp-print.at
@@ -2816,7 +2816,8 @@ AT_CLEANUP
AT_SETUP([OFPT_SET_ASYNC - OF1.3])
AT_KEYWORDS([ofp-print])
-dnl This message has bit 12 set for the PACKET_IN messages (master and slave).
+dnl This message has bit 12 set for the PACKET_IN messages (primary and
+dnl secondary).
dnl Those aren't supported bits so they get silently ignored on decoding.
dnl That seems reasonable because OF1.3 doesn't define any error codes for
dnl OFPT_SET_ASYNC.
@@ -2825,7 +2826,7 @@ AT_CHECK([ovs-ofctl ofp-print "\
00 00 00 03 00 00 00 07 00 00 00 00 00 00 00 03 \
"], [0], [dnl
OFPT_SET_ASYNC (OF1.3) (xid=0x0):
- master:
+ primary:
PACKET_IN: no_match invalid_ttl
PORT_STATUS: add delete
FLOW_REMOVED: (off)
@@ -2833,7 +2834,7 @@ OFPT_SET_ASYNC (OF1.3) (xid=0x0):
TABLE_STATUS: (off)
REQUESTFORWARD: (off)
- slave:
+ secondary:
PACKET_IN: no_match action invalid_ttl
PORT_STATUS: add delete modify
FLOW_REMOVED: idle hard
@@ -2849,7 +2850,7 @@ AT_CHECK([ovs-ofctl ofp-print "\
03 18 00 18 00 00 00 02 00 00 00 02 00 00 00 00 \
00 00 00 00 00 00 00 03 \
"], [0], [dnl
-OFPT_ROLE_REQUEST (OF1.2) (xid=0x2): role=master generation_id=3
+OFPT_ROLE_REQUEST (OF1.2) (xid=0x2): role=primary generation_id=3
])
AT_CLEANUP
@@ -2869,7 +2870,7 @@ AT_CHECK([ovs-ofctl ofp-print "\
01 04 00 14 00 00 00 02 00 00 23 20 00 00 00 0a \
00 00 00 01 \
"], [0], [dnl
-NXT_ROLE_REQUEST (xid=0x2): role=master
+NXT_ROLE_REQUEST (xid=0x2): role=primary
])
AT_CLEANUP
@@ -2879,7 +2880,7 @@ AT_CHECK([ovs-ofctl ofp-print "\
03 19 00 18 00 00 00 02 00 00 00 03 00 00 00 00 \
12 34 56 78 ab cd ef 90 \
"], [0], [dnl
-OFPT_ROLE_REPLY (OF1.2) (xid=0x2): role=slave generation_id=1311768467750121360
+OFPT_ROLE_REPLY (OF1.2) (xid=0x2): role=secondary generation_id=1311768467750121360
])
AT_CLEANUP
@@ -2889,67 +2890,67 @@ AT_CHECK([ovs-ofctl ofp-print "\
01 04 00 14 00 00 00 02 00 00 23 20 00 00 00 0b \
00 00 00 02 \
"], [0], [dnl
-NXT_ROLE_REPLY (xid=0x2): role=slave
+NXT_ROLE_REPLY (xid=0x2): role=secondary
])
AT_CLEANUP
-AT_SETUP([OFP_ROLE_STATUS - master, experimenter - OF1.3])
+AT_SETUP([OFP_ROLE_STATUS - primary, experimenter - OF1.3])
AT_KEYWORDS([ofp-print])
AT_CHECK([ovs-ofctl ofp-print "\
04 04 00 20 00 00 00 0a 4f 4e 46 00 00 00 07 77 \
00 00 00 02 02 00 00 00 ff ff ff ff ff ff ff ff \
"], [0], [dnl
-ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=master reason=experimenter_data_changed
+ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=primary reason=experimenter_data_changed
])
AT_CLEANUP
-AT_SETUP([OFP_ROLE_STATUS - master, config - OF1.3])
+AT_SETUP([OFP_ROLE_STATUS - primary, config - OF1.3])
AT_KEYWORDS([ofp-print])
AT_CHECK([ovs-ofctl ofp-print "\
04 04 00 20 00 00 00 0a 4f 4e 46 00 00 00 07 77 \
00 00 00 02 01 00 00 00 ff ff ff ff ff ff ff ff \
"], [0], [dnl
-ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=master reason=configuration_changed
+ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=primary reason=configuration_changed
])
AT_CLEANUP
-AT_SETUP([OFP_ROLE_STATUS - master, config,generation - OF1.3])
+AT_SETUP([OFP_ROLE_STATUS - primary, config,generation - OF1.3])
AT_KEYWORDS([ofp-print])
AT_CHECK([ovs-ofctl ofp-print "\
04 04 00 20 00 00 00 0a 4f 4e 46 00 00 00 07 77 \
00 00 00 02 01 00 00 00 00 00 00 00 00 00 00 10 \
"], [0], [dnl
-ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=master generation_id=16 reason=configuration_changed
+ONFT_ROLE_STATUS (OF1.3) (xid=0xa): role=primary generation_id=16 reason=configuration_changed
])
AT_CLEANUP
-AT_SETUP([OFP_ROLE_STATUS - master, experimenter - OF1.4])
+AT_SETUP([OFP_ROLE_STATUS - primary, experimenter - OF1.4])
AT_KEYWORDS([ofp-print])
AT_CHECK([ovs-ofctl ofp-print "\
05 1e 00 18 00 00 00 0a \
00 00 00 02 02 00 00 00 ff ff ff ff ff ff ff ff \
"], [0], [dnl
-OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=master reason=experimenter_data_changed
+OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=primary reason=experimenter_data_changed
])
AT_CLEANUP
-AT_SETUP([OFP_ROLE_STATUS - master, config - OF1.4])
+AT_SETUP([OFP_ROLE_STATUS - primary, config - OF1.4])
AT_KEYWORDS([ofp-print])
AT_CHECK([ovs-ofctl ofp-print "\
05 1e 00 18 00 00 00 0a \
00 00 00 02 01 00 00 00 ff ff ff ff ff ff ff ff \
"], [0], [dnl
-OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=master reason=configuration_changed
+OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=primary reason=configuration_changed
])
AT_CLEANUP
-AT_SETUP([OFP_ROLE_STATUS - master, config,generation - OF1.4])
+AT_SETUP([OFP_ROLE_STATUS - primary, config,generation - OF1.4])
AT_KEYWORDS([ofp-print])
AT_CHECK([ovs-ofctl ofp-print "\
05 1e 00 18 00 00 00 0a \
00 00 00 02 01 00 00 00 00 00 00 00 00 00 00 10 \
"], [0], [dnl
-OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=master generation_id=16 reason=configuration_changed
+OFPT_ROLE_STATUS (OF1.4) (xid=0xa): role=primary generation_id=16 reason=configuration_changed
])
AT_CLEANUP
@@ -3156,7 +3157,7 @@ AT_CLEANUP
AT_SETUP([NXT_SET_ASYNC_CONFIG])
AT_KEYWORDS([ofp-print])
-dnl This message has bit 12 set for the PACKET_IN messages (master and slave).
+dnl This message has bit 12 set for the PACKET_IN messages (primary and secondary).
dnl Those aren't supported bits so they get silently ignored on decoding.
AT_CHECK([ovs-ofctl ofp-print "\
01 04 00 28 00 00 00 00 00 00 23 20 00 00 00 13 \
@@ -3164,7 +3165,7 @@ AT_CHECK([ovs-ofctl ofp-print "\
00 00 00 00 00 00 00 03 \
"], [0], [dnl
NXT_SET_ASYNC_CONFIG (xid=0x0):
- master:
+ primary:
PACKET_IN: no_match invalid_ttl
PORT_STATUS: add delete
FLOW_REMOVED: (off)
@@ -3172,7 +3173,7 @@ NXT_SET_ASYNC_CONFIG (xid=0x0):
TABLE_STATUS: (off)
REQUESTFORWARD: (off)
- slave:
+ secondary:
PACKET_IN: no_match action invalid_ttl
PORT_STATUS: add delete modify
FLOW_REMOVED: idle hard
@@ -3191,7 +3192,7 @@ AT_CHECK([ovs-ofctl ofp-print "\
00 05 00 08 00 00 00 05 \
"], [0], [dnl
OFPT_SET_ASYNC (OF1.4) (xid=0x2):
- master:
+ primary:
PACKET_IN: action
PORT_STATUS: add modify
FLOW_REMOVED: idle delete
@@ -3199,7 +3200,7 @@ OFPT_SET_ASYNC (OF1.4) (xid=0x2):
TABLE_STATUS: (off)
REQUESTFORWARD: (off)
- slave:
+ secondary:
PACKET_IN: no_match invalid_ttl
PORT_STATUS: delete
FLOW_REMOVED: delete group_delete meter_delete
diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at
index ff1cc93707b82c7519172391ba3aa3ca1a3fcdff..31064ed95e2899dd43715d1a6a4a387ec8b4e63e 100644
--- a/tests/ofproto-dpif.at
+++ b/tests/ofproto-dpif.at
@@ -29,12 +29,16 @@ AT_CHECK([ovs-appctl revalidator/wait])
OVS_VSWITCHD_STOP
AT_CLEANUP
-AT_SETUP([ofproto-dpif - active-backup bonding])
-# Create br0 with interfaces p1, p2 and p7, creating bond0 with p1 and p2
-# and br1 with interfaces p3, p4 and p8.
-# toggle p1,p2 of bond0 up and down to test bonding in active-backup mode.
+AT_SETUP([ofproto-dpif - active-backup bonding (with primary)])
+
+dnl Create br0 with members p1, p2 and p7, creating bond0 with p1 and
+dnl p2 (p1 as primary), and br1 with members p3, p4 and p8.
+dnl Toggle p1 and p2 of bond0 up and down to test bonding in active-backup
+dnl mode. With p1 down and p2 up/active, bring p1 back up. Since p1 is the
+dnl primary, it should become active.
OVS_VSWITCHD_START(
- [add-bond br0 bond0 p1 p2 bond_mode=active-backup --\
+ [add-bond br0 bond0 p1 p2 bond_mode=active-backup \
+ other_config:bond-primary=p1 -- \
set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \
add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \
@@ -45,8 +49,228 @@ OVS_VSWITCHD_START(
add-port br1 p3 -- set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 -- \
add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 -- \
add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --])
+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
WAIT_FOR_DUMMY_PORTS([p3], [p4])
+OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | grep 'active-backup primary: p1'`"])
+
+
+AT_CHECK([ovs-ofctl add-flow br0 action=normal])
+AT_CHECK([ovs-ofctl add-flow br1 action=normal])
+ovs-appctl netdev-dummy/set-admin-state up
+ovs-appctl time/warp 100
+ovs-appctl netdev-dummy/set-admin-state p2 down
+ovs-appctl time/stop
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.3,dst=10.0.0.4,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+ovs-appctl time/warp 100
+ovs-appctl netdev-dummy/set-admin-state p2 up
+ovs-appctl netdev-dummy/set-admin-state p1 down
+ovs-appctl time/warp 100
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+ovs-appctl time/warp 2000 100
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'in_port([[348]])' | strip_xout], [0], [dnl
+recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions:
+])
+
+ovs-appctl netdev-dummy/set-admin-state p1 up
+ovs-appctl time/warp 100
+OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+---- bond0 ----
+bond_mode: active-backup
+bond may use recirculation: no,
+bond-hash-basis: 0
+updelay: 0 ms
+downdelay: 0 ms
+lacp_status: off
+lacp_fallback_ab: false
+active-backup primary: p1
+
+
+member p1: enabled
+ active member
+ may_enable: true
+
+member p2: enabled
+ may_enable: true
+
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - active-backup bonding (primary validation)])
+dnl Make a switch with 3 ports in a bond, so that when we delete one of
+dnl the ports from the bond, there are still 2 ports left and the bond
+dnl remains functional.
+OVS_VSWITCHD_START(
+ [add-bond br0 bond0 p1 p2 p3 bond_mode=active-backup \
+ other_config:bond-primary=p1 -- \
+ set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
+ set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \
+ set interface p3 type=dummy options:pstream=punix:$OVS_RUNDIR/p3.sock ofport_request=3 -- \
+ add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy --])
+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
+
+dnl Make sure the initial primary member is set
+OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | grep 'active-backup primary: p1'`"])
+
+dnl Down the primary member and verify that we switched. Then
+dnl bring the primary back and verify that we switched back to the
+dnl primary.
+ovs-appctl netdev-dummy/set-admin-state p1 down
+ovs-appctl time/warp 100
+OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'member p1: disabled'`"])
+ovs-appctl netdev-dummy/set-admin-state p1 up
+ovs-appctl time/warp 100
+OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+---- bond0 ----
+bond_mode: active-backup
+bond may use recirculation: no,
+bond-hash-basis: 0
+updelay: 0 ms
+downdelay: 0 ms
+lacp_status: off
+lacp_fallback_ab: false
+active-backup primary: p1
+
+
+member p1: enabled
+ active member
+ may_enable: true
+
+member p2: enabled
+ may_enable: true
+
+member p3: enabled
+ may_enable: true
+
+])
+
+dnl Now delete the primary and verify that the output shows that the
+dnl primary is no longer a member
+ovs-vsctl --id=@p1 get Interface p1 -- remove Port bond0 interfaces @p1
+ovs-appctl time/warp 100
+OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | fgrep 'active-backup primary: p1 (no such member)'`"])
+
+dnl Now re-add the primary and verify that the output shows that the
+dnl primary is available again.
+dnl
+dnl First, get the UUIDs of the members that exist on bond0.
+dnl Strip the trailing ] so that we can add a new UUID to the end.
+uuids=`ovs-vsctl get Port bond0 interfaces | sed -e 's/]//'`
+dnl Create a new port "p1" and add its UUID to the set of members
+dnl on bond0.
+ovs-vsctl \
+ --id=@p1 create Interface name=p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
+ set Port bond0 interfaces="$uuids, @p1]"
+ovs-appctl time/warp 100
+OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+---- bond0 ----
+bond_mode: active-backup
+bond may use recirculation: no,
+bond-hash-basis: 0
+updelay: 0 ms
+downdelay: 0 ms
+lacp_status: off
+lacp_fallback_ab: false
+active-backup primary: p1
+
+
+member p1: enabled
+ active member
+ may_enable: true
+
+member p2: enabled
+ may_enable: true
+
+member p3: enabled
+ may_enable: true
+
+])
+
+dnl Switch to another primary
+ovs-vsctl set port bond0 other_config:bond-primary=p2
+ovs-appctl time/warp 100
+OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+---- bond0 ----
+bond_mode: active-backup
+bond may use recirculation: no,
+bond-hash-basis: 0
+updelay: 0 ms
+downdelay: 0 ms
+lacp_status: off
+lacp_fallback_ab: false
+active-backup primary: p2
+
+
+member p1: enabled
+ active member
+ may_enable: true
+
+member p2: enabled
+ may_enable: true
+
+member p3: enabled
+ may_enable: true
+
+])
+
+dnl Remove the "bond-primary" config directive from the bond.
+AT_CHECK([ovs-vsctl remove Port bond0 other_config bond-primary])
+ovs-appctl time/warp 100
+OVS_WAIT_UNTIL([ovs-appctl bond/show | STRIP_RECIRC_ID | STRIP_ACTIVE_MEMBER_MAC], [0], [dnl
+---- bond0 ----
+bond_mode: active-backup
+bond may use recirculation: no,
+bond-hash-basis: 0
+updelay: 0 ms
+downdelay: 0 ms
+lacp_status: off
+lacp_fallback_ab: false
+active-backup primary:
+
+
+member p1: enabled
+ active member
+ may_enable: true
+
+member p2: enabled
+ may_enable: true
+
+member p3: enabled
+ may_enable: true
+
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([ofproto-dpif - active-backup bonding (without primary)])
+dnl Create br0 with members p1, p2 and p7, creating bond0 with p1 and p2
+dnl and br1 with members p3, p4 and p8.
+dnl toggle p1,p2 of bond0 up and down to test bonding in active-backup mode.
+OVS_VSWITCHD_START(
+ [add-bond br0 bond0 p1 p2 bond_mode=active-backup --\
+ set interface p1 type=dummy options:pstream=punix:$OVS_RUNDIR/p1.sock ofport_request=1 -- \
+ set interface p2 type=dummy options:pstream=punix:$OVS_RUNDIR/p2.sock ofport_request=2 -- \
+ add-port br0 p7 -- set interface p7 ofport_request=7 type=dummy -- \
+ add-br br1 -- \
+ set bridge br1 other-config:hwaddr=aa:66:aa:66:00:00 -- \
+ set bridge br1 datapath-type=dummy other-config:datapath-id=1234 \
+ fail-mode=secure -- \
+ add-port br1 p3 -- set interface p3 type=dummy options:stream=unix:$OVS_RUNDIR/p1.sock ofport_request=3 -- \
+ add-port br1 p4 -- set interface p4 type=dummy options:stream=unix:$OVS_RUNDIR/p2.sock ofport_request=4 -- \
+ add-port br1 p8 -- set interface p8 ofport_request=8 type=dummy --])
AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
+WAIT_FOR_DUMMY_PORTS([p3], [p4])
+OVS_WAIT_UNTIL([test -n "`ovs-appctl bond/show | grep 'active-backup primary: '`"])
AT_CHECK([ovs-ofctl add-flow br0 action=normal])
AT_CHECK([ovs-ofctl add-flow br1 action=normal])
@@ -63,22 +287,21 @@ ovs-appctl netdev-dummy/set-admin-state p1 down
ovs-appctl time/warp 100
AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(src=10.0.0.5,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
AT_CHECK([ovs-appctl netdev-dummy/receive p7 'in_port(7),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
-ovs-appctl time/warp 200 100
-sleep 1
-AT_CHECK([grep 'in_port([[348]])' ovs-vswitchd.log | filter_flow_install | strip_xout], [0], [dnl
-recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(frag=no), actions:
-recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(frag=no), actions:
-recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(frag=no), actions:
-recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(frag=no), actions:
-recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), actions:
-recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), actions:
+ovs-appctl time/warp 2000 100
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep 'in_port([[348]])' | strip_xout], [0], [dnl
+recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(3),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0d),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0e),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions:
+recirc_id(0),in_port(4),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b,dst=ff:ff:ff:ff:ff:ff),eth_type(0x8035), packets:0, bytes:0, used:never, actions:
])
OVS_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([ofproto-dpif - balance-slb bonding])
-# Create br0 with interfaces bond0(p1, p2, p3) and p7,
-# and br1 with interfaces p4, p5, p6 and p8.
+# Create br0 with members bond0(p1, p2, p3) and p7,
+# and br1 with members p4, p5, p6 and p8.
# p1 <-> p4, p2 <-> p5, p3 <-> p6
# Send some traffic, make sure the traffic are spread based on source mac.
OVS_VSWITCHD_START(
@@ -120,8 +343,8 @@ OVS_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([ofproto-dpif - balance-tcp bonding])
-# Create br0 with interfaces bond0(p1, p2, p3) and p7,
-# and br1 with interfaces bond1(p4, p5, p6) and p8.
+# Create br0 with members bond0(p1, p2, p3) and p7,
+# and br1 with members bond1(p4, p5, p6) and p8.
# bond0 <-> bond1
# Send some traffic, make sure the traffic are spread based on L4 headers.
OVS_VSWITCHD_START(
@@ -152,6 +375,8 @@ ovs-appctl time/stop
ovs-appctl time/warp 100
ovs-appctl lacp/show > lacp.txt
ovs-appctl bond/show > bond.txt
+# Check that lb_output is not enabled by default.
+AT_CHECK([grep -q '^lb_output action: disabled' bond.txt])
(
for i in `seq 0 255` ;
do
@@ -164,9 +389,36 @@ AT_CHECK([ovs-appctl dpif/dump-flows br0 |grep tcp > br0_flows.txt])
AT_CHECK([ovs-appctl dpif/dump-flows br1 |grep tcp > br1_flows.txt])
# Make sure there is resonable distribution to all three ports.
# We don't want to make this check precise, in case hash function changes.
-AT_CHECK([test `grep in_port.4 br1_flows.txt |wc -l` -gt 24])
-AT_CHECK([test `grep in_port.5 br1_flows.txt |wc -l` -gt 24])
-AT_CHECK([test `grep in_port.6 br1_flows.txt |wc -l` -gt 24])
+AT_CHECK([test $(grep -c in_port.4 br1_flows.txt) -gt 24])
+AT_CHECK([test $(grep -c in_port.5 br1_flows.txt) -gt 24])
+AT_CHECK([test $(grep -c in_port.6 br1_flows.txt) -gt 24])
+# Check that bonding is doing dp_hash.
+AT_CHECK([grep -q dp_hash br0_flows.txt])
+# Enabling lb_output.
+AT_CHECK([ovs-vsctl set Port bond0 other_config:lb-output-action=true])
+OVS_WAIT_UNTIL([ovs-appctl bond/show | grep -q '^lb_output action: enabled'])
+ovs-appctl time/warp 10000 500
+ovs-appctl revalidator/wait
+OVS_WAIT_WHILE([ovs-appctl dpif/dump-flows br1 | grep -q tcp])
+(
+for i in $(seq 256) ;
+ do
+ pkt="in_port(7),eth(src=50:54:00:00:00:05,dst=50:54:00:00:01:00),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=6,tos=0,ttl=64,frag=no),tcp(src=8,dst=$i),tcp_flags(ack)"
+ AT_CHECK([ovs-appctl netdev-dummy/receive p7 $pkt])
+ done
+)
+ovs-appctl time/warp 300 100
+AT_CHECK([ovs-appctl dpif/dump-flows br0 | grep tcp > br0_flows.txt])
+AT_CHECK([ovs-appctl dpif/dump-flows br1 | grep tcp > br1_flows.txt])
+# Make sure there is reasonable distribution to all three ports, again.
+AT_CHECK([test $(grep -c in_port.4 br1_flows.txt) -gt 24])
+AT_CHECK([test $(grep -c in_port.5 br1_flows.txt) -gt 24])
+AT_CHECK([test $(grep -c in_port.6 br1_flows.txt) -gt 24])
+AT_CHECK([grep -q lb_output br0_flows.txt])
+
+AT_CHECK([test $(ovs-appctl dpif-netdev/bond-show | grep -c bucket) -eq 256])
+AT_CHECK([ovs-vsctl set Port bond0 other_config:lb-output-action=false])
+OVS_WAIT_UNTIL([test -z "$(ovs-appctl dpif-netdev/bond-show)"])
OVS_VSWITCHD_STOP()
AT_CLEANUP
@@ -1933,7 +2185,7 @@ cookie=0xd dl_src=60:66:66:66:00:02 actions=pop_mpls:0x0800,load:0xa000001->OXM_
cookie=0xd dl_src=60:66:66:66:00:03 actions=pop_mpls:0x0800,move:OXM_OF_IPV4_DST[[]]->OXM_OF_IPV4_SRC[[]],controller
cookie=0xd dl_src=60:66:66:66:00:04 actions=pop_mpls:0x0800,push:OXM_OF_IPV4_DST[[]],pop:OXM_OF_IPV4_SRC[[]],controller
cookie=0xd dl_src=60:66:66:66:00:05 actions=pop_mpls:0x0800,multipath(eth_src,50,modulo_n,1,0,OXM_OF_IPV4_SRC[[0..7]]),controller
-cookie=0xd dl_src=60:66:66:66:00:06 actions=pop_mpls:0x0800,bundle_load(eth_src,50,hrw,ofport,OXM_OF_IPV4_SRC[[0..15]],slaves:1,2),controller
+cookie=0xd dl_src=60:66:66:66:00:06 actions=pop_mpls:0x0800,bundle_load(eth_src,50,hrw,ofport,OXM_OF_IPV4_SRC[[0..15]],members:1,2),controller
cookie=0xd dl_src=60:66:66:66:00:07 actions=pop_mpls:0x0800,learn(table=1,hard_timeout=60,eth_type=0x800,nw_proto=6,OXM_OF_IPV4_SRC[[]]=OXM_OF_IPV4_DST[[]]),controller
cookie=0xd dl_src=60:66:66:66:00:08 actions=pop_mpls:0x0806,resubmit(1,1)
@@ -2931,7 +3183,7 @@ AT_CHECK([ovs-ofctl dump-flows br0 | ofctl_strip | sort], [0], [dnl
cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:03 actions=pop_mpls:0x0800,move:NXM_OF_IP_DST[[]]->NXM_OF_IP_SRC[[]],CONTROLLER:65535
cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:04 actions=pop_mpls:0x0800,push:NXM_OF_IP_DST[[]],pop:NXM_OF_IP_SRC[[]],CONTROLLER:65535
cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:05 actions=pop_mpls:0x0800,multipath(eth_src,50,modulo_n,1,0,NXM_OF_IP_SRC[[0..7]]),CONTROLLER:65535
- cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:06 actions=pop_mpls:0x0800,bundle_load(eth_src,50,hrw,ofport,NXM_OF_IP_SRC[[0..15]],slaves:1,2),CONTROLLER:65535
+ cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:06 actions=pop_mpls:0x0800,bundle_load(eth_src,50,hrw,ofport,NXM_OF_IP_SRC[[0..15]],members:1,2),CONTROLLER:65535
cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:07 actions=pop_mpls:0x0800,learn(table=1,hard_timeout=60,eth_type=0x800,nw_proto=6,NXM_OF_IP_SRC[[]]=NXM_OF_IP_DST[[]]),CONTROLLER:65535
cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:09 actions=resubmit(,2),CONTROLLER:65535
cookie=0xd, n_packets=3, n_bytes=186, dl_src=60:66:66:66:00:0a actions=pop_mpls:0x0800,mod_nw_dst:10.0.0.1,CONTROLLER:65535
@@ -3149,13 +3401,13 @@ AT_CHECK([ovs-ofctl --protocols=OpenFlow13 add-flow br0 'priority=0 actions=outp
dnl Singleton controller action.
AT_CHECK([ovs-ofctl monitor -P standard --protocols=OpenFlow13 br0 65534 --detach --no-chdir --pidfile 2> ofctl_monitor.log])
-# Become slave (OF 1.3), which should disable everything except port status.
+# Become secondary (OF 1.3), which should disable everything except port status.
ovs-appctl -t ovs-ofctl ofctl/send 041800180000000200000003000000000000000000000001
# Ensure that ovs-vswitchd gets a chance to reply before sending another command.
ovs-appctl time/warp 500 100
-# Use OF 1.3 OFPT_SET_ASYNC to enable OFPR_NO_MATCH for slave only.
+# Use OF 1.3 OFPT_SET_ASYNC to enable OFPR_NO_MATCH for secondary only.
ovs-appctl -t ovs-ofctl ofctl/send 041c002000000002000000000000000100000000000000000000000000000000
ovs-appctl time/warp 500 100
@@ -3167,11 +3419,11 @@ OVS_APP_EXIT_AND_WAIT([ovs-ofctl])
AT_CHECK([ovs-appctl revalidator/purge], [0])
AT_CHECK([cat ofctl_monitor.log], [0], [dnl
-send: OFPT_ROLE_REQUEST (OF1.3) (xid=0x2): role=slave generation_id=1
-OFPT_ROLE_REPLY (OF1.3) (xid=0x2): role=slave generation_id=1
+send: OFPT_ROLE_REQUEST (OF1.3) (xid=0x2): role=secondary generation_id=1
+OFPT_ROLE_REPLY (OF1.3) (xid=0x2): role=secondary generation_id=1
dnl
send: OFPT_SET_ASYNC (OF1.3) (xid=0x2):
- master:
+ primary:
PACKET_IN: (off)
PORT_STATUS: (off)
FLOW_REMOVED: (off)
@@ -3179,7 +3431,7 @@ send: OFPT_SET_ASYNC (OF1.3) (xid=0x2):
TABLE_STATUS: (off)
REQUESTFORWARD: (off)
- slave:
+ secondary:
PACKET_IN: no_match
PORT_STATUS: (off)
FLOW_REMOVED: (off)
@@ -5171,6 +5423,36 @@ AT_CHECK_UNQUOTED([tail -1 stdout], [0], [Datapath actions: 2
OVS_VSWITCHD_STOP
AT_CLEANUP
+# Checks for regression against a bug in which OVS dropped packets
+# with in_port=CONTROLLER when they were recirculated (because
+# CONTROLLER isn't a real port and could not be looked up).
+AT_SETUP([ofproto-dpif - packet-out recirculation])
+OVS_VSWITCHD_START
+add_of_ports br0 1 2
+
+AT_DATA([flows.txt], [dnl
+table=0 ip actions=mod_dl_dst:83:83:83:83:83:83,ct(table=1)
+table=1 ip actions=ct(commit),output:2
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+packet=ffffffffffff00102030405008004500001c00000000401100000a000002ffffffff0035111100080000
+AT_CHECK([ovs-ofctl packet-out br0 "in_port=controller packet=$packet actions=table"])
+
+# Dumps out the flow table, extracts the number of packets that have gone
+# through the (single) flow in table 1, and returns success if it's exactly 1.
+#
+# If this remains 0, then the recirculation isn't working properly since the
+# packet never goes through the flow in table 1.
+check_flows () {
+ n=$(ovs-ofctl dump-flows br0 table=1 | sed -n 's/.*n_packets=\([[0-9]]\{1,\}\).*/\1/p')
+ echo "n_packets=$n"
+ test "$n" = 1
+}
+OVS_WAIT_UNTIL([check_flows], [ovs dump-flows br0])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
AT_SETUP([ofproto-dpif - debug_slow action])
OVS_VSWITCHD_START
@@ -8352,8 +8634,8 @@ OVS_VSWITCHD_STOP
AT_CLEANUP
AT_SETUP([ofproto-dpif megaflow - normal, balance-tcp bonding])
-# Create bond0 on br0 with interfaces p0 and p1
-# and bond1 on br1 with interfaces p2 and p3
+# Create bond0 on br0 with members p0 and p1
+# and bond1 on br1 with members p2 and p3
# with p0 patched to p2 and p1 patched to p3.
OVS_VSWITCHD_START(
[add-bond br0 bond0 p0 p1 bond_mode=balance-tcp lacp=active \
@@ -8632,6 +8914,29 @@ recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(dst=50:54:00:00:00:0c),eth_ty
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([ofproto-dpif megaflow - set dl_dst with match on dl_src])
+OVS_VSWITCHD_START
+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg])
+add_of_ports br0 1 2
+AT_DATA([flows.txt], [dnl
+table=0 in_port=1,dl_src=50:54:00:00:00:09 actions=mod_dl_dst(50:54:00:00:00:0a),output(2)
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.6,dst=10.0.0.5,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0)'])
+sleep 1
+dnl The first packet is essentially a no-op, as the new destination MAC is the
+dnl same as the original. The second entry actually updates the destination
+dnl MAC. The last one must be dropped as it doesn't match with dl_src.
+AT_CHECK([strip_ufid < ovs-vswitchd.log | filter_flow_install | strip_used], [0], [dnl
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(frag=no), actions:2
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(frag=no), actions:set(eth(dst=50:54:00:00:00:0a)),2
+recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth(src=50:54:00:00:00:0b),eth_type(0x0800),ipv4(frag=no), actions:drop
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
m4_define([OFPROTO_DPIF_MEGAFLOW_DISABLED],
[AT_SETUP([ofproto-dpif megaflow - disabled$1])
OVS_VSWITCHD_START([], [], [], [m4_if([$1], [], [], [--dummy-numa="0,0,0,0,1,1,1,1"])])
@@ -8718,7 +9023,7 @@ AT_CHECK([ovs-appctl bfd/show | sed -n '/^.*Session State:.*/p'], [0], [dnl
Local Session State: up
Remote Session State: up
])
-# bond/show should show 'may-enable: true' for all slaves.
+# bond/show should show 'may-enable: true' for all members.
AT_CHECK([ovs-appctl bond/show | sed -n '/^.*may_enable:.*/p'], [0], [dnl
may_enable: true
may_enable: true
@@ -9466,7 +9771,7 @@ dnl Non-REPLY/RELATED packets get the ACL lookup with the packet headers
dnl in the actual packet direction in reg0 (IN=1, OUT=2). REPLY packets
dnl get the ACL lookup using the conntrack tuple and the inverted direction.
dnl RELATED packets get ACL lookup using the conntrack tuple in the direction
-dnl of the master connection, as storted in ct_mark.
+dnl of the parent connection, as stored in ct_mark.
dnl
dnl Incoming non-related packet in the original direction (ACL IN)
table=1 reg3=1, ip, ct_state=-rel-rpl+trk-inv action=set_field:1->reg0,resubmit(,3),goto_table:5
@@ -9477,7 +9782,7 @@ table=1 reg3=2, ip, ct_state=-rel-rpl+trk-inv action=set_field:2->reg0,resubmit(
dnl Outgoing non-related reply packet (CT ACL IN)
table=1 reg3=2, ip, ct_state=-rel+rpl+trk-inv action=set_field:1->reg0,resubmit(,3,ct),goto_table:4
dnl
-dnl Related packet (CT ACL in the direction of the master connection.)
+dnl Related packet (CT ACL in the direction of the parent connection.)
table=1 ip, ct_state=+rel+trk-inv, action=move:NXM_NX_CT_MARK[[]]->NXM_NX_REG0[[]],resubmit(,3,ct),goto_table:4
dnl Drop everything else.
table=1 priority=0, action=drop
@@ -9510,7 +9815,7 @@ table=5 reg2=0 priority=1000 action=drop
dnl Commit new non-related IP connections.
table=5 priority=10 reg2=1 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,exec(move:NXM_NX_REG3[[0..31]]->NXM_NX_CT_MARK[[0..31]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
dnl Commit new related connections in either direction, which inherit the mark
-dnl (the direction of the original direction master tuple) from the master
+dnl (the direction of the original direction parent tuple) from the parent
dnl connection.
table=5 priority=10 reg2=1 ct_state=+new+rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,exec(move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
dnl Forward everything else, including stateless accepts.
@@ -10540,6 +10845,62 @@ udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([ofproto-dpif - conntrack - match masked ct fields])
+OVS_VSWITCHD_START
+
+add_of_ports br0 1 2
+
+AT_CHECK([ovs-appctl vlog/set dpif_netdev:dbg vconn:info ofproto_dpif:info])
+
+dnl Allow new connections on p1->p2. Allow only established connections p2->p1
+AT_DATA([flows.txt], [dnl
+table=0,arp,action=normal
+table=0,ip,in_port=1,udp,nw_src=10.1.2.1/24,action=ct(commit)
+table=0,ip,in_port=1,udp6,ipv6_dst=2001:db8::1/64,action=ct(commit)
+table=0,ip,in_port=1,udp,tp_src=3/0x1,action=ct(commit)
+table=0,ip,in_port=2,actions=ct(table=1)
+table=0,ip6,in_port=2,actions=ct(table=1)
+table=1,priority=10,udp,ct_state=+trk+rpl,ct_nw_src=10.1.2.1/24,actions=controller
+table=1,priority=10,udp6,ct_state=+trk+rpl,ct_ipv6_dst=2001:db8::1/64,actions=controller
+table=1,priority=10,udp,ct_state=+trk+rpl,ct_tp_src=3/0x1,actions=controller
+table=1,priority=1,action=drop
+])
+
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+AT_CAPTURE_FILE([ofctl_monitor.log])
+AT_CHECK([ovs-ofctl monitor br0 65534 invalid_ttl -P nxt_packet_in --detach --no-chdir --pidfile 2> ofctl_monitor.log])
+
+dnl Match ct_nw_src=10.1.2.1/24
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.2.100,dst=10.1.2.200,proto=17,tos=0,ttl=64,frag=no),udp(src=6,dst=6)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.2.200,dst=10.1.2.100,proto=17,tos=0,ttl=64,frag=no),udp(src=6,dst=6)'])
+
+dnl Match ct_ipv6_dst=2001:db8::1/64
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x86dd),ipv6(src=2001:db8::1,dst=2001:db8::2,label=0,proto=17,tclass=0x70,hlimit=128,frag=no),udp(src=1,dst=2)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x86dd),ipv6(src=2001:db8::2,dst=2001:db8::1,label=0,proto=17,tclass=0x70,hlimit=128,frag=no),udp(src=2,dst=1)'])
+
+dnl Match ct_tp_src=3/0x1
+AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.1.1.1,dst=10.1.1.2,proto=17,tos=0,ttl=64,frag=no),udp(src=1,dst=2)'])
+AT_CHECK([ovs-appctl netdev-dummy/receive p2 'in_port(2),eth(src=50:54:00:00:00:0a,dst=50:54:00:00:00:09),eth_type(0x0800),ipv4(src=10.1.1.2,dst=10.1.1.1,proto=17,tos=0,ttl=64,frag=no),udp(src=2,dst=1)'])
+
+OVS_WAIT_UNTIL([test `wc -l < ofctl_monitor.log` -ge 6])
+OVS_WAIT_UNTIL([ovs-appctl -t ovs-ofctl exit])
+
+dnl Check this output.
+AT_CHECK([cat ofctl_monitor.log], [0], [dnl
+NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=106 ct_state=est|rpl|trk,ct_nw_src=10.1.2.100,ct_nw_dst=10.1.2.200,ct_nw_proto=17,ct_tp_src=6,ct_tp_dst=6,ip,in_port=2 (via action) data_len=106 (unbuffered)
+udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.2.200,nw_dst=10.1.2.100,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=6,tp_dst=6 udp_csum:221
+dnl
+NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=126 ct_state=est|rpl|trk,ct_ipv6_src=2001:db8::1,ct_ipv6_dst=2001:db8::2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ipv6,in_port=2 (via action) data_len=126 (unbuffered)
+udp6,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,ipv6_src=2001:db8::2,ipv6_dst=2001:db8::1,ipv6_label=0x00000,nw_tos=112,nw_ecn=0,nw_ttl=128,tp_src=2,tp_dst=1 udp_csum:bfe2
+dnl
+NXT_PACKET_IN (xid=0x0): table_id=1 cookie=0x0 total_len=106 ct_state=est|rpl|trk,ct_nw_src=10.1.1.1,ct_nw_dst=10.1.1.2,ct_nw_proto=17,ct_tp_src=1,ct_tp_dst=2,ip,in_port=2 (via action) data_len=106 (unbuffered)
+udp,vlan_tci=0x0000,dl_src=50:54:00:00:00:0a,dl_dst=50:54:00:00:00:09,nw_src=10.1.1.2,nw_dst=10.1.1.1,nw_tos=0,nw_ecn=0,nw_ttl=64,tp_src=2,tp_dst=1 udp_csum:553
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([ofproto-dpif - conntrack - ofproto/trace])
OVS_VSWITCHD_START
@@ -10598,6 +10959,42 @@ AT_CHECK([tail -1 stdout], [0],
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([ofproto-dpif - nat - ofproto/trace])
+OVS_VSWITCHD_START
+
+add_of_ports br0 1 2 3
+
+flow="in_port=1,udp,nw_src=1.1.1.1,nw_dst=1.1.1.2,udp_src=100,udp_dst=200"
+AT_DATA([flows.txt], [dnl
+table=0,priority=100,ip,nw_src=1.1.1.1,ct_state=-trk,action=ct(commit,nat(src=10.0.0.1-10.0.0.42:1000-1042),table=0)
+table=0,priority=100,udp,ct_state=+trk,nw_src=10.0.0.1,nw_dst=1.1.1.2,tp_src=1000,tp_dst=200,action=ct(commit,nat(dst=20.0.0.1-20.0.0.42:2000-2042),table=0)
+table=0,priority=100,udp,ct_state=+trk,nw_src=10.0.0.1,nw_dst=20.0.0.1,tp_src=1000,tp_dst=2000,action=3
+table=0,priority=90,ip,ct_state=+trk,action=2
+])
+AT_CHECK([ovs-ofctl del-flows br0])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl ofproto/trace br0 "$flow"], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: 3
+])
+
+flow="in_port=1,udp6,ipv6_src=1::1,ipv6_dst=1::2,udp_src=100,udp_dst=200"
+AT_DATA([flows.txt], [dnl
+table=0,priority=100,ip6,ipv6_src=1::1,ct_state=-trk,action=ct(commit,nat(src=[[10::1]]-[[10::42]]:1000-1042),table=0)
+table=0,priority=100,udp6,ct_state=+trk,ipv6_src=10::1,ipv6_dst=1::2,tp_src=1000,tp_dst=200,action=ct(commit,nat(dst=[[20::1]]-[[20::42]]:2000-2042),table=0)
+table=0,priority=100,udp6,ct_state=+trk,ipv6_src=10::1,ipv6_dst=20::1,tp_src=1000,tp_dst=2000,action=3
+table=0,priority=90,ip6,ct_state=+trk,action=2
+])
+AT_CHECK([ovs-ofctl del-flows br0])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+AT_CHECK([ovs-appctl ofproto/trace br0 "$flow"], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: 3
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([ofproto - set mtu])
OVS_VSWITCHD_START
@@ -10630,7 +11027,7 @@ AT_CHECK([ovs-vsctl add-port br0 p2 -- set int p2 type=dummy mtu_request=1600])
AT_CHECK([ovs-vsctl wait-until Interface p2 mtu=1600])
AT_CHECK([ovs-vsctl wait-until Interface br0 mtu=1600])
-# Explicitly set mtu_request on the internal interface. This should prevent
+# Explicitly set mtu_request on the internal interface. This should prevent
# the MTU from being overriden.
AT_CHECK([ovs-vsctl set int br0 mtu_request=1700])
AT_CHECK([ovs-vsctl wait-until Interface br0 mtu=1700])
diff --git a/tests/ofproto-macros.at b/tests/ofproto-macros.at
index b2b17eed335ab745aaacc967e1d918ab111dad35..736d9809cb32d0463dbee0644f9159be101e502c 100644
--- a/tests/ofproto-macros.at
+++ b/tests/ofproto-macros.at
@@ -131,7 +131,8 @@ strip_duration () {
# Strips 'ufid:...' from output, to make it easier to compare.
# (ufids are random.)
strip_ufid () {
- sed 's/ufid:[[-0-9a-f]]* //'
+ sed 's/mega_ufid:[[-0-9a-f]]* //
+ s/ufid:[[-0-9a-f]]* //'
}
m4_divert_pop([PREPARE_TESTS])
@@ -303,11 +304,11 @@ add_pmd_of_ports () {
m4_divert_pop([PREPARE_TESTS])
-# OVS_VSWITCHD_STOP([WHITELIST])
+# OVS_VSWITCHD_STOP([ALLOWLIST])
#
# Gracefully stops ovs-vswitchd and ovsdb-server, checking their log files
# for messages with severity WARN or higher and signaling an error if any
-# is present. The optional WHITELIST may contain shell-quoted "sed"
+# is present. The optional ALLOWLIST may contain shell-quoted "sed"
# commands to delete any warnings that are actually expected, e.g.:
#
# OVS_VSWITCHD_STOP(["/expected error/d"])
diff --git a/tests/ofproto.at b/tests/ofproto.at
index 23a5e150510a533b35e012eb3fab353588fa77df..08c0a20b607a17304e26d64ae8c8fa6995e67d29 100644
--- a/tests/ofproto.at
+++ b/tests/ofproto.at
@@ -2352,7 +2352,7 @@ head_table () {
actions: output group set_field strip_vlan push_vlan mod_nw_ttl dec_ttl set_mpls_ttl dec_mpls_ttl push_mpls pop_mpls set_queue
supported on Set-Field: tun_{id,src,dst,ipv6_{src,dst},flags,gbp_{id,flags},erspan_{idx,ver,dir,hwid},metadata0...metadata63} metadata in_{port,port_oxm} pkt_mark ct_{mark,label} reg0...reg15 xreg0...xreg7 xxreg0...xxreg3 eth_{src,dst} vlan_{tci,vid,pcp} mpls_{label,tc,ttl} ip_{src,dst} ipv6_{src,dst,label} nw_tos ip_dscp nw_{ecn,ttl} arp_{op,spa,tpa,sha,tha} tcp_{src,dst} udp_{src,dst} sctp_{src,dst} icmp_{type,code} icmpv6_{type,code} nd_{target,sll,tll,reserved,options_type} nsh_{flags,spi,si,c1...c4,ttl}
matching:
- arbitrary mask: dp_hash tun_{id,src,dst,ipv6_{src,dst},flags,gbp_{id,flags},erspan_{idx,ver,dir,hwid},metadata0...metadata63} metadata pkt_mark ct_{state,mark,label,nw_{src,dst},ipv6_{src,dst},tp_{src,dst}} reg0...reg15 xreg0...xreg7 xxreg0...xxreg3 eth_{src,dst} vlan_{tci,vid} ip_{src,dst} ipv6_{src,dst,label} ip_frag arp_{spa,tpa,sha,tha} tcp_{src,dst,flags} udp_{src,dst} sctp_{src,dst} nd_{target,sll,tll} nsh_{flags,c1...c4}
+ arbitrary mask: dp_hash tun_{id,src,dst,ipv6_{src,dst},flags,gbp_{id,flags},erspan_{idx,ver,dir,hwid},gtpu_{flags,msgtype},metadata0...metadata63} metadata pkt_mark ct_{state,mark,label,nw_{src,dst},ipv6_{src,dst},tp_{src,dst}} reg0...reg15 xreg0...xreg7 xxreg0...xxreg3 eth_{src,dst} vlan_{tci,vid} ip_{src,dst} ipv6_{src,dst,label} ip_frag arp_{spa,tpa,sha,tha} tcp_{src,dst,flags} udp_{src,dst} sctp_{src,dst} nd_{target,sll,tll} nsh_{flags,c1...c4}
exact match or wildcard: recirc_id packet_type conj_id in_{port,port_oxm} actset_output ct_{zone,nw_proto} eth_type vlan_pcp mpls_{label,tc,bos,ttl} nw_{proto,tos} ip_dscp nw_{ecn,ttl} arp_op icmp_{type,code} icmpv6_{type,code} nd_{reserved,options_type} nsh_{mdtype,np,spi,si,ttl}
' "$1"
@@ -3157,7 +3157,7 @@ check_async 2 OFPR_ACTION OFPPR_ADD OFPPR_DELETE OFPRR_DELETE
ovs-appctl -t ovs-ofctl ofctl/send 0109000c0123456700040080
check_async 3 OFPR_ACTION OFPR_INVALID_TTL OFPPR_ADD OFPPR_DELETE OFPRR_DELETE
-# Become slave, which should disable everything except port status.
+# Become secondary, which should disable everything except port status.
ovs-appctl -t ovs-ofctl ofctl/send 0104001400000002000023200000000a00000002
check_async 4 OFPPR_ADD OFPPR_DELETE
@@ -3172,7 +3172,7 @@ check_async 6 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE
# Restore controller ID 0.
ovs-appctl -t ovs-ofctl ofctl/send 010400180000000300002320000000140000000000000000
-# Become master.
+# Become primary.
ovs-appctl -t ovs-ofctl ofctl/send 0104001400000002000023200000000a00000001
check_async 7 OFPR_ACTION OFPPR_ADD
@@ -3264,7 +3264,7 @@ check_async 2 OFPR_ACTION OFPPR_ADD OFPPR_DELETE OFPRR_DELETE
ovs-appctl -t ovs-ofctl ofctl/send 0309000c0123456700040080
check_async 3 OFPR_ACTION OFPR_INVALID_TTL OFPPR_ADD OFPPR_DELETE OFPRR_DELETE
-# Become slave (OF 1.2), which should disable everything except port status.
+# Become secondary (OF 1.2), which should disable everything except port status.
ovs-appctl -t ovs-ofctl ofctl/send 031800180000000200000003000000000000000000000001
check_async 4 OFPPR_ADD OFPPR_DELETE
@@ -3279,7 +3279,7 @@ check_async 6 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE
# Restore controller ID 0.
ovs-appctl -t ovs-ofctl ofctl/send 030400180000000300002320000000140000000000000000
-# Become master (OF 1.2).
+# Become primary (OF 1.2).
ovs-appctl -t ovs-ofctl ofctl/send 031800180000000400000002000000000000000000000002
check_async 7 OFPR_ACTION OFPPR_ADD
@@ -3383,7 +3383,7 @@ check_async 1
ovs-appctl -t ovs-ofctl ofctl/send 0409000c0123456700000080
check_async 2 OFPR_ACTION OFPPR_ADD OFPPR_DELETE OFPRR_DELETE OFPRR_GROUP_DELETE
-# Become slave (OF 1.3), which should disable everything except port status.
+# Become secondary (OF 1.3), which should disable everything except port status.
ovs-appctl -t ovs-ofctl ofctl/send 041800180000000200000003000000000000000000000001
check_async 3 OFPPR_ADD OFPPR_DELETE
@@ -3398,7 +3398,7 @@ check_async 5 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE OFPRR_GROUP_DELETE
# Restore controller ID 0.
ovs-appctl -t ovs-ofctl ofctl/send 040400180000000300002320000000140000000000000000
-# Become master (OF 1.3).
+# Become primary (OF 1.3).
ovs-appctl -t ovs-ofctl ofctl/send 041800180000000400000002000000000000000000000002
check_async 6 OFPR_ACTION OFPPR_ADD
@@ -3615,7 +3615,7 @@ check_async 1
ovs_appctl -t ovs-ofctl ofctl/send 0509000c0123456700000080
check_async 2 OFPR_PACKET_OUT OFPR_ACTION_SET OFPPR_ADD OFPPR_MODIFY OFPPR_DELETE OFPRR_DELETE OFPRR_GROUP_DELETE
-# Become slave (OF 1.4), which should disable everything except port status.
+# Become secondary (OF 1.4), which should disable everything except port status.
ovs_appctl -t ovs-ofctl ofctl/send 051800180000000200000003000000000000000000000001
check_async 3 OFPPR_ADD OFPPR_MODIFY OFPPR_DELETE
@@ -3630,7 +3630,7 @@ check_async 5 OFPR_NO_MATCH OFPPR_DELETE OFPRR_DELETE OFPRR_GROUP_DELETE
# Restore controller ID 0.
ovs_appctl -t ovs-ofctl ofctl/send 050400180000000300002320000000140000000000000000
-# Become master (OF 1.4).
+# Become primary (OF 1.4).
ovs_appctl -t ovs-ofctl ofctl/send 051800180000000400000002000000000000000000000002
check_async 6 OFPR_PACKET_OUT OFPPR_ADD OFPPR_MODIFY OFPRR_DELETE
@@ -3740,27 +3740,27 @@ for i in 1 2; do
echo >>expout$i "OFPT_ROLE_REPLY (OF1.2): role=equal"
done
-# controller 1: Become slave (generation_id is initially undefined, so
+# controller 1: Become secondary (generation_id is initially undefined, so
# 2^63+2 should not be stale)
ovs-appctl -t `pwd`/c1 ofctl/send 031800180000000300000003000000008000000000000002
-echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=slave generation_id=9223372036854775810"
-echo >>expout1 "OFPT_ROLE_REPLY (OF1.2): role=slave generation_id=9223372036854775810"
+echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=secondary generation_id=9223372036854775810"
+echo >>expout1 "OFPT_ROLE_REPLY (OF1.2): role=secondary generation_id=9223372036854775810"
-# controller 2: Become master.
+# controller 2: Become primary.
ovs-appctl -t `pwd`/c2 ofctl/send 031800180000000300000002000000008000000000000003
-echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.2): role=master generation_id=9223372036854775811"
-echo >>expout2 "OFPT_ROLE_REPLY (OF1.2): role=master generation_id=9223372036854775811"
+echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.2): role=primary generation_id=9223372036854775811"
+echo >>expout2 "OFPT_ROLE_REPLY (OF1.2): role=primary generation_id=9223372036854775811"
-# controller 1: Try to become the master using a stale generation ID
+# controller 1: Try to become the primary using a stale generation ID
ovs-appctl -t `pwd`/c1 ofctl/send 031800180000000400000002000000000000000000000003
-echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=master generation_id=3"
+echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=primary generation_id=3"
echo >>expout1 "OFPT_ERROR (OF1.2): OFPRRFC_STALE"
-echo >>expout1 "OFPT_ROLE_REQUEST (OF1.2): role=master generation_id=3"
+echo >>expout1 "OFPT_ROLE_REQUEST (OF1.2): role=primary generation_id=3"
-# controller 1: Become master using a valid generation ID
+# controller 1: Become primary using a valid generation ID
ovs-appctl -t `pwd`/c1 ofctl/send 031800180000000500000002000000000000000000000001
-echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=master generation_id=1"
-echo >>expout1 "OFPT_ROLE_REPLY (OF1.2): role=master generation_id=1"
+echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.2): role=primary generation_id=1"
+echo >>expout1 "OFPT_ROLE_REPLY (OF1.2): role=primary generation_id=1"
for i in 1 2; do
ovs-appctl -t `pwd`/c$i ofctl/barrier
@@ -3779,8 +3779,8 @@ AT_CLEANUP
dnl This test checks that the role request/response messaging works,
dnl that generation_id is handled properly, and that role status update
-dnl messages are sent when a controller's role gets changed from master
-dnl to slave.
+dnl messages are sent when a controller's role gets changed from primary
+dnl to secondary.
AT_SETUP([ofproto - controller role (OpenFlow 1.4)])
OVS_VSWITCHD_START
on_exit 'kill `cat c1.pid c2.pid`'
@@ -3805,28 +3805,28 @@ for i in 1 2; do
echo >>expout$i "OFPT_ROLE_REPLY (OF1.4): role=equal"
done
-# controller 1: Become slave (generation_id is initially undefined, so
+# controller 1: Become secondary (generation_id is initially undefined, so
# 2^63+2 should not be stale)
ovs-appctl -t `pwd`/c1 ofctl/send 051800180000000300000003000000008000000000000002
-echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=slave generation_id=9223372036854775810"
-echo >>expout1 "OFPT_ROLE_REPLY (OF1.4): role=slave generation_id=9223372036854775810"
+echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=secondary generation_id=9223372036854775810"
+echo >>expout1 "OFPT_ROLE_REPLY (OF1.4): role=secondary generation_id=9223372036854775810"
-# controller 2: Become master.
+# controller 2: Become primary.
ovs-appctl -t `pwd`/c2 ofctl/send 051800180000000300000002000000008000000000000003
-echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.4): role=master generation_id=9223372036854775811"
-echo >>expout2 "OFPT_ROLE_REPLY (OF1.4): role=master generation_id=9223372036854775811"
+echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.4): role=primary generation_id=9223372036854775811"
+echo >>expout2 "OFPT_ROLE_REPLY (OF1.4): role=primary generation_id=9223372036854775811"
-# controller 1: Try to become the master using a stale generation ID
+# controller 1: Try to become the primary using a stale generation ID
ovs-appctl -t `pwd`/c1 ofctl/send 051800180000000400000002000000000000000000000003
-echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=master generation_id=3"
+echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=primary generation_id=3"
echo >>expout1 "OFPT_ERROR (OF1.4): OFPRRFC_STALE"
-echo >>expout1 "OFPT_ROLE_REQUEST (OF1.4): role=master generation_id=3"
+echo >>expout1 "OFPT_ROLE_REQUEST (OF1.4): role=primary generation_id=3"
-# controller 1: Become master using a valid generation ID
+# controller 1: Become primary using a valid generation ID
ovs-appctl -t `pwd`/c1 ofctl/send 051800180000000500000002000000000000000000000001
-echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=master generation_id=1"
-echo >>expout1 "OFPT_ROLE_REPLY (OF1.4): role=master generation_id=1"
-echo >>expout2 "OFPT_ROLE_STATUS (OF1.4): role=slave generation_id=1 reason=master_request"
+echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.4): role=primary generation_id=1"
+echo >>expout1 "OFPT_ROLE_REPLY (OF1.4): role=primary generation_id=1"
+echo >>expout2 "OFPT_ROLE_STATUS (OF1.4): role=secondary generation_id=1 reason=primary_request"
for i in 1 2; do
ovs-appctl -t `pwd`/c$i ofctl/barrier
@@ -3845,8 +3845,8 @@ AT_CLEANUP
dnl This test checks that the role request/response messaging works,
dnl that generation_id is handled properly, and that role status update
-dnl messages are sent when a controller's role gets changed from master
-dnl to slave.
+dnl messages are sent when a controller's role gets changed from primary
+dnl to secondary.
AT_SETUP([ofproto - controller role (OpenFlow 1.3)])
OVS_VSWITCHD_START
on_exit 'kill `cat c1.pid c2.pid`'
@@ -3871,28 +3871,28 @@ for i in 1 2; do
echo >>expout$i "OFPT_ROLE_REPLY (OF1.3): role=equal"
done
-# controller 1: Become slave (generation_id is initially undefined, so
+# controller 1: Become secondary (generation_id is initially undefined, so
# 2^63+2 should not be stale)
ovs-appctl -t `pwd`/c1 ofctl/send 041800180000000300000003000000008000000000000002
-echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=slave generation_id=9223372036854775810"
-echo >>expout1 "OFPT_ROLE_REPLY (OF1.3): role=slave generation_id=9223372036854775810"
+echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=secondary generation_id=9223372036854775810"
+echo >>expout1 "OFPT_ROLE_REPLY (OF1.3): role=secondary generation_id=9223372036854775810"
-# controller 2: Become master.
+# controller 2: Become primary.
ovs-appctl -t `pwd`/c2 ofctl/send 041800180000000300000002000000008000000000000003
-echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.3): role=master generation_id=9223372036854775811"
-echo >>expout2 "OFPT_ROLE_REPLY (OF1.3): role=master generation_id=9223372036854775811"
+echo >>experr2 "send: OFPT_ROLE_REQUEST (OF1.3): role=primary generation_id=9223372036854775811"
+echo >>expout2 "OFPT_ROLE_REPLY (OF1.3): role=primary generation_id=9223372036854775811"
-# controller 1: Try to become the master using a stale generation ID
+# controller 1: Try to become the primary using a stale generation ID
ovs-appctl -t `pwd`/c1 ofctl/send 041800180000000400000002000000000000000000000003
-echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=master generation_id=3"
+echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=primary generation_id=3"
echo >>expout1 "OFPT_ERROR (OF1.3): OFPRRFC_STALE"
-echo >>expout1 "OFPT_ROLE_REQUEST (OF1.3): role=master generation_id=3"
+echo >>expout1 "OFPT_ROLE_REQUEST (OF1.3): role=primary generation_id=3"
-# controller 1: Become master using a valid generation ID
+# controller 1: Become primary using a valid generation ID
ovs-appctl -t `pwd`/c1 ofctl/send 041800180000000500000002000000000000000000000001
-echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=master generation_id=1"
-echo >>expout1 "OFPT_ROLE_REPLY (OF1.3): role=master generation_id=1"
-echo >>expout2 "ONFT_ROLE_STATUS (OF1.3): role=slave generation_id=1 reason=master_request"
+echo >>experr1 "send: OFPT_ROLE_REQUEST (OF1.3): role=primary generation_id=1"
+echo >>expout1 "OFPT_ROLE_REPLY (OF1.3): role=primary generation_id=1"
+echo >>expout2 "ONFT_ROLE_STATUS (OF1.3): role=secondary generation_id=1 reason=primary_request"
for i in 1 2; do
ovs-appctl -t `pwd`/c$i ofctl/barrier
@@ -3978,13 +3978,13 @@ s/ (xid=0x[0-9a-fA-F]*)//'< monitor$i.log]],
done
}
-# controller 1: Become slave
+# controller 1: Become secondary
ovs-appctl -t `pwd`/c1 ofctl/send 061800180000000300000003000000008000000000000002
-# controller 2: Become master
+# controller 2: Become primary
ovs-appctl -t `pwd`/c2 ofctl/send 051800180000000300000002000000008000000000000003
-# controller 1: Become slave
+# controller 3: Become secondary
ovs-appctl -t `pwd`/c3 ofctl/send 051800180000000300000003000000008000000000000004
# controller 1: Enabled requestforward using set Asynchronous message
@@ -4074,16 +4074,16 @@ s/ (xid=0x[0-9a-fA-F]*)//'< monitor$i.log]],
done
}
-# controller 1: Become slave
-# NXT_ROLE_REQUEST (xid=0x3): role=slave
+# controller 1: Become secondary
+# NXT_ROLE_REQUEST (xid=0x3): role=secondary
ovs-appctl -t `pwd`/c1 ofctl/send 0104001400000003000023200000000a00000002
-# controller 2: Become master
-# NXT_ROLE_REQUEST (xid=0x3): role=master
+# controller 2: Become primary
+# NXT_ROLE_REQUEST (xid=0x3): role=primary
ovs-appctl -t `pwd`/c2 ofctl/send 0104001400000003000023200000000a00000001
-# controller 1: Become slave
-# NXT_ROLE_REQUEST (xid=0x3): role=slave
+# controller 3: Become secondary
+# NXT_ROLE_REQUEST (xid=0x3): role=secondary
ovs-appctl -t `pwd`/c3 ofctl/send 0104001400000003000023200000000a00000002
# controller 1: Enabled requestforward using OFPRAW_NXT_SET_ASYNC_CONFIG2
@@ -4172,16 +4172,16 @@ s/ (xid=0x[0-9a-fA-F]*)//'< monitor$i.log]],
done
}
-# controller 1: Become slave
-# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=slave
+# controller 1: Become secondary
+# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=secondary
ovs-appctl -t `pwd`/c1 ofctl/send 041800180000000300000003000000008000000000000002
-# controller 2: Become master
-# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=master
+# controller 2: Become primary
+# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=primary
ovs-appctl -t `pwd`/c2 ofctl/send 041800180000000300000002000000008000000000000003
-# controller 1: Become slave
-# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=slave
+# controller 3: Become secondary
+# OFPT_ROLE_REQUEST (OF1.3) (xid=0x3): role=secondary
ovs-appctl -t `pwd`/c3 ofctl/send 041800180000000300000003000000008000000000000004
# controller 1: Enabled requestforward using OFPRAW_NXT_SET_ASYNC_CONFIG2 (necessary for OF1.3)
@@ -4951,7 +4951,7 @@ EOF
AT_CHECK([ofctl_strip < monitor.log], [], [dnl
send: OFPT_GET_ASYNC_REQUEST (OF1.3):
OFPT_GET_ASYNC_REPLY (OF1.3):
- master:
+ primary:
PACKET_IN: no_match action
PORT_STATUS: add delete modify
FLOW_REMOVED: idle hard delete group_delete
@@ -4959,7 +4959,7 @@ OFPT_GET_ASYNC_REPLY (OF1.3):
TABLE_STATUS: (off)
REQUESTFORWARD: (off)
- slave:
+ secondary:
PACKET_IN: (off)
PORT_STATUS: add delete modify
FLOW_REMOVED: (off)
@@ -6322,12 +6322,12 @@ AT_CHECK([strip_xids < stderr | sed '/FLOW_MOD/,$d'], [0], [dnl
OFPT_ERROR: OFPBAC_BAD_SET_LEN
])
-AT_CHECK([ovs-ofctl add-flow br0 "in_port=2 actions=bundle_load(eth_src,50,hrw,ofport,tun_metadata1[[0..31]], slaves:4,8)"], [1], [], [stderr])
+AT_CHECK([ovs-ofctl add-flow br0 "in_port=2 actions=bundle_load(eth_src,50,hrw,ofport,tun_metadata1[[0..31]], members:4,8)"], [1], [], [stderr])
AT_CHECK([strip_xids < stderr | sed '/FLOW_MOD/,$d'], [0], [dnl
OFPT_ERROR: NXFMFC_INVALID_TLV_FIELD
])
-AT_CHECK([ovs-ofctl add-flow br0 "in_port=2 actions=bundle_load(eth_src,50,hrw,ofport,tun_metadata0[[32..63]], slaves:4,8)"], [1], [], [stderr])
+AT_CHECK([ovs-ofctl add-flow br0 "in_port=2 actions=bundle_load(eth_src,50,hrw,ofport,tun_metadata0[[32..63]], members:4,8)"], [1], [], [stderr])
AT_CHECK([strip_xids < stderr | sed '/FLOW_MOD/,$d'], [0], [dnl
OFPT_ERROR: OFPBAC_BAD_SET_LEN
])
diff --git a/tests/oss-fuzz/miniflow_target.c b/tests/oss-fuzz/miniflow_target.c
index 393443061e85a603bf2e3d593a2d32290fd457b3..50b8b0e6423799cafd546233101bc1e1bb840dff 100644
--- a/tests/oss-fuzz/miniflow_target.c
+++ b/tests/oss-fuzz/miniflow_target.c
@@ -206,8 +206,9 @@ test_minimask_combine(struct flow *flow)
struct minimask minicombined;
uint64_t storage[FLOW_U64S];
} m;
- struct flow flow2 = {0};
+ struct flow flow2;
+ memset(&flow2, 0, sizeof flow2);
mask.masks = *flow;
minimask = minimask_create(&mask);
diff --git a/tests/ovs-macros.at b/tests/ovs-macros.at
index fee50901543e4dcd6b836112fb526943556f460a..66545da5728fd4646a5b6b43de4782f8978847f1 100644
--- a/tests/ovs-macros.at
+++ b/tests/ovs-macros.at
@@ -106,6 +106,7 @@ if test "$IS_WIN32" = "yes"; then
signal=
retval=0
for arg; do
+ arg=$(echo $arg | tr -d '\n\r')
case $arg in
-*) signal=$arg ;;
[1-9][0-9]*)
@@ -333,3 +334,10 @@ m4_ifndef([AT_FAIL_IF],
[m4_define([AT_FAIL_IF],
[AT_CHECK([($1) \
&& exit 99 || exit 0], [0], [ignore], [ignore])])])
+
+dnl Certain Linux distributions, like CentOS, have default iptable rules
+dnl to reject input traffic from bridges such as br-underlay.
+dnl Add a rule to always accept the traffic.
+m4_define([IPTABLES_ACCEPT],
+ [AT_CHECK([iptables -I INPUT 1 -i $1 -j ACCEPT])
+ on_exit 'iptables -D INPUT 1 -i $1'])
diff --git a/tests/ovs-ofctl.at b/tests/ovs-ofctl.at
index c8062c8acc34b070ab0893eb3e4f62464ad53d19..5ddca67e71ed3593f394191a4cb87612c25877ae 100644
--- a/tests/ovs-ofctl.at
+++ b/tests/ovs-ofctl.at
@@ -101,6 +101,7 @@ for test_case in \
'mpls,mpls_label=5 NXM,OXM,OpenFlow11' \
'mpls,mpls_tc=1 NXM,OXM,OpenFlow11' \
'mpls,mpls_bos=0 NXM,OXM' \
+ 'mpls,mpls_ttl=5 NXM,OXM' \
'ip,ip_src=1.2.3.4 any' \
'ip,ip_src=192.168.0.0/24 any' \
'ip,ip_src=192.0.168.0/255.0.255.0 NXM,OXM,OpenFlow11' \
@@ -413,20 +414,20 @@ actions=multipath(eth_src, 50, hrw, 12, 0, NXM_NX_REG0[0..3]),multipath(symmetri
table=1,actions=drop
tun_id=0x1234000056780000/0xffff0000ffff0000,actions=drop
metadata=0x1234ffff5678ffff/0xffff0000ffff0000,actions=drop
-actions=bundle(eth_src,50,active_backup,ofport,slaves:1)
-actions=bundle(symmetric_l4,60,hrw,ofport,slaves:2,3)
-actions=bundle(symmetric_l4,60,hrw,ofport,slaves:)
-actions=bundle(symmetric_l3,60,hrw,ofport,slaves:2,3)
-actions=bundle(symmetric_l3,60,hrw,ofport,slaves:)
-actions=output:1,bundle(eth_src,0,hrw,ofport,slaves:1),output:2
-actions=bundle_load(eth_src,50,active_backup,ofport,reg0,slaves:1)
-actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3)
-actions=bundle_load(symmetric_l4,60,hrw,ofport,reg0[0..15],slaves:[2,3])
-actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:)
-actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3)
-actions=bundle_load(symmetric_l3,60,hrw,ofport,reg0[0..15],slaves:[2,3])
-actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:)
-actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],slaves:1),output:2
+actions=bundle(eth_src,50,active_backup,ofport,members:1)
+actions=bundle(symmetric_l4,60,hrw,ofport,members:2,3)
+actions=bundle(symmetric_l4,60,hrw,ofport,members:)
+actions=bundle(symmetric_l3,60,hrw,ofport,members:2,3)
+actions=bundle(symmetric_l3,60,hrw,ofport,members:)
+actions=output:1,bundle(eth_src,0,hrw,ofport,members:1),output:2
+actions=bundle_load(eth_src,50,active_backup,ofport,reg0,members:1)
+actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3)
+actions=bundle_load(symmetric_l4,60,hrw,ofport,reg0[0..15],members:[2,3])
+actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],members:)
+actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3)
+actions=bundle_load(symmetric_l3,60,hrw,ofport,reg0[0..15],members:[2,3])
+actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..30],members:)
+actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],members:1),output:2
actions=resubmit:1,resubmit(2),resubmit(,3),resubmit(2,3)
send_flow_rem,actions=output:1,output:NXM_NX_REG0,output:2,output:reg1[16..31],output:3
check_overlap,actions=output:1,exit,output:2
@@ -434,6 +435,7 @@ tcp,actions=fin_timeout(idle_timeout=5,hard_timeout=15)
actions=controller(max_len=123,reason=invalid_ttl,id=555)
actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678)
actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678,sampling_port=56789)
+mpls,mpls_label=5,mpls_tc=1,mpls_ttl=1,mpls_bos=0,actions=drop
ip,actions=ct(commit,zone=5)
ip,actions=ct(commit,exec(load(1->NXM_NX_CT_MARK[])))
ip,actions=ct(commit,exec(load(0x1->NXM_NX_CT_LABEL[])))
@@ -469,20 +471,20 @@ NXT_FLOW_MOD: ADD table:255 actions=multipath(eth_src,50,hrw,12,0,NXM_NX_REG0[0.
NXT_FLOW_MOD: ADD table:1 actions=drop
NXT_FLOW_MOD: ADD table:255 tun_id=0x1234000056780000/0xffff0000ffff0000 actions=drop
NXT_FLOW_MOD: ADD table:255 metadata=0x1234000056780000/0xffff0000ffff0000 actions=drop
-NXT_FLOW_MOD: ADD table:255 actions=bundle(eth_src,50,active_backup,ofport,slaves:1)
-NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,slaves:2,3)
-NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,slaves:)
-NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l3,60,hrw,ofport,slaves:2,3)
-NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l3,60,hrw,ofport,slaves:)
-NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle(eth_src,0,hrw,ofport,slaves:1),output:2
-NXT_FLOW_MOD: ADD table:255 actions=bundle_load(eth_src,50,active_backup,ofport,NXM_NX_REG0[],slaves:1)
-NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3)
-NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3)
-NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:)
-NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3)
-NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],slaves:2,3)
-NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..30],slaves:)
-NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],slaves:1),output:2
+NXT_FLOW_MOD: ADD table:255 actions=bundle(eth_src,50,active_backup,ofport,members:1)
+NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,members:2,3)
+NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l4,60,hrw,ofport,members:)
+NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l3,60,hrw,ofport,members:2,3)
+NXT_FLOW_MOD: ADD table:255 actions=bundle(symmetric_l3,60,hrw,ofport,members:)
+NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle(eth_src,0,hrw,ofport,members:1),output:2
+NXT_FLOW_MOD: ADD table:255 actions=bundle_load(eth_src,50,active_backup,ofport,NXM_NX_REG0[],members:1)
+NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3)
+NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3)
+NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l4,60,hrw,ofport,NXM_NX_REG0[0..30],members:)
+NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3)
+NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..15],members:2,3)
+NXT_FLOW_MOD: ADD table:255 actions=bundle_load(symmetric_l3,60,hrw,ofport,NXM_NX_REG0[0..30],members:)
+NXT_FLOW_MOD: ADD table:255 actions=output:1,bundle_load(eth_src,0,hrw,ofport,NXM_NX_REG0[16..31],members:1),output:2
NXT_FLOW_MOD: ADD table:255 actions=resubmit:1,resubmit:2,resubmit(,3),resubmit(2,3)
NXT_FLOW_MOD: ADD table:255 send_flow_rem actions=output:1,output:NXM_NX_REG0[],output:2,output:NXM_NX_REG1[16..31],output:3
NXT_FLOW_MOD: ADD table:255 check_overlap actions=output:1,exit,output:2
@@ -490,6 +492,7 @@ NXT_FLOW_MOD: ADD table:255 tcp actions=fin_timeout(idle_timeout=5,hard_timeout=
NXT_FLOW_MOD: ADD table:255 actions=controller(reason=invalid_ttl,max_len=123,id=555)
NXT_FLOW_MOD: ADD table:255 actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678)
NXT_FLOW_MOD: ADD table:255 actions=sample(probability=12345,collector_set_id=23456,obs_domain_id=34567,obs_point_id=45678,sampling_port=56789)
+NXT_FLOW_MOD: ADD table:255 mpls,mpls_label=5,mpls_tc=1,mpls_ttl=1,mpls_bos=0 actions=drop
NXT_FLOW_MOD: ADD table:255 ip actions=ct(commit,zone=5)
NXT_FLOW_MOD: ADD table:255 ip actions=ct(commit,exec(load:0x1->NXM_NX_CT_MARK[]))
NXT_FLOW_MOD: ADD table:255 ip actions=ct(commit,exec(load:0x1->NXM_NX_CT_LABEL[0..63],load:0->NXM_NX_CT_LABEL[64..127]))
diff --git a/tests/ovs-vsctl.at b/tests/ovs-vsctl.at
index 55c7a6e179cdca4637f51fdd883dd5f12db12901..c8babe36120afa20864ab9c5866e1a74bf5c453d 100644
--- a/tests/ovs-vsctl.at
+++ b/tests/ovs-vsctl.at
@@ -966,6 +966,14 @@ AT_CHECK([RUN_OVS_VSCTL([--if-exists del-zone-tp netdev zone=1])])
AT_CHECK([RUN_OVS_VSCTL([list-zone-tp netdev])], [0], [Zone:2, Timeout Policies: icmp_first=2 icmp_reply=3
])
+AT_CHECK(
+ [RUN_OVS_VSCTL_TOGETHER([--id=@n create CT_Zone external_ids:"test"="123"],
+ [--id=@m create Datapath datapath_version=0 ct_zones:"10"=@n],
+ [set Open_vSwitch . datapaths:"netdev"=@m])],
+ [0], [stdout])
+AT_CHECK([RUN_OVS_VSCTL([list-zone-tp netdev])], [0], [Zone:10, Timeout Policies: system default
+])
+
AT_CHECK([RUN_OVS_VSCTL([-- --id=@m create Datapath datapath_version=0 'capabilities={recirc=true}' -- set Open_vSwitch . datapaths:"system"=@m])], [0], [stdout])
AT_CHECK([RUN_OVS_VSCTL([list-dp-cap system])], [0], [recirc=true
])
diff --git a/tests/ovsdb-cluster.at b/tests/ovsdb-cluster.at
index 3a0bd4579e4161825b803afe90814f2fd85d5472..92aa4270930e176627f515a5951787e7d1b6dd45 100644
--- a/tests/ovsdb-cluster.at
+++ b/tests/ovsdb-cluster.at
@@ -179,6 +179,41 @@ AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
ovsdb_test_cluster_disconnect 5 leader yes
AT_CLEANUP
+AT_SETUP([OVSDB cluster - initial status should be disconnected])
+AT_KEYWORDS([ovsdb server negative unix cluster disconnect])
+
+n=3
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+ordinal_schema > schema
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=`ovsdb-tool db-cid s1.db`
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+for i in `seq 2 $n`; do
+ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill `cat *.pid`'
+for i in `seq $n`; do
+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+done
+for i in `seq $n`; do
+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+# Stop all servers, and start the s1 only, to test initial connection status
+# when there is no leader yet.
+for i in `seq 1 $n`; do
+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+done
+i=1
+AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+
+# The initial status should be disconnected. So wait should fail.
+AT_CHECK([ovsdb_client_wait --timeout=1 unix:s$i.ovsdb $schema_name connected], [142], [ignore], [ignore])
+OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+
+AT_CLEANUP
+
AT_BANNER([OVSDB cluster election timer change])
@@ -273,6 +308,88 @@ OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s4 cluster/status $schema_name | grep "Ele
AT_CLEANUP
+
+AT_BANNER([OVSDB cluster install snapshot RPC])
+
+AT_SETUP([OVSDB cluster - install snapshot RPC])
+AT_KEYWORDS([ovsdb server positive unix cluster snapshot])
+
+n=3
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+ordinal_schema > schema
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=`ovsdb-tool db-cid s1.db`
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+for i in `seq 2 $n`; do
+ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill `cat *.pid`'
+for i in `seq $n`; do
+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+done
+for i in `seq $n`; do
+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
+ {"op": "insert",
+ "table": "indexed",
+ "row": {"i": 0}}]]'], [0], [ignore], [ignore])
+
+# Kill one follower (s2) and write some data to the cluster, so that the follower falls behind.
+printf "\ns2: stopping\n"
+OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s2], [s2.pid])
+
+# Delete "i":0 and re-add it to get a different UUID for it.
+AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
+ {"op": "delete",
+ "table": "indexed",
+ "where": [["i", "==", 0]]}]]'], [0], [ignore], [ignore])
+
+AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
+ {"op": "insert",
+ "table": "indexed",
+ "row": {"i": 0}}]]'], [0], [ignore], [ignore])
+
+AT_CHECK([ovsdb-client transact unix:s1.ovsdb '[["idltest",
+ {"op": "insert",
+ "table": "indexed",
+ "row": {"i": 1}}]]'], [0], [ignore], [ignore])
+
+# Compact leader online to generate snapshot
+AT_CHECK([ovs-appctl -t "`pwd`"/s1 ovsdb-server/compact])
+
+# Start the follower s2 again.
+AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s2.log --pidfile=s2.pid --unixctl=s2 --remote=punix:s2.ovsdb s2.db])
+AT_CHECK([ovsdb_client_wait unix:s2.ovsdb $schema_name connected])
+
+# A client transaction through s2. During this transaction, there will be an
+# install_snapshot RPC because s2 detects it is behind and s1 doesn't have the
+# pre_log_index requested by s2 because it is already compacted.
+# After the install_snapshot RPC process, the transaction through s2 should
+# succeed.
+AT_CHECK([ovsdb-client transact unix:s2.ovsdb '[["idltest",
+ {"op": "insert",
+ "table": "indexed",
+ "row": {"i": 2}}]]'], [0], [ignore], [ignore])
+
+# The snapshot should overwrite the in-memory contents of the DB on S2
+# without generating any constraint violations. All three records (0, 1, 2)
+# should be in the DB at this point.
+AT_CHECK([ovsdb-client --no-headings dump unix:s2.ovsdb idltest indexed | uuidfilt | sort -k 2], [0], [dnl
+<0> 0
+<1> 1
+<2> 2
+indexed table
+])
+
+for i in `seq $n`; do
+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+done
+
+AT_CLEANUP
+
OVS_START_SHELL_HELPERS
@@ -436,6 +553,61 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update
AT_CLEANUP
+
+AT_SETUP([OVSDB cluster - competing candidates])
+AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates])
+
+n=3
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+ordinal_schema > schema
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=`ovsdb-tool db-cid s1.db`
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+for i in `seq 2 $n`; do
+ AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill `cat *.pid`'
+for i in `seq $n`; do
+ AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+done
+for i in `seq $n`; do
+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+# We need to simulate the situation where two candidates start an election in
+# the same term.
+#
+# Before triggering leader election, tell follower s2 not to send vote requests
+# (simulating a vote request that is lost or not handled in time), and tell
+# follower s3 to delay its election timer so that s3 doesn't send a vote
+# request before s2 enters term 2.
+AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/failure-test dont-send-vote-request], [0], [ignore])
+AT_CHECK([ovs-appctl -t "`pwd`"/s3 cluster/failure-test delay-election], [0], [ignore])
+
+# Restart the leader (s1), which will become a follower; both old followers
+# will start an election as candidates.  The new follower (old leader) will vote
+# for one of them, and the other candidate should step back to follower again.
+kill -9 `cat s1.pid`
+AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s1.log --pidfile=s1.pid --unixctl=s1 --remote=punix:s1.ovsdb s1.db])
+
+# Tell s1 to delay its election timer so that it won't start an election
+# before s3 becomes a candidate.
+AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/failure-test delay-election], [0], [ignore])
+
+OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Term: 2"])
+
+for i in `seq $n`; do
+ OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "candidate"])
+ AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+for i in `seq $n`; do
+ OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+done
+
+AT_CLEANUP
+
AT_BANNER([OVSDB - cluster tests])
@@ -529,7 +701,7 @@ ovsdb|WARN|schema: changed 30 columns in 'Open_vSwitch' database from ephemeral
# Use file instead of var because code inside "while" runs in a subshell.
echo 0 > phase
i=0
- (while :; do echo; sleep 0.1; done) | while read REPLY; do
+ (while :; do echo || exit 0; sleep 0.1; done) | while read REPLY; do
printf "t=%2d s:" $i
done=0
for j in $(seq 0 $(expr $n1 - 1)); do
diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at
index cc38d69c106ce7882b57a2dc7e07c55d41a38749..4b4791a7daac3952f9ac9b710ec3286f12b8de17 100644
--- a/tests/ovsdb-idl.at
+++ b/tests/ovsdb-idl.at
@@ -12,25 +12,6 @@ ovsdb_start_idltest () {
on_exit 'kill `cat ovsdb-server.pid`'
}
-# ovsdb_cluster_start_idltest [REMOTE] [SCHEMA]
-#
-# Creates a database using SCHEMA (default: idltest.ovsschema) and
-# starts a database cluster listening on punix:socket and REMOTE (if
-# specified).
-ovsdb_cluster_start_idltest () {
- local n=$1
- ovsdb-tool create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft || return $?
- cid=`ovsdb-tool db-cid s1.db`
- schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
- for i in `seq 2 $n`; do
- ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft || return $?
- done
- for i in `seq $n`; do
- ovsdb-server -vraft -vconsole:warn --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb ${2:+--remote=$2} s$i.db || return $?
- done
- on_exit 'kill `cat s*.pid`'
-}
-
# ovsdb_cluster_leader [REMOTES] [DATABASE]
#
# Returns the leader of the DATABASE cluster.
@@ -48,6 +29,35 @@ ovsdb_cluster_leader () {
done
}])
+# OVSDB_CLUSTER_START_IDLTEST([N], [REMOTE])
+#
+# Creates a clustered database from idltest.ovsschema and starts N servers, each listening on
+# punix:s<i>.ovsdb and REMOTE (if specified); waits until each reports 'Status: cluster member'.
+m4_define([OVSDB_CLUSTER_START_IDLTEST],
+ [n=$1
+ AT_CHECK([ovsdb-tool create-cluster s1.db \
+ $abs_srcdir/idltest.ovsschema unix:s1.raft])
+ cid=$(ovsdb-tool db-cid s1.db)
+ schema_name=$(ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema)
+ for i in $(seq 2 $n); do
+ AT_CHECK([ovsdb-tool join-cluster s$i.db \
+ $schema_name unix:s$i.raft unix:s1.raft])
+ done
+ on_exit 'kill $(cat s*.pid)'
+ for i in $(seq $n); do
+ AT_CHECK([ovsdb-server -vraft -vconsole:warn --detach --no-chdir \
+ --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i \
+ --remote=punix:s$i.ovsdb \
+ m4_if([$2], [], [], [--remote=$2]) s$i.db])
+ done
+
+ for i in $(seq $n); do
+ OVS_WAIT_UNTIL([ovs-appctl -t $(pwd)/s$i cluster/status ${schema_name} \
+ | grep -q 'Status: cluster member'])
+ done
+])
+
+
# OVSDB_CHECK_IDL_C(TITLE, [PRE-IDL-TXN], TRANSACTIONS, OUTPUT, [KEYWORDS],
# [FILTER])
#
@@ -954,7 +964,10 @@ AT_CHECK([sort stdout | uuidfilt], [0],
# Check that ovsdb-idl figured out that table link2 and column l2 are missing.
AT_CHECK([grep ovsdb_idl stderr | sort], [0], [dnl
+test-ovsdb|ovsdb_idl|idltest database lacks indexed table (database needs upgrade?)
test-ovsdb|ovsdb_idl|idltest database lacks link2 table (database needs upgrade?)
+test-ovsdb|ovsdb_idl|idltest database lacks simple5 table (database needs upgrade?)
+test-ovsdb|ovsdb_idl|idltest database lacks simple6 table (database needs upgrade?)
test-ovsdb|ovsdb_idl|idltest database lacks singleton table (database needs upgrade?)
test-ovsdb|ovsdb_idl|link1 table in idltest database lacks l2 column (database needs upgrade?)
])
@@ -1150,6 +1163,7 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated],
"where": [],
"row": {"b": true}}]']],
[[000: i=1 r=2 b=true s=mystring u=<0> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<1> <2>] uuid=<3>
+000: inserted row: uuid=<3>
000: updated columns: b ba i ia r ra s sa u ua
001: {"error":null,"result":[{"count":2}]}
002: i=0 r=0 b=true s= u=<4> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<5>
@@ -1158,6 +1172,111 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated],
003: done
]])
+dnl This test creates a database with weak references and checks that orphan
+dnl rows created for weak references are not available for iteration via
+dnl the list of tracked changes.
+OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, orphan weak references],
+ [['["idltest",
+ {"op": "insert",
+ "table": "simple",
+ "row": {"s": "row0_s"},
+ "uuid-name": "weak_row0"},
+ {"op": "insert",
+ "table": "simple",
+ "row": {"s": "row1_s"},
+ "uuid-name": "weak_row1"},
+ {"op": "insert",
+ "table": "simple",
+ "row": {"s": "row2_s"},
+ "uuid-name": "weak_row2"},
+ {"op": "insert",
+ "table": "simple6",
+ "row": {"name": "first_row",
+ "weak_ref": ["set",
+ [["named-uuid", "weak_row0"],
+ ["named-uuid", "weak_row1"],
+ ["named-uuid", "weak_row2"]]
+ ]}}]']],
+ [['condition simple []' \
+ 'condition simple [["s","==","row1_s"]]' \
+ '["idltest",
+ {"op": "update",
+ "table": "simple6",
+ "where": [],
+ "row": {"name": "new_name"}}]' \
+ '["idltest",
+ {"op": "delete",
+ "table": "simple6",
+ "where": []}]']],
+ [[000: change conditions
+001: inserted row: uuid=<0>
+001: name=first_row weak_ref=[] uuid=<0>
+001: updated columns: name weak_ref
+002: change conditions
+003: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
+003: inserted row: uuid=<2>
+003: name=first_row weak_ref=[<2>] uuid=<0>
+003: updated columns: s
+004: {"error":null,"result":[{"count":1}]}
+005: name=new_name weak_ref=[<2>] uuid=<0>
+005: updated columns: name
+006: {"error":null,"result":[{"count":1}]}
+007: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
+008: done
+]])
+
+dnl This test creates a database with weak references and checks that the
+dnl content of orphaned rows created for weak references after a monitor
+dnl condition change is not leaked when the row is reinserted and deleted.
+OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated, orphan rows, conditional],
+ [['["idltest",
+ {"op": "insert",
+ "table": "simple",
+ "row": {"s": "row0_s"},
+ "uuid-name": "weak_row0"},
+ {"op": "insert",
+ "table": "simple",
+ "row": {"s": "row1_s"},
+ "uuid-name": "weak_row1"},
+ {"op": "insert",
+ "table": "simple6",
+ "row": {"name": "first_row",
+ "weak_ref": ["set",
+ [["named-uuid", "weak_row0"]]
+ ]}}]']],
+ [['condition simple []' \
+ 'condition simple [["s","==","row0_s"]]' \
+ 'condition simple [["s","==","row1_s"]]' \
+ 'condition simple [["s","==","row0_s"]]' \
+ '["idltest",
+ {"op": "delete",
+ "table": "simple6",
+ "where": []}]']],
+ [[000: change conditions
+001: inserted row: uuid=<0>
+001: name=first_row weak_ref=[] uuid=<0>
+001: updated columns: name weak_ref
+002: change conditions
+003: i=0 r=0 b=false s=row0_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
+003: inserted row: uuid=<2>
+003: name=first_row weak_ref=[<2>] uuid=<0>
+003: updated columns: s
+004: change conditions
+005: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3>
+005: inserted row: uuid=<3>
+005: updated columns: s
+006: change conditions
+007: deleted row: uuid=<3>
+007: i=0 r=0 b=false s=row0_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
+007: i=0 r=0 b=false s=row1_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<3>
+007: inserted row: uuid=<2>
+007: name=first_row weak_ref=[<2>] uuid=<0>
+007: updated columns: s
+008: {"error":null,"result":[{"count":1}]}
+009: i=0 r=0 b=false s=row0_s u=<1> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
+010: done
+]])
+
OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops],
[],
[['["idltest",
@@ -1212,6 +1331,7 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops],
[[000: empty
001: {"error":null,"result":[{"uuid":["uuid","<0>"]},{"uuid":["uuid","<1>"]}]}
002: i=1 r=2 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0>
+002: inserted row: uuid=<0>
002: updated columns: b ba i ia r ra s sa u ua
003: {"error":null,"result":[{"count":2}]}
004: i=0 r=0 b=true s= u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
@@ -1223,6 +1343,7 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops],
006: updated columns: r
007: {"error":null,"result":[{"uuid":["uuid","<6>"]}]}
008: i=-1 r=125 b=false s= u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6>
+008: inserted row: uuid=<6>
008: updated columns: ba i ia r ra
009: {"error":null,"result":[{"count":2}]}
010: i=-1 r=125 b=false s=newstring u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6>
@@ -1230,7 +1351,8 @@ OVSDB_CHECK_IDL_TRACK([track, simple idl, initially empty, various ops],
010: updated columns: s
010: updated columns: s
011: {"error":null,"result":[{"count":1}]}
-012: ##deleted## uuid=<1>
+012: deleted row: uuid=<1>
+012: i=0 r=123.5 b=true s=newstring u=<5> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
013: reconnect
014: i=-1 r=125 b=false s=newstring u=<5> ia=[1] ra=[1.5] ba=[false] sa=[] ua=[] uuid=<6>
014: i=1 r=123.5 b=true s=mystring u=<2> ia=[1 2 3] ra=[-0.5] ba=[true] sa=[abc def] ua=[<3> <4>] uuid=<0>
@@ -1288,6 +1410,18 @@ OVSDB_CHECK_IDL_PY([partial-map idl],
009: done
]])
+OVSDB_CHECK_IDL_PY([partial-map update set refmap idl],
+[['["idltest", {"op":"insert", "table":"simple3", "row":{"name":"myString1"}},
+ {"op":"insert", "table":"simple5", "row":{"name":"myString2"}}]']],
+['partialmapmutateirefmap'],
+[[000: name=myString1 uset=[]
+000: name=myString2 irefmap=[]
+001: commit, status=success
+002: name=myString1 uset=[]
+002: name=myString2 irefmap=[(1 <0>)]
+003: done
+]])
+
m4_define([OVSDB_CHECK_IDL_PARTIAL_UPDATE_SET_COLUMN],
[AT_SETUP([$1 - C])
AT_KEYWORDS([ovsdb server idl partial update set column positive $5])
@@ -1764,41 +1898,34 @@ OVSDB_CHECK_IDL_COMPOUND_INDEX_WITH_REF([set, simple3 idl-compound-index-with-re
]])
m4_define([CHECK_STREAM_OPEN_BLOCK],
- [AT_SETUP([Check Stream open block - C - $1])
- AT_SKIP_IF([test "$1" = "tcp6" && test "$IS_WIN32" = "yes"])
- AT_SKIP_IF([test "$1" = "tcp6" && test "$HAVE_IPV6" = "no"])
- AT_KEYWORDS([Check Stream open block $1])
- AT_CHECK([ovsdb_start_idltest "ptcp:0:$2"])
+ [AT_SETUP([Check stream open block - $1 - $3])
+ AT_SKIP_IF([test "$3" = "tcp6" && test "$IS_WIN32" = "yes"])
+ AT_SKIP_IF([test "$3" = "tcp6" && test "$HAVE_IPV6" = "no"])
+ AT_KEYWORDS([ovsdb server stream open_block $3])
+ AT_CHECK([ovsdb_start_idltest "ptcp:0:$4"])
PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT])
WRONG_PORT=$(($TCP_PORT + 101))
- AT_CHECK([test-stream tcp:$2:$TCP_PORT], [0], [ignore])
- AT_CHECK([test-stream tcp:$2:$WRONG_PORT], [1], [ignore], [ignore])
+ AT_CHECK([$2 tcp:$4:$TCP_PORT], [0], [ignore])
+ AT_CHECK([$2 tcp:$4:$WRONG_PORT], [1], [ignore], [ignore])
OVSDB_SERVER_SHUTDOWN
- AT_CHECK([test-stream tcp:$2:$TCP_PORT], [1], [ignore], [ignore])
+ AT_CHECK([$2 tcp:$4:$TCP_PORT], [1], [ignore], [ignore])
AT_CLEANUP])
-CHECK_STREAM_OPEN_BLOCK([tcp], [127.0.0.1])
-CHECK_STREAM_OPEN_BLOCK([tcp6], [[[::1]]])
-
-m4_define([CHECK_STREAM_OPEN_BLOCK_PY],
- [AT_SETUP([$1 - Python3])
- AT_KEYWORDS([Check PY Stream open block - $3])
- AT_CHECK([ovsdb_start_idltest "ptcp:0:127.0.0.1"])
- PARSE_LISTENING_PORT([ovsdb-server.log], [TCP_PORT])
- WRONG_PORT=$(($TCP_PORT + 101))
- AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$TCP_PORT], [0], [ignore])
- AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$WRONG_PORT], [1], [ignore])
- OVSDB_SERVER_SHUTDOWN
- AT_CHECK([$3 $srcdir/test-stream.py tcp:127.0.0.1:$TCP_PORT], [1], [ignore])
- AT_CLEANUP])
+CHECK_STREAM_OPEN_BLOCK([C], [test-stream], [tcp], [127.0.0.1])
+CHECK_STREAM_OPEN_BLOCK([C], [test-stream], [tcp6], [[[::1]]])
+CHECK_STREAM_OPEN_BLOCK([Python3], [$PYTHON3 $srcdir/test-stream.py],
+ [tcp], [127.0.0.1])
+CHECK_STREAM_OPEN_BLOCK([Python3], [$PYTHON3 $srcdir/test-stream.py],
+ [tcp6], [[[::1]]])
# same as OVSDB_CHECK_IDL but uses Python IDL implementation with tcp
# with multiple remotes to assert the idl connects to the leader of the Raft cluster
m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY],
[AT_SETUP([$1 - Python3 (leader only)])
+ AT_SKIP_IF([test "$IS_ARM64" = "yes"])
AT_KEYWORDS([ovsdb server idl Python leader_only with tcp socket])
m4_define([LPBK],[127.0.0.1])
- AT_CHECK([ovsdb_cluster_start_idltest $2 "ptcp:0:"LPBK])
+ OVSDB_CLUSTER_START_IDLTEST([$2], ["ptcp:0:"LPBK])
PARSE_LISTENING_PORT([s2.log], [TCP_PORT_1])
PARSE_LISTENING_PORT([s3.log], [TCP_PORT_2])
PARSE_LISTENING_PORT([s1.log], [TCP_PORT_3])
@@ -1814,3 +1941,59 @@ m4_define([OVSDB_CHECK_IDL_LEADER_ONLY_PY],
OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL connects to leader], 3, ['remote'])
OVSDB_CHECK_IDL_LEADER_ONLY_PY([Check Python IDL reconnects to leader], 3, ['remote' '+remotestop' 'remote'])
+
+# OVSDB_CHECK_CLUSTER_IDL_C(TITLE, N, [PRE-IDL-TXN], TRANSACTIONS, OUTPUT, [KEYWORDS], [FILTER])
+# Same as OVSDB_CHECK_IDL but uses the C IDL over tcp against an N-server cluster (the s1-s3 ports are parsed, so N >= 3).
+m4_define([OVSDB_CHECK_CLUSTER_IDL_C],
+ [AT_SETUP([$1 - C - tcp])
+ AT_KEYWORDS([ovsdb server idl positive tcp socket $6])
+ m4_define([LPBK],[127.0.0.1])
+ OVSDB_CLUSTER_START_IDLTEST([$2], ["ptcp:0:"LPBK])
+ PARSE_LISTENING_PORT([s1.log], [TCP_PORT_1])
+ PARSE_LISTENING_PORT([s2.log], [TCP_PORT_2])
+ PARSE_LISTENING_PORT([s3.log], [TCP_PORT_3])
+ remotes=tcp:LPBK:$TCP_PORT_1,tcp:LPBK:$TCP_PORT_2,tcp:LPBK:$TCP_PORT_3
+
+ m4_if([$3], [], [],
+ [AT_CHECK([ovsdb-client transact $remotes $3], [0], [ignore], [ignore])])
+ AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 idl tcp:LPBK:$TCP_PORT_1 $4],
+ [0], [stdout], [ignore])
+ AT_CHECK([sort stdout | uuidfilt]m4_if([$7],,, [[| $7]]),
+ [0], [$5])
+ AT_CLEANUP])
+
+# Checks that monitor_cond_since works fine when disconnects happen
+# with cond_change requests in flight (i.e., IDL is properly updated).
+OVSDB_CHECK_CLUSTER_IDL_C([simple idl, monitor_cond_since, cluster disconnect],
+ 3,
+ [['["idltest",
+ {"op": "insert",
+ "table": "simple",
+ "row": {"i": 1,
+ "r": 1.0,
+ "b": true}},
+ {"op": "insert",
+ "table": "simple",
+ "row": {"i": 2,
+ "r": 1.0,
+ "b": true}}]']],
+ [['condition simple []' \
+ 'condition simple [["i","==",2]]' \
+ 'condition simple [["i","==",1]]' \
+ '+reconnect' \
+ '["idltest",
+ {"op": "update",
+ "table": "simple",
+ "where": [["i", "==", 1]],
+ "row": {"r": 2.0 }}]']],
+ [[000: change conditions
+001: empty
+002: change conditions
+003: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
+004: change conditions
+005: reconnect
+006: i=2 r=1 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<1>
+007: {"error":null,"result":[{"count":1}]}
+008: i=1 r=2 b=true s= u=<0> ia=[] ra=[] ba=[] sa=[] ua=[] uuid=<2>
+009: done
+]])
diff --git a/tests/ovsdb-monitor-sort.py b/tests/ovsdb-monitor-sort.py
index 7d368a7afce832240f04c72f79a65979ed31b1db..8a7976bdc70700720ebeb7ffc520ba71047e98a4 100755
--- a/tests/ovsdb-monitor-sort.py
+++ b/tests/ovsdb-monitor-sort.py
@@ -1,4 +1,17 @@
-#! /usr/bin/env python
+#!/usr/bin/env python3
+# Copyright (c) 2020 VMware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
# Breaks lines read from stdin into groups using blank lines as
# group separators, then sorts lines within the groups for
diff --git a/tests/pmd.at b/tests/pmd.at
index 5b612f88f693b5a0b8f9aad91ab76fedf4eca617..cc5371d5a5000d0853e1ce3d0738ddf930b2ebc8 100644
--- a/tests/pmd.at
+++ b/tests/pmd.at
@@ -707,6 +707,15 @@ recirc_id(0),in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_typ
recirc_id(0),in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x1234), packets:0, bytes:0, used:never, actions:2
])
+dnl Check pmd filtering option.
+AT_CHECK([ovs-appctl dpctl/dump-flows dummy@dp0 pmd=0], [0], [dnl
+recirc_id(0),in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x1234), packets:0, bytes:0, used:never, actions:2
+])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows dummy@dp0 pmd=-1], [0], [dnl
+recirc_id(0),in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x1234), packets:0, bytes:0, used:never, actions:2
+])
+
AT_CHECK([ovs-appctl dpctl/del-flow dummy@dp0 'in_port(1),eth(src=00:00:00:00:00:01,dst=00:00:00:00:00:02),eth_type(0x1234)'], [0], [dnl
])
diff --git a/tests/sendpkt.py b/tests/sendpkt.py
index 328ae2bc9d1b54cbaef772817a728333ff744973..49ac45275a902bc686d6b08bc247a545f5aafab4 100755
--- a/tests/sendpkt.py
+++ b/tests/sendpkt.py
@@ -1,6 +1,6 @@
-#! /usr/bin/env python
+#!/usr/bin/env python3
-# Copyright (c) 2018 VMware, Inc.
+# Copyright (c) 2018, 2020 VMware, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/tests/system-afxdp.at b/tests/system-afxdp.at
index e4451624f882a92a1146f9da555c0758d9d4a433..0d09906fb6c85cc48df09966acaee890a306f836 100644
--- a/tests/system-afxdp.at
+++ b/tests/system-afxdp.at
@@ -22,3 +22,26 @@ AT_CHECK([grep "ovs-p0: could not set configuration" ovs-vswitchd.log | wc -l],
OVS_TRAFFIC_VSWITCHD_STOP(["/ovs-p0: Too big 'n_rxq'/d
/ovs-p0: could not set configuration/d"])
AT_CLEANUP
+
+
+AT_SETUP([AF_XDP - ping between pmd and non-pmd ports])
+AT_KEYWORDS([afxdp nonpmd])
+OVS_TRAFFIC_VSWITCHD_START()
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+AT_CHECK([ovs-vsctl del-port ovs-p0])
+AT_CHECK([ovs-vsctl add-port br0 ovs-p0 -- \
+ set interface ovs-p0 type=afxdp-nonpmd options:n_rxq=1],
+ [0], [], [stderr])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/system-kmod-macros.at b/tests/system-kmod-macros.at
index daf66bdec827af802fc810b535d6710097046db1..15628a7c6f42d2d4d076fcd97607ebfb5f38ff5c 100644
--- a/tests/system-kmod-macros.at
+++ b/tests/system-kmod-macros.at
@@ -29,16 +29,16 @@ m4_define([OVS_TRAFFIC_VSWITCHD_START],
AT_CHECK([ovs-vsctl -- _ADD_BR([br0]) -- $1 m4_if([$2], [], [], [| uuidfilt])], [0], [$2])
])
-# OVS_TRAFFIC_VSWITCHD_STOP([WHITELIST], [extra_cmds])
+# OVS_TRAFFIC_VSWITCHD_STOP([ALLOWLIST], [extra_cmds])
#
# Gracefully stops ovs-vswitchd and ovsdb-server, checking their log files
# for messages with severity WARN or higher and signaling an error if any
-# is present. The optional WHITELIST may contain shell-quoted "sed"
+# is present. The optional ALLOWLIST may contain shell-quoted "sed"
# commands to delete any warnings that are actually expected, e.g.:
#
# OVS_TRAFFIC_VSWITCHD_STOP(["/expected error/d"])
#
-# 'extra_cmds' are shell commands to be executed afte OVS_VSWITCHD_STOP() is
+# 'extra_cmds' are shell commands to be executed after OVS_VSWITCHD_STOP() is
# invoked. They can be used to perform additional cleanups such as name space
# removal.
m4_define([OVS_TRAFFIC_VSWITCHD_STOP],
diff --git a/tests/system-layer3-tunnels.at b/tests/system-layer3-tunnels.at
index 1232964bb67cf973009e4ef163a7aac7e3219c44..d21fd777ddb7aa7199525b32c48a879c92bfbf76 100644
--- a/tests/system-layer3-tunnels.at
+++ b/tests/system-layer3-tunnels.at
@@ -152,3 +152,99 @@ AT_CHECK([tail -1 stdout], [0],
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([layer3 - ping over MPLS Bareudp])
+OVS_CHECK_MIN_KERNEL(5, 7)
+OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])])
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "36:b1:ee:7c:01:01")
+ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24", "36:b1:ee:7c:01:02")
+
+ADD_OVS_TUNNEL([bareudp], [br0], [at_bareudp0], [8.1.1.3], [8.1.1.2/24],
+ [ options:local_ip=8.1.1.2 options:packet_type="legacy_l3" options:payload_type=mpls options:dst_port=6635])
+
+ADD_OVS_TUNNEL([bareudp], [br1], [at_bareudp1], [8.1.1.2], [8.1.1.3/24],
+ [options:local_ip=8.1.1.3 options:packet_type="legacy_l3" options:payload_type=mpls options:dst_port=6635])
+
+AT_DATA([flows0.txt], [dnl
+table=0,priority=100,dl_type=0x0800 actions=push_mpls:0x8847,set_mpls_label:3,output:at_bareudp0
+table=0,priority=100,dl_type=0x8847 in_port=at_bareudp0 actions=pop_mpls:0x0800,set_field:36:b1:ee:7c:01:01->dl_dst,set_field:36:b1:ee:7c:01:02->dl_src,output:ovs-p0
+table=0,priority=10 actions=normal
+])
+
+AT_DATA([flows1.txt], [dnl
+table=0,priority=100,dl_type=0x0800 actions=push_mpls:0x8847,set_mpls_label:3,output:at_bareudp1
+table=0,priority=100,dl_type=0x8847 in_port=at_bareudp1 actions=pop_mpls:0x0800,set_field:36:b1:ee:7c:01:02->dl_dst,set_field:36:b1:ee:7c:01:01->dl_src,output:ovs-p1
+table=0,priority=10 actions=normal
+])
+
+AT_CHECK([ip link add patch0 type veth peer name patch1])
+on_exit 'ip link del patch0'
+
+AT_CHECK([ip link set dev patch0 up])
+AT_CHECK([ip link set dev patch1 up])
+AT_CHECK([ovs-vsctl add-port br0 patch0])
+AT_CHECK([ovs-vsctl add-port br1 patch1])
+
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br0 flows0.txt])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br1 flows1.txt])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([layer3 - ping over Bareudp])
+OVS_CHECK_MIN_KERNEL(5, 7)
+OVS_TRAFFIC_VSWITCHD_START([_ADD_BR([br1])])
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "36:b1:ee:7c:01:01")
+ADD_VETH(p1, at_ns1, br1, "10.1.1.2/24", "36:b1:ee:7c:01:02")
+
+ADD_OVS_TUNNEL([bareudp], [br0], [at_bareudp0], [8.1.1.3], [8.1.1.2/24],
+ [ options:local_ip=8.1.1.2 options:packet_type="legacy_l3" options:payload_type=ip options:dst_port=6636])
+
+ADD_OVS_TUNNEL([bareudp], [br1], [at_bareudp1], [8.1.1.2], [8.1.1.3/24],
+ [options:local_ip=8.1.1.3 options:packet_type="legacy_l3" options:payload_type=ip options:dst_port=6636])
+
+AT_DATA([flows0.txt], [dnl
+table=0,priority=100,dl_type=0x0800 in_port=ovs-p0, actions=output:at_bareudp0
+table=0,priority=100,dl_type=0x0800 in_port=at_bareudp0 actions=set_field:36:b1:ee:7c:01:01->dl_dst,set_field:36:b1:ee:7c:01:02->dl_src,output:ovs-p0
+table=0,priority=10 actions=normal
+])
+
+AT_DATA([flows1.txt], [dnl
+table=0,priority=100,dl_type=0x0800 in_port=ovs-p1 actions=output:at_bareudp1
+table=0,priority=100,dl_type=0x0800 in_port=at_bareudp1 actions=set_field:36:b1:ee:7c:01:02->dl_dst,set_field:36:b1:ee:7c:01:01->dl_src,output:ovs-p1
+table=0,priority=10 actions=normal
+])
+
+AT_CHECK([ip link add patch0 type veth peer name patch1])
+on_exit 'ip link del patch0'
+
+AT_CHECK([ip link set dev patch0 up])
+AT_CHECK([ip link set dev patch1 up])
+AT_CHECK([ovs-vsctl add-port br0 patch0])
+AT_CHECK([ovs-vsctl add-port br1 patch1])
+
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br0 flows0.txt])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flows br1 flows1.txt])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+NS_CHECK_EXEC([at_ns1], [ping -q -c 3 -i 0.3 -w 2 10.1.1.1 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
index 379a8a5e9280fa68c1d8fef911a9842a55c8bdcb..4f601ef9396ac75d89ec917982f64f80a994fcd4 100644
--- a/tests/system-offloads-traffic.at
+++ b/tests/system-offloads-traffic.at
@@ -32,6 +32,8 @@ in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:882, used:
AT_CHECK([ovs-appctl dpctl/dump-flows type=offloaded], [0], [])
+AT_CHECK([test $(ovs-appctl upcall/show | grep -c "offloaded flows") -eq 0], [0], [ignore])
+
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
@@ -64,5 +66,7 @@ in_port(2),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:
in_port(3),eth(macs),eth_type(0x0800),ipv4(frag=no), packets:9, bytes:756, used:0.001s, actions:output
])
+AT_CHECK([ovs-appctl upcall/show | grep -E "offloaded flows : [[1-9]]"], [0], [ignore])
+
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
diff --git a/tests/system-route.at b/tests/system-route.at
new file mode 100644
index 0000000000000000000000000000000000000000..1714273e35edce168b57cd432305cd6b97ba0ec5
--- /dev/null
+++ b/tests/system-route.at
@@ -0,0 +1,28 @@
+AT_BANNER([system-route])
+
+dnl Add an interface, add/del ip address, check that OVS catches route updates.
+AT_SETUP([ovs-route - add/remove system route])
+AT_KEYWORDS([route])
+OVS_TRAFFIC_VSWITCHD_START()
+
+dnl Create tap port.  Register its cleanup before any later step can fail.
+AT_CHECK([ip tuntap add name p1-route mode tap])
+on_exit 'ip link del p1-route'
+AT_CHECK([ip link set p1-route up])
+
+dnl Add ip address.
+AT_CHECK([ip addr add 10.0.0.17/24 dev p1-route], [0], [stdout])
+
+dnl OVS_WAIT_UNTIL only polls the exit status: wait for both routes to appear, then compare the text.
+OVS_WAIT_UNTIL([test `ovs-appctl ovs/route/show | grep -c 'p1-route'` -eq 2])
+AT_CHECK([ovs-appctl ovs/route/show | grep 'p1-route' | sort], [0], [dnl
+Cached: 10.0.0.17/24 dev p1-route SRC 10.0.0.17
+Cached: 10.0.0.17/32 dev p1-route SRC 10.0.0.17 local
+])
+
+dnl Delete ip address.
+AT_CHECK([ip addr del 10.0.0.17/24 dev p1-route], [0], [stdout])
+dnl Check that the routes were removed from OVS.
+OVS_WAIT_UNTIL([test `ovs-appctl ovs/route/show | grep -c 'p1-route'` -eq 0 ])
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/system-tap.at b/tests/system-tap.at
new file mode 100644
index 0000000000000000000000000000000000000000..871a3bda4fcc761faa5a3d93ce30cc46b6e6dccd
--- /dev/null
+++ b/tests/system-tap.at
@@ -0,0 +1,34 @@
+AT_SETUP([traffic between namespaces using tap])
+AT_KEYWORDS([http_tap])
+OVS_TRAFFIC_VSWITCHD_START()
+AT_SKIP_IF([test $HAVE_TUNCTL = no])
+
+AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+AT_CHECK([ip tuntap add tap0 mode tap])
+on_exit 'ip tuntap del tap0 mode tap'
+AT_CHECK([ip tuntap add tap1 mode tap])
+on_exit 'ip tuntap del tap1 mode tap'
+
+AT_CHECK([ovs-vsctl add-port br0 tap0 -- set int tap0 type=tap])
+AT_CHECK([ovs-vsctl add-port br0 tap1 -- set int tap1 type=tap])
+AT_CHECK([ip link set tap0 netns at_ns0])
+AT_CHECK([ip link set tap1 netns at_ns1])
+
+AT_CHECK([ip netns exec at_ns0 ip link set dev tap0 up])
+AT_CHECK([ip netns exec at_ns1 ip link set dev tap1 up])
+AT_CHECK([ip netns exec at_ns0 ip addr add 10.1.1.1/24 dev tap0])
+AT_CHECK([ip netns exec at_ns1 ip addr add 10.1.1.2/24 dev tap1])
+
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+OVS_START_L7([at_ns1], [http])
+NS_CHECK_EXEC([at_ns0], [wget 10.1.1.2 -t 3 -T 1 --retry-connrefused -v -o wget0.log])
+
+OVS_TRAFFIC_VSWITCHD_STOP(["/.*ethtool command ETHTOOL_G.*/d"])
+
+AT_CLEANUP
diff --git a/tests/system-traffic.at b/tests/system-traffic.at
index 4a39c929c20762b2fa81a37b57f34a23d10bb981..d2a4dbffecbe615e36cdeb187b95fb9419c4fac6 100644
--- a/tests/system-traffic.at
+++ b/tests/system-traffic.at
@@ -611,6 +611,16 @@ NS_CHECK_EXEC([at_ns0], [ping -q -c 3 10.1.1.100 | FORMAT_PING], [0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
+dnl Test that OVS handles TLV map modifications properly when it restores frozen state.
+NS_CHECK_EXEC([at_ns0], [ping 10.1.1.100 > /dev/null &])
+
+AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0x88,len=4}->tun_metadata1"])
+sleep 1
+AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0x99,len=4}->tun_metadata2"])
+sleep 1
+AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0xaa,len=4}->tun_metadata3"])
+sleep 1
+
OVS_APP_EXIT_AND_WAIT([ovs-ofctl])
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
@@ -678,11 +688,7 @@ AT_CHECK([ip link set dev br-underlay up])
dnl Set up tunnel endpoints on OVS outside the namespace.
ADD_OVS_TUNNEL([gre], [br0], [at_gre0], [172.31.1.1], [10.1.1.100/24])
-dnl Certain Linux distributions, like CentOS, have default iptable rules
-dnl to reject input traffic from br-underlay. Here we add a rule to walk
-dnl around it.
-iptables -I INPUT 1 -i br-underlay -j ACCEPT
-on_exit 'iptables -D INPUT 1'
+IPTABLES_ACCEPT([br-underlay])
ip netns exec at_ns0 tcpdump -n -i p0 dst host 172.31.1.1 -l > p0.pcap &
sleep 1
@@ -729,11 +735,7 @@ dnl Set up tunnel endpoints on OVS outside the namespace and emulate a native
dnl linux device inside the namespace.
ADD_OVS_TUNNEL([erspan], [br0], [at_erspan0], [172.31.1.1], [10.1.1.100/24], [options:key=1 options:erspan_ver=1 options:erspan_idx=7])
-dnl Certain Linux distributions, like CentOS, have default iptable rules
-dnl to reject input traffic from br-underlay. Here we add a rule to walk
-dnl around it.
-iptables -I INPUT 1 -i br-underlay -j ACCEPT
-on_exit 'iptables -D INPUT 1'
+IPTABLES_ACCEPT([br-underlay])
ip netns exec at_ns0 tcpdump -n -x -i p0 dst host 172.31.1.1 -l > p0.pcap &
sleep 1
@@ -2331,6 +2333,35 @@ NXST_FLOW reply:
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([conntrack - generic IP protocol])
+CHECK_CONNTRACK()
+OVS_TRAFFIC_VSWITCHD_START()
+AT_CHECK([ovs-appctl vlog/set dpif:dbg dpif_netdev:dbg ofproto_dpif_upcall:dbg])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
+
+AT_DATA([flows.txt], [dnl
+table=0, priority=1,action=drop
+table=0, priority=10,arp,action=normal
+table=0, priority=100,ip,action=ct(table=1)
+table=1, priority=100,in_port=1,ip,ct_state=+trk+new,action=ct(commit)
+table=1, priority=100,in_port=1,ct_state=+trk+est,action=normal
+])
+
+AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt])
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=01005e00001200005e000101080045c0002800000000ff7019cdc0a8001ee0000012210164010001ba52c0a800010000000000000000000000000000 actions=resubmit(,0)"])
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=192\.168\.0\.30,"], [], [dnl
+112,orig=(src=192.168.0.30,dst=224.0.0.18,sport=0,dport=0),reply=(src=224.0.0.18,dst=192.168.0.30,sport=0,dport=0)
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([conntrack - ICMP related])
AT_SKIP_IF([test $HAVE_NC = no])
CHECK_CONNTRACK()
@@ -3301,8 +3332,15 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src=
AT_CHECK([ovs-appctl dpctl/flush-conntrack])
dnl Shorten the udp_single and icmp_first timeout in zone 5
+dnl Userspace datapath uses udp_first and icmp_reply, and
+dnl kernel datapath uses udp_single and icmp_first
VSCTL_ADD_DATAPATH_TABLE()
-AT_CHECK([ovs-vsctl add-zone-tp $DP_TYPE zone=5 udp_single=3 icmp_first=3])
+
+dnl Creating more timeout policies
+for i in `seq 1 255`; do
+ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=$i udp_first=$i udp_single=$i icmp_first=$i icmp_reply=$i;
+done
+AT_CHECK([ovs-vsctl --may-exist add-zone-tp $DP_TYPE zone=5 udp_first=1 udp_single=1 icmp_first=1 icmp_reply=1])
dnl Send ICMP and UDP traffic
NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
@@ -3317,7 +3355,7 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src=
dnl Wait until the timeout expire.
dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired.
-sleep 4
+sleep 6
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
])
@@ -3335,11 +3373,27 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src=
dnl Wait until the timeout expire.
dnl We intend to wait a bit longer, because conntrack does not recycle the entry right after it is expired.
-sleep 4
+sleep 6
AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2)], [0], [dnl
])
+dnl Set the timeout policy to default again.
+AT_CHECK([ovs-vsctl del-zone-tp $DP_TYPE zone=5])
+
+dnl Send ICMP and UDP traffic
+NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"])
+
+sleep 1
+
+AT_CHECK([ovs-appctl dpctl/dump-conntrack | FORMAT_CT(10.1.1.2) | sort], [0], [dnl
+icmp,orig=(src=10.1.1.1,dst=10.1.1.2,id=,type=8,code=0),reply=(src=10.1.1.2,dst=10.1.1.1,id=,type=0,code=0),zone=5
+udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=,dport=),reply=(src=10.1.1.2,dst=10.1.1.1,sport=,dport=),zone=5
+])
+
OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP
@@ -4785,7 +4839,7 @@ dnl Non-REPLY/RELATED packets get the ACL lookup with the packet headers
dnl in the actual packet direction in reg0 (IN=1, OUT=2). REPLY packets
dnl get the ACL lookup using the conntrack tuple and the inverted direction.
dnl RELATED packets get ACL lookup using the conntrack tuple in the direction
-dnl of the master connection, as stored in ct_label[0].
+dnl of the parent connection, as stored in ct_label[0].
dnl
dnl Incoming non-related packet in the original direction (ACL IN)
table=1 reg3=1, ip, ct_state=-rel-rpl+trk-inv action=set_field:1->reg0,resubmit(,3),goto_table:5
@@ -4796,7 +4850,7 @@ table=1 reg3=2, ip, ct_state=-rel-rpl+trk-inv action=set_field:2->reg0,resubmit(
dnl Outgoing non-related reply packet (CT ACL IN)
table=1 reg3=2, ip, ct_state=-rel+rpl+trk-inv action=set_field:1->reg0,resubmit(,3,ct),goto_table:4
dnl
-dnl Related packet (CT ACL in the direction of the master connection.)
+dnl Related packet (CT ACL in the direction of the parent connection.)
table=1 ip, ct_state=+rel+trk-inv, action=move:NXM_NX_CT_LABEL[[0]]->NXM_NX_REG0[[0]],resubmit(,3,ct),goto_table:4
dnl Drop everything else.
table=1 priority=0, action=drop
@@ -4838,7 +4892,7 @@ dnl (This should not get any packets in this test.)
table=5 priority=10 reg2=1 reg3=2 ct_state=+new-rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat(dst=$2),exec(move:NXM_NX_REG3[[0]]->NXM_NX_CT_LABEL[[0]],move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
dnl Commit new related connections in either direction, which need 'nat'
dnl and which inherit the label (the direction of the original direction
-dnl master tuple) from the master connection.
+dnl parent tuple) from the parent connection.
table=5 priority=10 reg2=1 ct_state=+new+rel, ip, action=ct(zone=NXM_NX_REG4[[0..15]],commit,nat,exec(move:NXM_NX_REG1[[0..31]]->NXM_NX_CT_LABEL[[96..127]])),goto_table:6
dnl
dnl NAT incoming non-NEW packets. Outgoing packets were NATted in table 0.
diff --git a/tests/system-tso-macros.at b/tests/system-tso-macros.at
new file mode 100644
index 0000000000000000000000000000000000000000..406334f3e081e5c038c3e6b163fd4a10462f4d9f
--- /dev/null
+++ b/tests/system-tso-macros.at
@@ -0,0 +1,31 @@
+# _ADD_BR([name])
+#
+# Expands into the proper ovs-vsctl commands to create a bridge with the
+# appropriate type and properties
+m4_define([_ADD_BR], [[add-br $1 -- set Bridge $1 datapath_type="netdev" protocols=OpenFlow10,OpenFlow11,OpenFlow12,OpenFlow13,OpenFlow14,OpenFlow15 fail-mode=secure ]])
+
+# OVS_TRAFFIC_VSWITCHD_START([vsctl-args], [vsctl-output], [=override])
+#
+# Creates a database and starts ovsdb-server, starts ovs-vswitchd
+# connected to that database, calls ovs-vsctl to create a bridge named
+# br0 with predictable settings, passing 'vsctl-args' as additional
+# commands to ovs-vsctl. If 'vsctl-args' causes ovs-vsctl to provide
+# output (e.g. because it includes "create" commands) then 'vsctl-output'
+# specifies the expected output after filtering through uuidfilt.
+m4_define([OVS_TRAFFIC_VSWITCHD_START],
+ [
+ OVS_WAIT_WHILE([ip link show ovs-netdev])
+ _OVS_VSWITCHD_START([--disable-system])
+ dnl Add bridges, ports, etc.
+ OVS_WAIT_WHILE([ip link show br0])
+ AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:userspace-tso-enable=true])
+ AT_CHECK([ovs-vsctl -- _ADD_BR([br0]) -- $1 m4_if([$2], [], [], [| uuidfilt])], [0], [$2])
+])
+
+# CONFIGURE_VETH_OFFLOADS([VETH])
+#
+# Enable TCP segmentation offload and scatter-gather for veths.
+m4_define([CONFIGURE_VETH_OFFLOADS],
+ [AT_CHECK([ethtool -K $1 sg on], [0], [ignore], [ignore])]
+ [AT_CHECK([ethtool -K $1 tso on], [0], [ignore], [ignore])]
+)
diff --git a/tests/system-tso-testsuite.at b/tests/system-tso-testsuite.at
new file mode 100644
index 0000000000000000000000000000000000000000..594d1a6fde85ea3d5965d96a00806cf2c225bc03
--- /dev/null
+++ b/tests/system-tso-testsuite.at
@@ -0,0 +1,27 @@
+AT_INIT
+
+AT_COPYRIGHT([Copyright (c) 2020 VMware, Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at:
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.])
+
+m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
+
+m4_include([tests/ovs-macros.at])
+m4_include([tests/ovsdb-macros.at])
+m4_include([tests/ofproto-macros.at])
+m4_include([tests/system-common-macros.at])
+m4_include([tests/system-userspace-macros.at])
+m4_include([tests/system-tso-macros.at])
+
+m4_include([tests/system-tap.at])
+m4_include([tests/system-traffic.at])
diff --git a/tests/system-userspace-macros.at b/tests/system-userspace-macros.at
index ba7f4102f494d0500a43f83709b4d9449d8e88e6..34f82cee3db61d3c24cf13d7448201df4b934f7e 100644
--- a/tests/system-userspace-macros.at
+++ b/tests/system-userspace-macros.at
@@ -21,16 +21,16 @@ m4_define([OVS_TRAFFIC_VSWITCHD_START],
AT_CHECK([ovs-vsctl -- _ADD_BR([br0]) -- $1 m4_if([$2], [], [], [| uuidfilt])], [0], [$2])
])
-# OVS_TRAFFIC_VSWITCHD_STOP([WHITELIST], [extra_cmds])
+# OVS_TRAFFIC_VSWITCHD_STOP([ALLOWLIST], [extra_cmds])
#
# Gracefully stops ovs-vswitchd and ovsdb-server, checking their log files
# for messages with severity WARN or higher and signaling an error if any
-# is present. The optional WHITELIST may contain shell-quoted "sed"
+# is present. The optional ALLOWLIST may contain shell-quoted "sed"
# commands to delete any warnings that are actually expected, e.g.:
#
# OVS_TRAFFIC_VSWITCHD_STOP(["/expected error/d"])
#
-# 'extra_cmds' are shell commands to be executed afte OVS_VSWITCHD_STOP() is
+# 'extra_cmds' are shell commands to be executed after OVS_VSWITCHD_STOP() is
# invoked. They can be used to perform additional cleanups such as name space
# removal.
m4_define([OVS_TRAFFIC_VSWITCHD_STOP],
@@ -99,12 +99,8 @@ m4_define([CHECK_CONNTRACK_NAT])
# CHECK_CONNTRACK_TIMEOUT()
#
# Perform requirements checks for running conntrack customized timeout tests.
-* The userspace datapath does not support this feature yet.
#
-m4_define([CHECK_CONNTRACK_TIMEOUT],
-[
- AT_SKIP_IF([:])
-])
+m4_define([CHECK_CONNTRACK_TIMEOUT])
# CHECK_CT_DPIF_SET_GET_MAXCONNS()
#
diff --git a/tests/system-userspace-packet-type-aware.at b/tests/system-userspace-packet-type-aware.at
index c2246316de7a81905508b646e5d84b84f57a5d97..974304758f81af8a9902be5b88a71d4227be5906 100644
--- a/tests/system-userspace-packet-type-aware.at
+++ b/tests/system-userspace-packet-type-aware.at
@@ -129,6 +129,7 @@ AT_CHECK([
ip addr add 10.0.0.1/24 dev br-p1
ip link set br-p1 up
], [0], [stdout])
+OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep -q br-p1])
AT_CHECK([
ovs-appctl ovs/route/add 10.0.0.0/24 br-p1
@@ -141,6 +142,7 @@ AT_CHECK([
ip addr add 20.0.0.2/24 dev br-p2
ip link set br-p2 up
], [0], [stdout])
+OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep -q br-p2])
AT_CHECK([
ovs-appctl ovs/route/add 20.0.0.0/24 br-p2
@@ -153,6 +155,7 @@ AT_CHECK([
ip addr add 30.0.0.3/24 dev br-p3
ip link set br-p3 up
], [0], [stdout])
+OVS_WAIT_UNTIL([ovs-appctl ovs/route/show | grep -q br-p3])
AT_CHECK([
ovs-appctl ovs/route/add 30.0.0.0/24 br-p3
diff --git a/tests/system-userspace-testsuite.at b/tests/system-userspace-testsuite.at
index b40da9579e7068a414cf4af9f73bec46c8cbcbce..2e9659a6758569590ed44eee5c93d7bb6d05fc45 100644
--- a/tests/system-userspace-testsuite.at
+++ b/tests/system-userspace-testsuite.at
@@ -26,3 +26,4 @@ m4_include([tests/system-traffic.at])
m4_include([tests/system-layer3-tunnels.at])
m4_include([tests/system-interface.at])
m4_include([tests/system-userspace-packet-type-aware.at])
+m4_include([tests/system-route.at])
diff --git a/tests/test-bundle.c b/tests/test-bundle.c
index 124ad5b434d1ff09778e94a14d88a72c39dcc90c..53f78e86ff0384e59904953b6a89f96e7b48577a 100644
--- a/tests/test-bundle.c
+++ b/tests/test-bundle.c
@@ -25,28 +25,28 @@
#include "util.h"
#define N_FLOWS 50000
-#define MAX_SLAVES 8 /* Maximum supported by this test framework. */
+#define MAX_MEMBERS 8 /* Maximum supported by this test framework. */
-struct slave {
- ofp_port_t slave_id;
+struct member {
+ ofp_port_t member_id;
bool enabled;
size_t flow_count;
};
-struct slave_group {
- size_t n_slaves;
- struct slave slaves[MAX_SLAVES];
+struct member_group {
+ size_t n_members;
+ struct member members[MAX_MEMBERS];
};
-static struct slave *
-slave_lookup(struct slave_group *sg, ofp_port_t slave_id)
+static struct member *
+member_lookup(struct member_group *sg, ofp_port_t member_id)
{
size_t i;
- for (i = 0; i < sg->n_slaves; i++) {
- if (sg->slaves[i].slave_id == slave_id) {
- return &sg->slaves[i];
+ for (i = 0; i < sg->n_members; i++) {
+ if (sg->members[i].member_id == member_id) {
+ return &sg->members[i];
}
}
@@ -54,12 +54,12 @@ slave_lookup(struct slave_group *sg, ofp_port_t slave_id)
}
static bool
-slave_enabled_cb(ofp_port_t slave_id, void *aux)
+member_enabled_cb(ofp_port_t member_id, void *aux)
{
- struct slave *slave;
+ struct member *member;
- slave = slave_lookup(aux, slave_id);
- return slave ? slave->enabled : false;
+ member = member_lookup(aux, member_id);
+ return member ? member->enabled : false;
}
static struct ofpact_bundle *
@@ -80,8 +80,8 @@ parse_bundle_actions(char *actions)
bundle = ofpact_get_BUNDLE(xmemdup(action, action->len));
ofpbuf_uninit(&ofpacts);
- if (bundle->n_slaves > MAX_SLAVES) {
- ovs_fatal(0, "At most %u slaves are supported", MAX_SLAVES);
+ if (bundle->n_members > MAX_MEMBERS) {
+ ovs_fatal(0, "At most %u members are supported", MAX_MEMBERS);
}
return bundle;
@@ -109,7 +109,7 @@ test_bundle_main(int argc, char *argv[])
struct ofpact_bundle *bundle;
struct flow *flows;
size_t i, n_permute, old_n_enabled;
- struct slave_group sg;
+ struct member_group sg;
int old_active;
set_program_name(argv[0]);
@@ -120,17 +120,17 @@ test_bundle_main(int argc, char *argv[])
bundle = parse_bundle_actions(argv[1]);
- /* Generate 'slaves' array. */
- sg.n_slaves = 0;
- for (i = 0; i < bundle->n_slaves; i++) {
- ofp_port_t slave_id = bundle->slaves[i];
+ /* Generate 'members' array. */
+ sg.n_members = 0;
+ for (i = 0; i < bundle->n_members; i++) {
+ ofp_port_t member_id = bundle->members[i];
- if (slave_lookup(&sg, slave_id)) {
- ovs_fatal(0, "Redundant slaves are not supported. ");
+ if (member_lookup(&sg, member_id)) {
+ ovs_fatal(0, "Redundant members are not supported. ");
}
- sg.slaves[sg.n_slaves].slave_id = slave_id;
- sg.n_slaves++;
+ sg.members[sg.n_members].member_id = member_id;
+ sg.n_members++;
}
/* Generate flows. */
@@ -141,14 +141,14 @@ test_bundle_main(int argc, char *argv[])
}
/* Cycles through each possible liveness permutation for the given
- * n_slaves. The initial state is equivalent to all slaves down, so we
+ * n_members. The initial state is equivalent to all members down, so we
* skip it by starting at i = 1. We do one extra iteration to cover
* transitioning from the final state back to the initial state. */
old_n_enabled = 0;
old_active = -1;
- n_permute = 1 << sg.n_slaves;
+ n_permute = 1 << sg.n_members;
for (i = 1; i <= n_permute + 1; i++) {
- struct slave *slave;
+ struct member *member;
size_t j, n_enabled, changed;
double disruption, perfect;
uint8_t mask;
@@ -156,27 +156,27 @@ test_bundle_main(int argc, char *argv[])
mask = i % n_permute;
- /* Gray coding ensures that in each iteration exactly one slave
+ /* Gray coding ensures that in each iteration exactly one member
* changes its liveness. This makes the expected disruption a bit
* easier to calculate, and is likely similar to how failures will be
* experienced in the wild. */
mask = mask ^ (mask >> 1);
- /* Initialize slaves. */
+ /* Initialize members. */
n_enabled = 0;
- for (j = 0; j < sg.n_slaves; j++) {
- slave = &sg.slaves[j];
- slave->flow_count = 0;
- slave->enabled = ((1 << j) & mask) != 0;
+ for (j = 0; j < sg.n_members; j++) {
+ member = &sg.members[j];
+ member->flow_count = 0;
+ member->enabled = ((1 << j) & mask) != 0;
- if (slave->enabled) {
+ if (member->enabled) {
n_enabled++;
}
}
active = -1;
- for (j = 0; j < sg.n_slaves; j++) {
- if (sg.slaves[j].enabled) {
+ for (j = 0; j < sg.n_members; j++) {
+ if (sg.members[j].enabled) {
active = j;
break;
}
@@ -185,19 +185,19 @@ test_bundle_main(int argc, char *argv[])
changed = 0;
for (j = 0; j < N_FLOWS; j++) {
struct flow *flow = &flows[j];
- ofp_port_t old_slave_id, ofp_port;
+ ofp_port_t old_member_id, ofp_port;
struct flow_wildcards wc;
- old_slave_id = u16_to_ofp(flow->regs[0]);
- ofp_port = bundle_execute(bundle, flow, &wc, slave_enabled_cb,
+ old_member_id = u16_to_ofp(flow->regs[0]);
+ ofp_port = bundle_execute(bundle, flow, &wc, member_enabled_cb,
&sg);
flow->regs[0] = ofp_to_u16(ofp_port);
if (ofp_port != OFPP_NONE) {
- slave_lookup(&sg, ofp_port)->flow_count++;
+ member_lookup(&sg, ofp_port)->flow_count++;
}
- if (old_slave_id != ofp_port) {
+ if (old_member_id != ofp_port) {
changed++;
}
}
@@ -208,23 +208,23 @@ test_bundle_main(int argc, char *argv[])
if (old_n_enabled || n_enabled) {
perfect = 1.0 / MAX(old_n_enabled, n_enabled);
} else {
- /* This will happen when 'sg.n_slaves' is 0. */
+ /* This will happen when 'sg.n_members' is 0. */
perfect = 0;
}
}
disruption = changed / (double)N_FLOWS;
printf("%s: disruption=%.2f (perfect=%.2f)",
- mask_str(mask, sg.n_slaves), disruption, perfect);
+ mask_str(mask, sg.n_members), disruption, perfect);
- for (j = 0 ; j < sg.n_slaves; j++) {
- slave = &sg.slaves[j];
+ for (j = 0 ; j < sg.n_members; j++) {
+ member = &sg.members[j];
double flow_percent;
- flow_percent = slave->flow_count / (double)N_FLOWS;
+ flow_percent = member->flow_count / (double)N_FLOWS;
printf( " %.2f", flow_percent);
- if (slave->enabled) {
+ if (member->enabled) {
double perfect_fp;
if (bundle->algorithm == NX_BD_ALG_ACTIVE_BACKUP) {
@@ -234,16 +234,16 @@ test_bundle_main(int argc, char *argv[])
}
if (fabs(flow_percent - perfect_fp) >= .01) {
- fprintf(stderr, "%s: slave %d: flow_percentage=%.5f for"
+ fprintf(stderr, "%s: member %d: flow_percentage=%.5f for"
" differs from perfect=%.5f by more than .01\n",
- mask_str(mask, sg.n_slaves), slave->slave_id,
+ mask_str(mask, sg.n_members), member->member_id,
flow_percent, perfect_fp);
ok = false;
}
- } else if (slave->flow_count) {
- fprintf(stderr, "%s: slave %d: disabled slave received"
- " flows.\n", mask_str(mask, sg.n_slaves),
- slave->slave_id);
+ } else if (member->flow_count) {
+ fprintf(stderr, "%s: member %d: disabled member received"
+ " flows.\n", mask_str(mask, sg.n_members),
+ member->member_id);
ok = false;
}
}
@@ -251,7 +251,7 @@ test_bundle_main(int argc, char *argv[])
if (fabs(disruption - perfect) >= .01) {
fprintf(stderr, "%s: disruption=%.5f differs from perfect=%.5f by"
- " more than .01\n", mask_str(mask, sg.n_slaves),
+ " more than .01\n", mask_str(mask, sg.n_members),
disruption, perfect);
ok = false;
}
diff --git a/tests/test-classifier.c b/tests/test-classifier.c
index 6d53d016de6022e32361db6fa5c294bf1ce128c4..cff00c8fa35e3a28886ecf21d3fa984730e1ab6d 100644
--- a/tests/test-classifier.c
+++ b/tests/test-classifier.c
@@ -14,7 +14,8 @@
* limitations under the License.
*/
-/* "White box" tests for classifier.
+/* Tests for classifier, written with knowledge of, and taking advantage of,
+ * the classifier's internal structure.
*
* With very few exceptions, these tests obtain complete coverage of every
* basic block and every branch in the classifier implementation, e.g. a clean
@@ -512,8 +513,9 @@ verify_tries(struct classifier *cls)
int i;
for (i = 0; i < cls->n_tries; i++) {
- n_rules += trie_verify(&cls->tries[i].root, 0,
- cls->tries[i].field->n_bits);
+ const struct mf_field * cls_field
+ = ovsrcu_get(struct mf_field *, &cls->tries[i].field);
+ n_rules += trie_verify(&cls->tries[i].root, 0, cls_field->n_bits);
}
assert(n_rules <= cls->n_rules);
}
diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
index f77ee75e38df1d6758bd20fd7c8cf4780f5c464c..24c93e4a488a12678d281878211b298de62a5647 100644
--- a/tests/test-conntrack.c
+++ b/tests/test-conntrack.c
@@ -82,6 +82,7 @@ ct_thread_main(void *aux_)
{
struct thread_aux *aux = aux_;
struct dp_packet_batch *pkt_batch;
+ struct dp_packet *pkt;
ovs_be16 dl_type;
size_t i;
long long now = time_msec();
@@ -90,7 +91,10 @@ ct_thread_main(void *aux_)
ovs_barrier_block(&barrier);
for (i = 0; i < n_pkts; i += batch_size) {
conntrack_execute(ct, pkt_batch, dl_type, false, true, 0, NULL, NULL,
- 0, 0, NULL, NULL, now);
+ 0, 0, NULL, NULL, now, 0);
+ DP_PACKET_BATCH_FOR_EACH (j, pkt, pkt_batch) {
+ pkt_metadata_init_conn(&pkt->md);
+ }
}
ovs_barrier_block(&barrier);
destroy_packets(pkt_batch);
@@ -174,7 +178,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_,
if (flow.dl_type != dl_type) {
conntrack_execute(ct_, &new_batch, dl_type, false, true, 0,
- NULL, NULL, 0, 0, NULL, NULL, now);
+ NULL, NULL, 0, 0, NULL, NULL, now, 0);
dp_packet_batch_init(&new_batch);
}
dp_packet_batch_add(&new_batch, packet);
@@ -182,7 +186,7 @@ pcap_batch_execute_conntrack(struct conntrack *ct_,
if (!dp_packet_batch_is_empty(&new_batch)) {
conntrack_execute(ct_, &new_batch, dl_type, false, true, 0, NULL, NULL,
- 0, 0, NULL, NULL, now);
+ 0, 0, NULL, NULL, now, 0);
}
}
diff --git a/tests/test-l7.py b/tests/test-l7.py
index d7854a1df31ba80fcade5689832f6ab34af2a149..32a77392c6427a94f4c684fc52e3bad92f9dd1cf 100755
--- a/tests/test-l7.py
+++ b/tests/test-l7.py
@@ -1,5 +1,5 @@
-#!/usr/bin/env python
-# Copyright (c) 2015, 2016 Nicira, Inc.
+#!/usr/bin/env python3
+# Copyright (c) 2015, 2016, 2020 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c
index b1a4be36bb1ec0dc0c86fd00e5422c4bd13a60cc..31513c537fd5ae73650106d154fcdb7ad1fe89d1 100644
--- a/tests/test-ovsdb.c
+++ b/tests/test-ovsdb.c
@@ -1904,6 +1904,26 @@ print_idl_row_updated_link2(const struct idltest_link2 *l2, int step)
}
}
+static void
+print_idl_row_updated_simple6(const struct idltest_simple6 *s6, int step)
+{
+ size_t i;
+ bool updated = false;
+
+ for (i = 0; i < IDLTEST_SIMPLE6_N_COLUMNS; i++) {
+ if (idltest_simple6_is_updated(s6, i)) {
+ if (!updated) {
+ printf("%03d: updated columns:", step);
+ updated = true;
+ }
+ printf(" %s", idltest_simple6_columns[i].name);
+ }
+ }
+ if (updated) {
+ printf("\n");
+ }
+}
+
static void
print_idl_row_updated_singleton(const struct idltest_singleton *sng, int step)
{
@@ -1991,6 +2011,22 @@ print_idl_row_link2(const struct idltest_link2 *l2, int step)
print_idl_row_updated_link2(l2, step);
}
+static void
+print_idl_row_simple6(const struct idltest_simple6 *s6, int step)
+{
+ int i;
+
+ printf("%03d: name=%s ", step, s6->name);
+ printf("weak_ref=[");
+ for (i = 0; i < s6->n_weak_ref; i++) {
+ printf("%s"UUID_FMT, i ? " " : "",
+ UUID_ARGS(&s6->weak_ref[i]->header_.uuid));
+ }
+
+ printf("] uuid="UUID_FMT"\n", UUID_ARGS(&s6->header_.uuid));
+ print_idl_row_updated_simple6(s6, step);
+}
+
static void
print_idl_row_singleton(const struct idltest_singleton *sng, int step)
{
@@ -2030,37 +2066,64 @@ print_idl(struct ovsdb_idl *idl, int step)
}
static void
-print_idl_track(struct ovsdb_idl *idl, int step, unsigned int seqno)
+print_idl_track(struct ovsdb_idl *idl, int step)
{
+ const struct idltest_simple6 *s6;
const struct idltest_simple *s;
const struct idltest_link1 *l1;
const struct idltest_link2 *l2;
int n = 0;
IDLTEST_SIMPLE_FOR_EACH_TRACKED (s, idl) {
- if (idltest_simple_row_get_seqno(s, OVSDB_IDL_CHANGE_DELETE) >= seqno) {
- printf("%03d: ##deleted## uuid="UUID_FMT"\n", step, UUID_ARGS(&s->header_.uuid));
- } else {
- print_idl_row_simple(s, step);
+ print_idl_row_simple(s, step);
+ if (idltest_simple_is_deleted(s)) {
+ printf("%03d: deleted row: uuid="UUID_FMT"\n", step,
+ UUID_ARGS(&s->header_.uuid));
+ } else if (idltest_simple_is_new(s)) {
+ printf("%03d: inserted row: uuid="UUID_FMT"\n", step,
+ UUID_ARGS(&s->header_.uuid));
}
n++;
}
IDLTEST_LINK1_FOR_EACH_TRACKED (l1, idl) {
- if (idltest_simple_row_get_seqno(s, OVSDB_IDL_CHANGE_DELETE) >= seqno) {
- printf("%03d: ##deleted## uuid="UUID_FMT"\n", step, UUID_ARGS(&s->header_.uuid));
+ if (idltest_link1_is_deleted(l1)) {
+ printf("%03d: deleted row: uuid="UUID_FMT"\n", step,
+ UUID_ARGS(&l1->header_.uuid));
} else {
print_idl_row_link1(l1, step);
+ if (idltest_link1_is_new(l1)) {
+ printf("%03d: inserted row: uuid="UUID_FMT"\n", step,
+ UUID_ARGS(&l1->header_.uuid));
+ }
}
n++;
}
IDLTEST_LINK2_FOR_EACH_TRACKED (l2, idl) {
- if (idltest_simple_row_get_seqno(s, OVSDB_IDL_CHANGE_DELETE) >= seqno) {
- printf("%03d: ##deleted## uuid="UUID_FMT"\n", step, UUID_ARGS(&s->header_.uuid));
+ if (idltest_link2_is_deleted(l2)) {
+ printf("%03d: deleted row: uuid="UUID_FMT"\n", step,
+ UUID_ARGS(&l2->header_.uuid));
} else {
print_idl_row_link2(l2, step);
+ if (idltest_link2_is_new(l2)) {
+ printf("%03d: inserted row: uuid="UUID_FMT"\n", step,
+ UUID_ARGS(&l2->header_.uuid));
+ }
+
+ }
+ n++;
+ }
+ IDLTEST_SIMPLE6_FOR_EACH_TRACKED (s6, idl) {
+ print_idl_row_simple6(s6, step);
+ if (idltest_simple6_is_deleted(s6)) {
+ printf("%03d: deleted row: uuid="UUID_FMT"\n", step,
+ UUID_ARGS(&s6->header_.uuid));
+ } else if (idltest_simple6_is_new(s6)) {
+ printf("%03d: inserted row: uuid="UUID_FMT"\n", step,
+ UUID_ARGS(&s6->header_.uuid));
}
n++;
}
+
if (!n) {
printf("%03d: empty\n", step);
}
@@ -2282,6 +2345,8 @@ find_table_class(const char *name)
return &idltest_table_link1;
} else if (!strcmp(name, "link2")) {
return &idltest_table_link2;
+ } else if (!strcmp(name, "simple6")) {
+ return &idltest_table_simple6;
}
return NULL;
}
@@ -2391,6 +2456,10 @@ update_conditions(struct ovsdb_idl *idl, char *commands)
if (seqno == next_seqno ) {
ovs_fatal(0, "condition unchanged");
}
+ unsigned int new_next_seqno = ovsdb_idl_set_condition(idl, tc, &cond);
+ if (next_seqno != new_next_seqno) {
+ ovs_fatal(0, "condition expected seqno changed");
+ }
ovsdb_idl_condition_destroy(&cond);
json_destroy(json);
}
@@ -2465,7 +2534,7 @@ do_idl(struct ovs_cmdl_context *ctx)
/* Print update. */
if (track) {
- print_idl_track(idl, step++, ovsdb_idl_get_seqno(idl));
+ print_idl_track(idl, step++);
ovsdb_idl_track_clear(idl);
} else {
print_idl(idl, step++);
diff --git a/tests/test-ovsdb.py b/tests/test-ovsdb.py
index 1b94b79a07aa41ab3975b78492258318c5080aac..a19680274302e9d29449eabd6fb5fa41ad436333 100644
--- a/tests/test-ovsdb.py
+++ b/tests/test-ovsdb.py
@@ -28,6 +28,7 @@ import ovs.util
import ovs.vlog
from ovs.db import data
from ovs.db import error
+from ovs.db.idl import _row_to_uuid as row_to_uuid
from ovs.fatal_signal import signal_alarm
vlog = ovs.vlog.Vlog("test-ovsdb")
@@ -159,7 +160,8 @@ def get_simple_printable_row_string(row, columns):
is ovs.db.data.Atom):
value = getattr(row, column)
if isinstance(value, dict):
- value = sorted(value.items())
+ value = sorted((row_to_uuid(k), row_to_uuid(v))
+ for k, v in value.items())
s += "%s=%s " % (column, value)
s = s.strip()
s = re.sub('""|,|u?\'', "", s)
@@ -212,6 +214,14 @@ def print_idl(idl, step):
print(s)
n += 1
+ if "simple5" in idl.tables:
+ simple5 = idl.tables["simple5"].rows
+ for row in simple5.values():
+ s = "%03d: " % step
+ s += get_simple_printable_row_string(row, ["name", "irefmap"])
+ print(s)
+ n += 1
+
if "link1" in idl.tables:
l1 = idl.tables["link1"].rows
for row in l1.values():
@@ -303,6 +313,11 @@ def idltest_find_simple3(idl, i):
return next(idl.index_equal("simple3", "simple3_by_name", i), None)
+def idltest_find(idl, table, col, match):
+ return next((r for r in idl.tables[table].rows.values() if
+ getattr(r, col) == match), None)
+
+
def idl_set(idl, commands, step):
txn = ovs.db.idl.Transaction(idl)
increment = False
@@ -524,6 +539,12 @@ def idl_set(idl, commands, step):
setattr(new_row3, 'name', 'String3')
new_row3.addvalue('uset', new_row41.uuid)
assert len(getattr(new_row3, 'uset', [])) == 1
+ elif name == 'partialmapmutateirefmap':
+ row3 = idltest_find_simple3(idl, "myString1")
+ row5 = idltest_find(idl, "simple5", "name", "myString2")
+ row5.setkey('irefmap', 1, row3.uuid)
+ maplen = len(row5.irefmap)
+ assert maplen == 1, "expected 1, got %d" % maplen
else:
sys.stderr.write("unknown command %s\n" % name)
sys.exit(1)
diff --git a/tests/test-reconnect.c b/tests/test-reconnect.c
index 5a14e7fe58dae58254fa5e5fcbb1c3d8ebaae974..bf0463e25c0bbf8fc78753cac0174fee366d3814 100644
--- a/tests/test-reconnect.c
+++ b/tests/test-reconnect.c
@@ -48,6 +48,7 @@ test_reconnect_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
now = 1000;
reconnect = reconnect_create(now);
+ reconnect_receive_attempted(reconnect, LLONG_MAX);
reconnect_set_name(reconnect, "remote");
reconnect_get_stats(reconnect, now, &prev);
printf("### t=%d ###\n", now);
diff --git a/tests/test-sha1.c b/tests/test-sha1.c
index b7279db6aaa4594b0a4a3c4be46dd68440426958..cc80888a7dd5358ca2a0ffaa19fd82f704e83f75 100644
--- a/tests/test-sha1.c
+++ b/tests/test-sha1.c
@@ -137,6 +137,42 @@ test_big_vector(void)
free(vec.data);
}
+static void
+test_huge_vector(void)
+{
+    enum { SIZE = 1000000000 };
+    struct test_vector vec = {
+        NULL, SIZE,
+        /* Computed by the sha1sum utility for a file with 10^9 symbols 'a'. */
+        { 0xD0, 0xF3, 0xE4, 0xF2, 0xF3, 0x1C, 0x66, 0x5A, 0xBB, 0xD8,
+          0xF5, 0x18, 0xE8, 0x48, 0xD5, 0xCB, 0x80, 0xCA, 0x78, 0xF7 }
+    };
+    int chunk = 1 + random_range(SIZE / 10000);
+    uint8_t md[SHA1_DIGEST_SIZE];
+    struct sha1_ctx sha1;
+    size_t i, sz;
+
+    /* It's not user-friendly to allocate 1GB of memory for a unit test, so we
+     * re-use a small chunk; it must be nonzero or 'sz' never advances. */
+    vec.data = xmalloc(chunk);
+    for (i = 0; i < chunk; i++) {
+        vec.data[i] = 'a';
+    }
+
+    sha1_init(&sha1);
+    for (sz = 0; sz < SIZE; sz += chunk) {
+        int n = sz + chunk < SIZE ? chunk : SIZE - sz;
+
+        sha1_update(&sha1, vec.data, n);
+    }
+    sha1_final(&sha1, md);
+    ovs_assert(!memcmp(md, vec.output, SHA1_DIGEST_SIZE));
+
+    free(vec.data);
+    putchar('.');
+    fflush(stdout);
+}
+
static void
test_shar1_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
{
@@ -147,6 +183,7 @@ test_shar1_main(int argc OVS_UNUSED, char *argv[] OVS_UNUSED)
}
test_big_vector();
+ test_huge_vector();
putchar('\n');
}
diff --git a/tests/test-stream.c b/tests/test-stream.c
index 4af44200e801374f8872f0ac93d11925cc10f861..68ce2c5442fa08aa5fad539ab64f9e3832b943c7 100644
--- a/tests/test-stream.c
+++ b/tests/test-stream.c
@@ -42,5 +42,6 @@ main(int argc, char *argv[])
VLOG_ERR("stream_open_block(%s) failure: %s",
argv[1], ovs_strerror(error));
}
+ stream_close(stream);
return (error || !stream) ? 1 : 0;
}
diff --git a/tests/tunnel-push-pop.at b/tests/tunnel-push-pop.at
index b92c23fde8f755b986190c7b34b1ba2d667a83e9..48c5de9d1907c4c404f0be0e5c96acce7a38a475 100644
--- a/tests/tunnel-push-pop.at
+++ b/tests/tunnel-push-pop.at
@@ -216,6 +216,8 @@ AT_CHECK([ovs-vsctl add-port int-br t2 -- set Interface t2 type=vxlan \
options:remote_ip=1.1.2.92 options:key=456 options:packet_type=legacy_l3 ofport_request=7\
-- add-port int-br t7 -- set Interface t7 type=vxlan \
options:remote_ip=1.1.2.92 options:key=345 options:exts=gpe ofport_request=8\
+ -- add-port int-br t8 -- set Interface t8 type=gtpu \
+ options:remote_ip=1.1.2.92 options:key=123 ofport_request=9\
], [0])
AT_CHECK([ovs-appctl dpif/show], [0], [dnl
@@ -232,6 +234,7 @@ dummy@ovs-dummy: hit:0 missed:0
t5 6/6081: (geneve: egress_pkt_mark=1234, out_key=flow, remote_ip=1.1.2.93)
t6 7/3: (gre: key=456, packet_type=legacy_l3, remote_ip=1.1.2.92)
t7 8/4789: (vxlan: key=345, remote_ip=1.1.2.92)
+ t8 9/2152: (gtpu: key=123, remote_ip=1.1.2.92)
])
dnl First setup dummy interface IP address, then add the route
@@ -342,6 +345,7 @@ AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl
Listening ports:
genev_sys_6081 (6081) ref_cnt=2
gre_sys (3) ref_cnt=2
+gtpu_sys_2152 (2152) ref_cnt=1
vxlan_sys_4789 (4789) ref_cnt=3
])
@@ -369,6 +373,13 @@ AT_CHECK([tail -1 stdout], [0],
[Datapath actions: tnl_pop(6081)
])
+dnl Check GTP-U tunnel pop
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.2.92,dst=1.1.2.88,proto=17,tos=0,ttl=64,frag=no),udp(src=51283,dst=2152)'],
+[0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: tnl_pop(2152)
+])
+
dnl Check VXLAN tunnel push
AT_CHECK([ovs-ofctl add-flow int-br action=2])
AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'], [0], [stdout])
@@ -426,6 +437,15 @@ AT_CHECK([tail -1 stdout], [0],
[Datapath actions: clone(tnl_push(tnl_port(6081),header(size=58,type=5,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=6081,csum=0x0),geneve(crit,vni=0x7b,options({class=0xffff,type=0x80,len=4,0xa}))),out_port(100)),1)
])
+dnl Check GTP-U tunnel push
+AT_CHECK([ovs-ofctl add-flow int-br "actions=9"])
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(2),eth(src=f8:bc:12:44:34:b6,dst=aa:55:aa:55:00:00),eth_type(0x0800),ipv4(src=1.1.3.88,dst=1.1.3.112,proto=47,tos=0,ttl=64,frag=no)'],
+[0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: pop_eth,clone(tnl_push(tnl_port(2152),header(size=50,type=110,eth(dst=f8:bc:12:44:34:b6,src=aa:55:aa:55:00:00,dl_type=0x0800),ipv4(src=1.1.2.88,dst=1.1.2.92,proto=17,tos=0,ttl=64,frag=0x4000),udp(src=0,dst=2152,csum=0x0),gtpu(flags=0x30,msgtype=255,teid=0x7b)),out_port(100)),1)
+])
+AT_CHECK([ovs-ofctl del-flows int-br])
+
dnl Check decapsulation of GRE packet
AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007e79464000402fba550101025c0101025820006558000001c8fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
AT_CHECK([ovs-appctl netdev-dummy/receive p0 'aa55aa550000001b213cab6408004500007e79464000402fba550101025c0101025820006558000001c8fe71d883724fbeb6f4e1494a080045000054ba200000400184861e0000011e00000200004227e75400030af3195500000000f265010000000000101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f3031323334353637'])
@@ -515,6 +535,7 @@ AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl
Listening ports:
genev_sys_6081 (6081) ref_cnt=1
gre_sys (3) ref_cnt=1
+gtpu_sys_2152 (2152) ref_cnt=1
vxlan_sys_4789 (4789) ref_cnt=2
vxlan_sys_4790 (4790) ref_cnt=1
])
@@ -524,6 +545,7 @@ AT_CHECK([ovs-vsctl del-port int-br t1 \
-- del-port int-br t4 \
-- del-port int-br t6 \
-- del-port int-br t7 \
+ -- del-port int-br t8 \
], [0])
dnl Check tunnel lookup entries after deleting all remaining tunnel ports
diff --git a/tests/tunnel.at b/tests/tunnel.at
index ce000a25e6b6a1d767df45656204fb52320cac37..b8ae7caa9b5eee1a7c90596403574fa7012060af 100644
--- a/tests/tunnel.at
+++ b/tests/tunnel.at
@@ -110,6 +110,57 @@ Datapath actions: drop
OVS_VSWITCHD_STOP(["/dropping tunnel packet marked ECN CE but is not ECN capable/d"])
AT_CLEANUP
+AT_SETUP([tunnel - input with matching tunnel mask])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \
+ options:remote_ip=1.1.1.1 \
+ ofport_request=1 \
+ -- add-port br0 p2 -- set Interface p2 type=dummy \
+ ofport_request=2])
+
+AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
+ br0 65534/100: (dummy-internal)
+ p1 1/1: (gre: remote_ip=1.1.1.1)
+ p2 2/2: (dummy)
+])
+
+AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(dst=1.1.1.1,src=3.3.3.200/255.255.255.0,tp_dst=123,tp_src=1,ttl=64),recirc_id(0),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"])
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | tail -1], [0], [dnl
+tunnel(src=3.3.3.200/255.255.255.0,dst=1.1.1.1,ttl=64,tp_src=1,tp_dst=123),recirc_id(0),in_port(1),eth_type(0x0800), packets:0, bytes:0, used:never, actions:2
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([tunnel - too long nested attributes])
+OVS_VSWITCHD_START([add-port br0 p1 \
+ -- set Interface p1 type=gre options:remote_ip=1.1.1.1 ofport_request=1 \
+ -- add-port br0 p2 -- set Interface p2 type=dummy ofport_request=2])
+
+AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
+ br0 65534/100: (dummy-internal)
+ p1 1/1: (gre: remote_ip=1.1.1.1)
+ p2 2/2: (dummy)
+])
+
+dst_single="dst=1.1.1.1"
+dst_rep=${dst_single}
+dnl Size of one OVS_TUNNEL_KEY_ATTR_IPV4_DST is 4 bytes + NLA_HDRLEN (4 bytes).
+dnl One nested message has room for UINT16_MAX - NLA_HDRLEN (4) bytes, i.e.
+dnl (UINT16_MAX - NLA_HDRLEN) / (4 + NLA_HDRLEN) = 8191.375 of dst addresses.
+for i in `seq 1 8192` ; do
+ dst_rep="${dst_rep},${dst_single}"
+done
+
+AT_CHECK([ovs-appctl dpctl/add-flow "tunnel(${dst_rep})" "2" 2>&1 | dnl
+ sed "s/${dst_single},//g"], [], [dnl
+ovs-vswitchd: parsing flow key (syntax error at tunnel(dst=1.1.1.1)) (Argument list too long)
+ovs-appctl: ovs-vswitchd: server returned an error
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([tunnel - output])
OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gre \
options:remote_ip=1.1.1.1 options:local_ip=2.2.2.2 \
@@ -890,6 +941,43 @@ Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff
OVS_VSWITCHD_STOP
AT_CLEANUP
+AT_SETUP([tunnel - Delete Geneve option])
+dnl One Geneve tunnel port p1 and one dummy port p2, each with an explicitly
+dnl requested OpenFlow port number.
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=geneve \
+                    options:remote_ip=1.1.1.1 ofport_request=1 \
+                    -- add-port br0 p2 -- set Interface p2 type=dummy \
+                    ofport_request=2])
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
+
+AT_CHECK([ovs-ofctl add-tlv-map br0 "{class=0xffff,type=0,len=4}->tun_metadata0,{class=0xffff,type=1,len=4}->tun_metadata1,{class=0xffff,type=2,len=4}->tun_metadata3"])
+
+AT_DATA([flows.txt], [dnl
+table=0,tun_metadata0=0x11112222,actions=set_field:0x55556666->tun_metadata1,resubmit(,1)
+table=0,tun_metadata0=0x33334444,actions=delete_field:tun_metadata0,set_field:0x77778888->tun_metadata1,resubmit(,1)
+table=0,tun_metadata0=0x88889999,actions=delete_field:tun_metadata3,resubmit(,1)
+table=1,actions=IN_PORT
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x11112222}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(frag=no)'], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+ [Megaflow: recirc_id=0,eth,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x11112222,tun_metadata1=NP,in_port=1,nw_ecn=0,nw_frag=no
+Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff,type=0,len=4,0x11112222}{class=0xffff,type=0x1,len=4,0x55556666}),flags(df))),6081
+])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x33334444}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(frag=no)'], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+ [Megaflow: recirc_id=0,eth,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x33334444,tun_metadata1=NP,in_port=1,nw_ecn=0,nw_frag=no
+Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff,type=0x1,len=4,0x77778888}),flags(df))),6081
+])
+
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x0,src=1.1.1.1,dst=1.1.1.2,ttl=64,geneve({class=0xffff,type=0,len=4,0x88889999}),flags(df|key)),in_port(6081),skb_mark(0),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(frag=no)'], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+ [Megaflow: recirc_id=0,eth,ip,tun_id=0,tun_src=1.1.1.1,tun_dst=1.1.1.2,tun_tos=0,tun_flags=+df-csum+key,tun_metadata0=0x88889999,in_port=1,nw_ecn=0,nw_frag=no
+Datapath actions: set(tunnel(dst=1.1.1.1,ttl=64,tp_dst=6081,geneve({class=0xffff,type=0,len=4,0x88889999}),flags(df))),6081
+])
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
AT_SETUP([tunnel - concomitant IPv6 and IPv4 tunnels])
OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=vxlan \
options:remote_ip=1.1.1.1 ofport_request=1 \
@@ -1041,3 +1129,79 @@ AT_CHECK([ovs-appctl tnl/neigh/show | tail -n+3 | sort], [0], [dnl
OVS_VSWITCHD_STOP
AT_CLEANUP
+
+AT_SETUP([tunnel - GTP-U basic])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=gtpu \
+ options:remote_ip=1.1.1.1 \
+ options:key=123 ofport_request=1])
+
+AT_CHECK([ovs-appctl dpif/show | tail -n +3], [0], [dnl
+ br0 65534/100: (dummy-internal)
+ p1 1/2152: (gtpu: key=123, remote_ip=1.1.1.1)
+])
+
+AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl
+Listening ports:
+gtpu_sys_2152 (2152) ref_cnt=1
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
+
+AT_SETUP([tunnel - GTP-U push and pop])
+OVS_VSWITCHD_START([add-port br0 p1 -- set Interface p1 type=dummy \
+ ofport_request=1 \
+ -- add-port br0 p2 -- set Interface p2 type=dummy \
+ ofport_request=2])
+
+# Add these ports separately to ensure that they get the datapath port
+# number expected below.
+ovs-vsctl -- add-port br0 p3 \
+ -- set Interface p3 type=gtpu \
+ ofport_request=3 \
+ options:remote_ip=1.1.1.1 \
+ options:key=3 \
+ options:packet_type=legacy_l3
+ovs-vsctl -- add-port br0 p4 \
+ -- set Interface p4 type=gtpu \
+ ofport_request=4 \
+ options:remote_ip=1.1.1.2 \
+ options:key=4 \
+ options:packet_type=legacy_l3
+OVS_VSWITCHD_DISABLE_TUNNEL_PUSH_POP
+
+dnl Start at line 4 of the dpif/show output so that only ports p1-p4 are compared.
+AT_CHECK([ovs-appctl dpif/show | tail -n +4], [0], [dnl
+ p1 1/1: (dummy)
+ p2 2/2: (dummy)
+ p3 3/2152: (gtpu: key=3, remote_ip=1.1.1.1)
+ p4 4/2152: (gtpu: key=4, remote_ip=1.1.1.2)
+])
+
+AT_DATA([flows.txt], [dnl
+in_port=1,actions=3
+in_port=2,actions=4
+in_port=3,tun_gtpu_flags=0x30,tun_gtpu_msgtype=255,actions=1
+])
+AT_CHECK([ovs-ofctl add-flows br0 flows.txt])
+
+AT_CHECK([ovs-appctl tnl/ports/show |sort], [0], [dnl
+Listening ports:
+gtpu_sys_2152 (2152) ref_cnt=2
+])
+
+dnl Encap: in_port=1,actions=3
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'in_port(1),eth(src=50:54:00:00:00:05,dst=50:54:00:00:00:07),eth_type(0x0800),ipv4(src=192.168.0.1,dst=192.168.0.2,proto=1,tos=0,ttl=128,frag=no),icmp(type=8,code=0)'], [0], [stdout])
+AT_CHECK([tail -1 stdout], [0],
+ [Datapath actions: set(tunnel(tun_id=0x3,dst=1.1.1.1,ttl=64,tp_dst=2152,flags(df|key))),pop_eth,2152
+])
+
+dnl Decap: receive packet from GTP-U port, match it, and output to dummy port p1
+AT_CHECK([ovs-appctl ofproto/trace ovs-dummy 'recirc_id(0),tunnel(tun_id=0x3,src=1.1.1.1,dst=2.2.2.2,ttl=64,gtpu(flags=0x30,msgtype=255),flags(df|key)),in_port(2152),packet_type(ns=1,id=0),skb_mark(0),ipv4(frag=no)'], [0], [stdout])
+AT_CHECK([tail -2 stdout], [0],
+ [Megaflow: recirc_id=0,packet_type=(1,0),tun_id=0x3,tun_src=1.1.1.1,tun_dst=2.2.2.2,tun_tos=0,gtpu_flags=0x30,gtpu_msgtype=255,tun_flags=+df-csum+key,in_port=3,dl_type=0x0000
+Datapath actions: push_eth(src=00:00:00:00:00:00,dst=00:00:00:00:00:00),1
+])
+
+OVS_VSWITCHD_STOP
+AT_CLEANUP
diff --git a/tests/uuidfilt.py b/tests/uuidfilt.py
index bc49aa480e9ee4df735bf91d1a52fc70bb98f2a7..39679dd4445e4b8380bdce84d83e1f5592ff4b3b 100755
--- a/tests/uuidfilt.py
+++ b/tests/uuidfilt.py
@@ -1,4 +1,20 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
+# Copyright (c) 2020 VMware, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Breaks lines read from stdin into groups using blank lines as
+# group separators, then sorts lines within the groups for
+# reproducibility.
import re
import sys
diff --git a/utilities/bugtool/ovs-bugtool.in b/utilities/bugtool/ovs-bugtool.in
index e55bfc2ed58e077e22fc511b38e65e6fee51a57b..fa62cbe949ac4e44606832fbb58fef69846722a7 100755
--- a/utilities/bugtool/ovs-bugtool.in
+++ b/utilities/bugtool/ovs-bugtool.in
@@ -33,8 +33,7 @@
# or func_output().
#
-import StringIO
-import commands
+from io import BytesIO
import fcntl
import getopt
import hashlib
@@ -48,7 +47,7 @@ import warnings
import zipfile
from select import select
from signal import SIGTERM
-from subprocess import PIPE, Popen
+from subprocess import PIPE, Popen, check_output
from xml.dom.minidom import getDOMImplementation, parse
@@ -348,7 +347,7 @@ def collect_data():
cap = v['cap']
if 'cmd_args' in v:
if 'output' not in v.keys():
- v['output'] = StringIOmtime()
+ v['output'] = BytesIOmtime()
if v['repeat_count'] > 0:
if cap not in process_lists:
process_lists[cap] = []
@@ -373,20 +372,23 @@ def collect_data():
if 'filename' in v and v['filename'].startswith('/proc/'):
# proc files must be read into memory
try:
- f = open(v['filename'], 'r')
+ f = open(v['filename'], 'rb')
s = f.read()
f.close()
if check_space(cap, v['filename'], len(s)):
- v['output'] = StringIOmtime(s)
+ v['output'] = BytesIOmtime(s)
except:
pass
elif 'func' in v:
try:
s = v['func'](cap)
except Exception as e:
- s = str(e)
+ s = str(e).encode()
if check_space(cap, k, len(s)):
- v['output'] = StringIOmtime(s)
+ if isinstance(s, str):
+ v['output'] = BytesIOmtime(s.encode())
+ else:
+ v['output'] = BytesIOmtime(s)
def main(argv=None):
@@ -626,6 +628,7 @@ exclude those logs from the archive.
cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-k', p])
cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-i', p])
cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-c', p])
+ cmd_output(CAP_NETWORK_INFO, [ETHTOOL, '-l', p])
if int(t) == 1:
cmd_output(CAP_NETWORK_INFO,
[TC, '-s', '-d', 'class', 'show', 'dev', p])
@@ -640,7 +643,8 @@ exclude those logs from the archive.
if os.path.exists(OPENVSWITCH_VSWITCHD_PID):
cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'show', '-s'])
for d in dp_list():
- cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'dump-flows', '-m', d])
+ cmd_output(CAP_NETWORK_STATUS, [OVS_DPCTL, 'dump-flows', '-m',
+ d.decode()])
cmd_output(CAP_PROCESS_LIST, [PS, 'wwwaxf', '-eo',
'pid,tty,stat,time,nice,psr,pcpu,pmem,nwchan,wchan:25,args'],
@@ -682,8 +686,8 @@ exclude those logs from the archive.
ovs_info_caps = [CAP_NETWORK_STATUS, CAP_SYSTEM_LOGS,
CAP_OPENVSWITCH_LOGS, CAP_NETWORK_CONFIG]
ovs_info_list = ['process-tree']
- # We cannot use iteritems, since we modify 'data' as we pass through
- for (k, v) in data.items():
+    # Iterate over a copy, since we modify 'data' as we pass through
+ for (k, v) in list(data.items()):
cap = v['cap']
if 'filename' in v:
info = k[0]
@@ -703,8 +707,8 @@ exclude those logs from the archive.
pass
# permit the user to filter out data
- # We cannot use iteritems, since we modify 'data' as we pass through
- for (k, v) in sorted(data.items()):
+    # Iterate over a copy, since we modify 'data' as we pass through
+ for (k, v) in list(data.items()):
cap = v['cap']
if 'filename' in v:
key = k[0]
@@ -721,7 +725,7 @@ exclude those logs from the archive.
# include inventory
data['inventory.xml'] = {'cap': None,
- 'output': StringIOmtime(make_inventory(data, subdir))}
+ 'output': BytesIOmtime(make_inventory(data, subdir))}
# create archive
if output_fd == -1:
@@ -782,7 +786,7 @@ def dump_scsi_hosts(cap):
def module_info(cap):
- output = StringIO.StringIO()
+ output = BytesIO()
modules = open(PROC_MODULES, 'r')
procs = []
@@ -806,7 +810,7 @@ def multipathd_topology(cap):
def dp_list():
- output = StringIO.StringIO()
+ output = BytesIO()
procs = [ProcOutput([OVS_DPCTL, 'dump-dps'],
caps[CAP_NETWORK_STATUS][MAX_TIME], output)]
@@ -828,7 +832,7 @@ def collect_ovsdb():
if os.path.isfile(OPENVSWITCH_COMPACT_DB):
os.unlink(OPENVSWITCH_COMPACT_DB)
- output = StringIO.StringIO()
+ output = BytesIO()
max_time = 5
procs = [ProcOutput(['ovsdb-tool', 'compact',
OPENVSWITCH_CONF_DB, OPENVSWITCH_COMPACT_DB],
@@ -871,7 +875,7 @@ def fd_usage(cap):
def dump_rdac_groups(cap):
- output = StringIO.StringIO()
+ output = BytesIO()
procs = [ProcOutput([MPPUTIL, '-a'], caps[cap][MAX_TIME], output)]
run_procs([procs])
@@ -896,7 +900,7 @@ def load_plugins(just_capabilities=False, filter=None):
for node in nodelist:
if node.nodeType == node.TEXT_NODE:
rc += node.data
- return rc.encode()
+ return rc
def getBoolAttr(el, attr, default=False):
ret = default
@@ -1037,7 +1041,7 @@ def make_tar(subdir, suffix, output_fd, output_file):
s = os.stat(v['filename'])
ti.mtime = s.st_mtime
ti.size = s.st_size
- tf.addfile(ti, open(v['filename']))
+ tf.addfile(ti, open(v['filename'], 'rb'))
except:
pass
finally:
@@ -1095,12 +1099,12 @@ def make_inventory(inventory, subdir):
s.setAttribute('date', time.strftime('%c'))
s.setAttribute('hostname', platform.node())
s.setAttribute('uname', ' '.join(platform.uname()))
- s.setAttribute('uptime', commands.getoutput(UPTIME))
+ s.setAttribute('uptime', check_output(UPTIME).decode())
document.getElementsByTagName(INVENTORY_XML_ROOT)[0].appendChild(s)
-    map(lambda k_v: inventory_entry(document, subdir, k_v[0], k_v[1]),
-        inventory.items())
+    # Iterate explicitly: a bare map() is lazy in Python 3, so discarding
+    # its result would mean inventory_entry() is never called.
+    for k, v in inventory.items():
+        inventory_entry(document, subdir, k, v)
- return document.toprettyxml()
+ return document.toprettyxml().encode()
def inventory_entry(document, subdir, k, v):
@@ -1301,7 +1305,7 @@ class ProcOutput(object):
line = self.proc.stdout.readline()
else:
line = self.proc.stdout.read(self.bufsize)
- if line == '':
+ if line == b'':
# process exited
self.proc.stdout.close()
self.status = self.proc.wait()
@@ -1348,7 +1352,7 @@ def run_procs(procs):
if p.running and now > p.timeout:
output_ts("'%s' timed out" % p.cmdAsStr())
if p.inst:
- p.inst.write("\n** timeout **\n")
+ p.inst.write("\n** timeout **\n".encode())
p.timed_out = True
p.terminate()
@@ -1391,13 +1395,13 @@ def get_free_disk_space(path):
return s.f_frsize * s.f_bfree
-class StringIOmtime(StringIO.StringIO):
- def __init__(self, buf=''):
- StringIO.StringIO.__init__(self, buf)
+class BytesIOmtime(BytesIO):
+ def __init__(self, buf=b''):
+ BytesIO.__init__(self, buf)
self.mtime = time.time()
def write(self, s):
- StringIO.StringIO.write(self, s)
+ BytesIO.write(self, s)
self.mtime = time.time()
diff --git a/utilities/bugtool/plugins/network-status/openvswitch.xml b/utilities/bugtool/plugins/network-status/openvswitch.xml
index 72aa449302b8647a83e3711093b678965dd519e4..56e091feb45f43e6b3f6e01200b708362006c49d 100644
--- a/utilities/bugtool/plugins/network-status/openvswitch.xml
+++ b/utilities/bugtool/plugins/network-status/openvswitch.xml
@@ -32,6 +32,7 @@
/usr/share/openvswitch/scripts/ovs-bugtool-ovs-appctl-dpif
ovs-appctl -t ovsdb-server ovsdb-server/list-dbs
ovs-appctl dpctl/dump-flows netdev@ovs-netdev
+ ovs-appctl dpctl/dump-flows -m netdev@ovs-netdev
ovs-appctl dpctl/dump-flows system@ovs-system
ovs-appctl dpctl/show -s
/usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "show"
@@ -39,6 +40,7 @@
/usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-ports"
/usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-groups"
/usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-group-stats"
+ /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-tlv-map"
/usr/share/openvswitch/scripts/ovs-bugtool-get-dpdk-nic-numa
ip -s -s link show
/usr/share/openvswitch/scripts/ovs-bugtool-get-port-stats
diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
index fc9e20bf1b5f113bb63a6a51885c20de1a5e1432..bc6bfae15ae45a1eb480fe4fa202a090ae6654e5 100755
--- a/utilities/checkpatch.py
+++ b/utilities/checkpatch.py
@@ -167,6 +167,7 @@ __regex_is_for_if_single_line_bracket = \
__regex_ends_with_bracket = \
re.compile(r'[^\s]\) {(\s+/\*[\s\Sa-zA-Z0-9\.,\?\*/+-]*)?$')
__regex_ptr_declaration_missing_whitespace = re.compile(r'[a-zA-Z0-9]\*[^*]')
+__regex_cast_missing_whitespace = re.compile(r'\)[a-zA-Z0-9]')
__regex_is_comment_line = re.compile(r'^\s*(/\*|\*\s)')
__regex_has_comment = re.compile(r'.*(/\*|\*\s)')
__regex_has_c99_comment = re.compile(r'.*//.*$')
@@ -182,6 +183,7 @@ __regex_if_macros = re.compile(r'^ +(%s) \([\S]([\s\S]+[\S])*\) { +\\' %
skip_leading_whitespace_check = False
skip_trailing_whitespace_check = False
+skip_gerrit_change_id_check = False
skip_block_whitespace_check = False
skip_signoff_check = False
@@ -189,13 +191,13 @@ skip_signoff_check = False
# name, as they may have legitimate reasons to have longer lines.
#
# Python isn't checked as flake8 performs these checks during build.
-line_length_blacklist = re.compile(
+line_length_ignore_list = re.compile(
r'\.(am|at|etc|in|m4|mk|patch|py)$|debian/rules')
# Don't enforce a requirement that leading whitespace be all spaces on
# files that include these characters in their name, since these kinds
# of files need lines with leading tabs.
-leading_whitespace_blacklist = re.compile(r'\.(mk|am|at)$|debian/rules')
+leading_whitespace_ignore_list = re.compile(r'\.(mk|am|at)$|debian/rules')
def is_subtracted_line(line):
@@ -285,6 +287,12 @@ def pointer_whitespace_check(line):
return __regex_ptr_declaration_missing_whitespace.search(line) is not None
+def cast_whitespace_check(line):
+    """Return TRUE when a closing ')' is directly followed by a letter or
+    digit, as in a cast written without a separating space: '(void *)foo'"""
+    found = __regex_cast_missing_whitespace.search(line)
+    return found is not None
+
+
def line_length_check(line):
"""Return TRUE if the line length is too long"""
if len(line) > 79:
@@ -522,11 +530,11 @@ file_checks = [
checks = [
{'regex': None,
- 'match_name': lambda x: not line_length_blacklist.search(x),
+ 'match_name': lambda x: not line_length_ignore_list.search(x),
'check': lambda x: line_length_check(x)},
{'regex': None,
- 'match_name': lambda x: not leading_whitespace_blacklist.search(x),
+ 'match_name': lambda x: not leading_whitespace_ignore_list.search(x),
'check': lambda x: not leading_whitespace_is_spaces(x),
'print': lambda: print_warning("Line has non-spaces leading whitespace")},
@@ -550,6 +558,12 @@ checks = [
'print':
lambda: print_error("Inappropriate spacing in pointer declaration")},
+ {'regex': r'(\.c|\.h)(\.in)?$', 'match_name': None,
+ 'prereq': lambda x: not is_comment_line(x),
+ 'check': lambda x: cast_whitespace_check(x),
+ 'print':
+ lambda: print_error("Inappropriate spacing around cast")},
+
{'regex': r'(\.c|\.h)(\.in)?$', 'match_name': None,
'prereq': lambda x: not is_comment_line(x),
'check': lambda x: trailing_operator(x),
@@ -814,7 +828,8 @@ def ovs_checkpatch_parse(text, filename, author=None, committer=None):
elif is_co_author.match(line):
m = is_co_author.match(line)
co_authors.append(m.group(2))
- elif is_gerrit_change_id.match(line):
+ elif (is_gerrit_change_id.match(line) and
+ not skip_gerrit_change_id_check):
print_error(
"Remove Gerrit Change-Id's before submitting upstream.")
print("%d: %s\n" % (lineno, line))
@@ -885,7 +900,8 @@ Check options:
-s|--skip-signoff-lines Tolerate missing Signed-off-by line
-S|--spellcheck Check C comments and commit-message for possible
spelling mistakes
--t|--skip-trailing-whitespace Skips the trailing whitespace test"""
+-t|--skip-trailing-whitespace Skips the trailing whitespace test
+ --skip-gerrit-change-id Skips the gerrit change id test"""
% sys.argv[0])
@@ -942,6 +958,7 @@ if __name__ == '__main__':
"skip-leading-whitespace",
"skip-signoff-lines",
"skip-trailing-whitespace",
+ "skip-gerrit-change-id",
"spellcheck",
"quiet"])
except:
@@ -960,6 +977,8 @@ if __name__ == '__main__':
skip_signoff_check = True
elif o in ("-t", "--skip-trailing-whitespace"):
skip_trailing_whitespace_check = True
+            # One-element tuple: a bare parenthesized string would turn 'in'
+            # into a substring test and also accept e.g. "-g" or "-c".
+            elif o in ("--skip-gerrit-change-id",):
+                skip_gerrit_change_id_check = True
elif o in ("-f", "--check-file"):
checking_file = True
elif o in ("-S", "--spellcheck"):
diff --git a/utilities/gdb/ovs_gdb.py b/utilities/gdb/ovs_gdb.py
index befc2b4a4b45668679d7b427b619a75f991a9626..1111f3100d9d2d06ab8a0e1ecf4674b711611ad7 100644
--- a/utilities/gdb/ovs_gdb.py
+++ b/utilities/gdb/ovs_gdb.py
@@ -55,6 +55,7 @@
# ...
# ...
#
+from __future__ import print_function
import gdb
import sys
import uuid
@@ -413,6 +414,39 @@ class ForEachLIST():
return self.__next__()
+#
+# Class that will provide an iterator over an OFPACTS.
+#
+class ForEachOFPACTS():
+    """Iterator over a packed sequence of 'struct ofpact' records.
+
+    ofpacts     : gdb.Value pointing at the first action.
+    ofpacts_len : total length of the set, consumed in the same units as
+                  each action's 'len' field.
+
+    Every step yields a 'struct ofpact *' and then advances by that
+    action's 'len' rounded up to a multiple of 8.
+    """
+    def __init__(self, ofpacts, ofpacts_len):
+        self.ofpact = ofpacts.cast(gdb.lookup_type('struct ofpact').pointer())
+        self.length = int(ofpacts_len)
+
+    def __round_up(self, val, round_to):
+        # Smallest multiple of 'round_to' that is >= 'val'.
+        return int(val) + (round_to - int(val)) % round_to
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.ofpact is None or self.length <= 0:
+            raise StopIteration
+
+        ofpact = self.ofpact
+        length = self.__round_up(ofpact['len'], 8)
+        if length <= 0:
+            # A zero 'len' would never advance the cursor and the walk
+            # would never terminate; report this entry and stop after it.
+            self.length = 0
+            return ofpact
+
+        self.length -= length
+        # Step through a 'void *' so the pointer moves by 'length' bytes
+        # rather than by 'length' whole structures.
+        self.ofpact = self.ofpact.cast(
+            gdb.lookup_type('void').pointer()) + length
+        self.ofpact = self.ofpact.cast(
+            gdb.lookup_type('struct ofpact').pointer())
+
+        return ofpact
+
+    def next(self):
+        # Python 2 spelling of the iterator protocol.
+        return self.__next__()
+
+
#
# Implements the GDB "ovs_dump_bridges" command
#
@@ -1233,6 +1267,46 @@ class CmdShowUpcall(gdb.Command):
self.display_udpif_upcall(udpif, 0, "dbg" in arg_list)
+#
+# Implements the GDB "ovs_dump_ofpacts" command
+#
+class CmdDumpOfpacts(gdb.Command):
+    """Dump all actions in an ofpacts set
+    Usage: ovs_dump_ofpacts <struct ofpact *> <ofpacts_len>
+
+       <struct ofpact *> : Pointer to set of ofpact structures.
+       <ofpacts_len>     : Total length of the set.
+
+    Example dumping all actions when in the clone_xlate_actions() function:
+
+    (gdb) ovs_dump_ofpacts actions actions_len
+    (struct ofpact *) 0x561c7be487c8: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24}
+    (struct ofpact *) 0x561c7be487e0: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24}
+    (struct ofpact *) 0x561c7be487f8: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24}
+    (struct ofpact *) 0x561c7be48810: {type = OFPACT_SET_FIELD, raw = 255 '', len = 32}
+    (struct ofpact *) 0x561c7be48830: {type = OFPACT_SET_FIELD, raw = 255 '', len = 24}
+    (struct ofpact *) 0x561c7be48848: {type = OFPACT_RESUBMIT, raw = 38 '&', len = 16}
+    """
+    def __init__(self):
+        super(CmdDumpOfpacts, self).__init__("ovs_dump_ofpacts",
+                                             gdb.COMMAND_DATA)
+
+    def invoke(self, arg, from_tty):
+        arg_list = gdb.string_to_argv(arg)
+
+        # Exactly two arguments are required: the pointer and the length.
+        if len(arg_list) != 2:
+            print("usage: ovs_dump_ofpacts <struct ofpact *> <ofpacts_len>")
+            return
+
+        # Both arguments are evaluated as gdb expressions in the current
+        # frame; the first is then viewed as a 'struct ofpact *'.
+        ofpacts = gdb.parse_and_eval(arg_list[0]).cast(
+            gdb.lookup_type('struct ofpact').pointer())
+
+        length = gdb.parse_and_eval(arg_list[1])
+
+        for node in ForEachOFPACTS(ofpacts, length):
+            print("(struct ofpact *) {}: {}".format(node, node.dereference()))
+
+
#
# Initialize all GDB commands
#
@@ -1244,6 +1318,7 @@ CmdDumpDpNetdevPorts()
CmdDumpDpProvider()
CmdDumpNetdev()
CmdDumpNetdevProvider()
+CmdDumpOfpacts()
CmdDumpOvsList()
CmdDumpSimap()
CmdDumpSmap()
diff --git a/utilities/ovs-dev.py b/utilities/ovs-dev.py
index 248d22ab9a7e8b1fe7488b4c51bce9283264fd46..c45788acd527457f9d80cf26201b6728cfb9e165 100755
--- a/utilities/ovs-dev.py
+++ b/utilities/ovs-dev.py
@@ -1,5 +1,5 @@
-#!/usr/bin/env python
-# Copyright (c) 2013, 2014, 2015, 2016 Nicira, Inc.
+#!/usr/bin/env python3
+# Copyright (c) 2013, 2014, 2015, 2016, 2020 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/utilities/ovs-dpctl-top.in b/utilities/ovs-dpctl-top.in
index f2cc3f7f2a6faba2a321a35bb37a48a5b7bbddf3..fbe6e4f560a303726c9edc72ad9f29d6185914c2 100755
--- a/utilities/ovs-dpctl-top.in
+++ b/utilities/ovs-dpctl-top.in
@@ -480,6 +480,8 @@ def elements_to_dict(elements):
""" Convert line to a hierarchy of dictionaries. """
result = {}
for element in elements:
+ if (element == "eth()"):
+ continue
match = FIELDS_CMPND.search(element)
if (match):
key = match.group(1)
@@ -592,7 +594,7 @@ def flows_read(ihdl, flow_db):
try:
flow_db.flow_line_add(line)
- except ValueError, arg:
+ except ValueError as arg:
logging.error(arg)
return flow_db
@@ -958,6 +960,9 @@ class FlowDB:
change order of fields of the same flow.
"""
+        if not isinstance(line, str):
+            # Decode bytes rather than taking their repr: in Python 3,
+            # str(b"x") yields the text "b'x'", which cannot be parsed.
+            line = line.decode() if isinstance(line, bytes) else str(line)
+
line = line.rstrip("\n")
(fields, stats, _) = flow_line_split(line)
@@ -988,7 +993,7 @@ class FlowDB:
self.flow_event(fields_dict, stats_old_dict, stats_dict)
- except ValueError, arg:
+ except ValueError as arg:
logging.error(arg)
self._error_count += 1
raise
@@ -1192,7 +1197,7 @@ def flows_top(args):
flows_read(ihdl, flow_db)
finally:
ihdl.close()
- except OSError, arg:
+ except OSError as arg:
logging.critical(arg)
break
@@ -1220,7 +1225,7 @@ def flows_top(args):
# repeat output
for (count, line) in lines:
- print line
+ print(line)
def flows_script(args):
@@ -1249,7 +1254,7 @@ def flows_script(args):
render = Render(console_width, Render.FIELD_SELECT_SCRIPT)
for line in render.format(flow_db):
- print line
+ print(line)
def main():
diff --git a/utilities/ovs-lib.in b/utilities/ovs-lib.in
index d646b444a40c46bed2d5d844a199a3b1fe3c3973..f7e97567406ab8554cb3a96cb758fad681f22575 100644
--- a/utilities/ovs-lib.in
+++ b/utilities/ovs-lib.in
@@ -255,20 +255,36 @@ stop_daemon () {
if version_geq "$version" "2.5.90"; then
actions="$graceful $actions"
fi
+ actiontype=""
for action in $actions; do
if pid_exists "$pid" >/dev/null 2>&1; then :; else
- return 0
+ # pid does not exist.
+ if [ -n "$actiontype" ]; then
+ return 0
+ fi
+ # But, does the file exist? We may have had a daemon
+ # segfault with `ovs-appctl exit`. Check one more time
+ # before deciding that the daemon is dead.
+ [ -e "$rundir/$1.pid" ] && sleep 2 && pid=`cat "$rundir/$1.pid"` 2>/dev/null
+ if pid_exists "$pid" >/dev/null 2>&1; then :; else
+ return 0
+ fi
fi
case $action in
EXIT)
action "Exiting $1 ($pid)" \
${bindir}/ovs-appctl -T 1 -t $rundir/$1.$pid.ctl exit $2
+ # The above command could have resulted in delayed
+ # daemon segfault. And if a monitor is running, it
+ # would restart the daemon giving it a new pid.
;;
TERM)
action "Killing $1 ($pid)" kill $pid
+ actiontype="force"
;;
KILL)
action "Killing $1 ($pid) with SIGKILL" kill -9 $pid
+ actiontype="force"
;;
FAIL)
log_failure_msg "Killing $1 ($pid) failed"
diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in
index cb5c6120c82913cde380c63836f9661017f44b63..2017c6eba071b024c7acd13108951c60e3c3c23a 100644
--- a/utilities/ovs-ofctl.8.in
+++ b/utilities/ovs-ofctl.8.in
@@ -562,12 +562,12 @@ between a switch and its controller.
.IP
When a switch has more than one controller configured, only the
traffic to and from a single controller is output. If none of the
-controllers is configured as a master or a slave (using a Nicira
+controllers is configured as a primary or a secondary (using a Nicira
extension to OpenFlow 1.0 or 1.1, or a standard request in OpenFlow
1.2 or later), then a controller is chosen arbitrarily among
-them. If there is a master controller, it is chosen; otherwise, if
-there are any controllers that are not masters or slaves, one is
-chosen arbitrarily; otherwise, a slave controller is chosen
+them. If there is a primary controller, it is chosen; otherwise, if
+there are any controllers that are not primaries or secondaries, one is
+chosen arbitrarily; otherwise, a secondary controller is chosen
arbitrarily. This choice is made once at connection time and does not
change as controllers reconfigure their roles.
.IP
diff --git a/utilities/ovs-pipegen.py b/utilities/ovs-pipegen.py
index ee5797221c2fa8ba503907c968110cda6f4f2839..a3b6a661de6bae5a1490b504fcd49436cd0d2092 100755
--- a/utilities/ovs-pipegen.py
+++ b/utilities/ovs-pipegen.py
@@ -1,5 +1,5 @@
-#! /usr/bin/env python
-# Copyright (c) 2013, 2014, 2015 Nicira, Inc.
+#! /usr/bin/env python3
+# Copyright (c) 2013, 2014, 2015, 2020 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/utilities/ovs-vsctl.c b/utilities/ovs-vsctl.c
index bd3972636e66fba4a9e8c87081631cc82f65ba04..37cc72d401d35eb05ebc21ec959e76e6207af79e 100644
--- a/utilities/ovs-vsctl.c
+++ b/utilities/ovs-vsctl.c
@@ -1344,9 +1344,13 @@ cmd_list_zone_tp(struct ctl_context *ctx)
struct ovsrec_ct_timeout_policy *tp = zone->timeout_policy;
- for (int j = 0; j < tp->n_timeouts; j++) {
- ds_put_format(&ctx->output, "%s=%"PRIu64" ",
- tp->key_timeouts[j], tp->value_timeouts[j]);
+ if (tp) {
+ for (int j = 0; j < tp->n_timeouts; j++) {
+ ds_put_format(&ctx->output, "%s=%"PRIu64" ",
+ tp->key_timeouts[j], tp->value_timeouts[j]);
+ }
+ } else {
+ ds_put_cstr(&ctx->output, "system default");
}
ds_chomp(&ctx->output, ' ');
ds_put_char(&ctx->output, '\n');
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index e591c26a6c48493843555748a1691dbc6ae5bd0b..5ed7e8234354bf4640d094c0ba196b2aea47b68b 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -288,6 +288,7 @@ static void bridge_configure_ipfix(struct bridge *);
static void bridge_configure_spanning_tree(struct bridge *);
static void bridge_configure_tables(struct bridge *);
static void bridge_configure_dp_desc(struct bridge *);
+static void bridge_configure_serial_desc(struct bridge *);
static void bridge_configure_aa(struct bridge *);
static void bridge_aa_refresh_queued(struct bridge *);
static bool bridge_aa_need_refresh(struct bridge *);
@@ -329,7 +330,8 @@ static void mirror_destroy(struct mirror *);
static bool mirror_configure(struct mirror *);
static void mirror_refresh_stats(struct mirror *);
-static void iface_configure_lacp(struct iface *, struct lacp_slave_settings *);
+static void iface_configure_lacp(struct iface *,
+ struct lacp_member_settings *);
static bool iface_create(struct bridge *, const struct ovsrec_interface *,
const struct ovsrec_port *);
static bool iface_is_internal(const struct ovsrec_interface *iface,
@@ -634,8 +636,10 @@ static void
get_timeout_policy_from_ovsrec(struct simap *tp,
const struct ovsrec_ct_timeout_policy *tp_cfg)
{
- for (size_t i = 0; i < tp_cfg->n_timeouts; i++) {
- simap_put(tp, tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]);
+ if (tp_cfg) {
+ for (size_t i = 0; i < tp_cfg->n_timeouts; i++) {
+ simap_put(tp, tp_cfg->key_timeouts[i], tp_cfg->value_timeouts[i]);
+ }
}
}
@@ -939,6 +943,7 @@ bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg)
bridge_configure_spanning_tree(br);
bridge_configure_tables(br);
bridge_configure_dp_desc(br);
+ bridge_configure_serial_desc(br);
bridge_configure_aa(br);
}
free(managers);
@@ -1193,11 +1198,11 @@ port_configure(struct port *port)
/* Get name. */
s.name = port->name;
- /* Get slaves. */
- s.n_slaves = 0;
- s.slaves = xmalloc(ovs_list_size(&port->ifaces) * sizeof *s.slaves);
+ /* Get members. */
+ s.n_members = 0;
+ s.members = xmalloc(ovs_list_size(&port->ifaces) * sizeof *s.members);
LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
- s.slaves[s.n_slaves++] = iface->ofp_port;
+ s.members[s.n_members++] = iface->ofp_port;
}
/* Get VLAN tag. */
@@ -1266,16 +1271,16 @@ port_configure(struct port *port)
if (s.lacp) {
size_t i = 0;
- s.lacp_slaves = xmalloc(s.n_slaves * sizeof *s.lacp_slaves);
+ s.lacp_members = xmalloc(s.n_members * sizeof *s.lacp_members);
LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
- iface_configure_lacp(iface, &s.lacp_slaves[i++]);
+ iface_configure_lacp(iface, &s.lacp_members[i++]);
}
} else {
- s.lacp_slaves = NULL;
+ s.lacp_members = NULL;
}
/* Get bond settings. */
- if (s.n_slaves > 1) {
+ if (s.n_members > 1) {
s.bond = &bond_settings;
port_configure_bond(port, &bond_settings);
} else {
@@ -1293,9 +1298,9 @@ port_configure(struct port *port)
/* Clean up. */
free(s.cvlans);
- free(s.slaves);
+ free(s.members);
free(s.trunks);
- free(s.lacp_slaves);
+ free(s.lacp_members);
}
/* Pick local port hardware address and datapath ID for 'br'. */
@@ -2273,8 +2278,8 @@ find_local_hw_addr(const struct bridge *br, struct eth_addr *ea,
} else {
/* Choose the interface whose MAC address will represent the port.
* The Linux kernel bonding code always chooses the MAC address of
- * the first slave added to a bond, and the Fedora networking
- * scripts always add slaves to a bond in alphabetical order, so
+ * the first member added to a bond, and the Fedora networking
+ * scripts always add members to a bond in alphabetical order, so
* for compatibility we choose the interface with the name that is
* first in alphabetical order. */
LIST_FOR_EACH (candidate, port_elem, &port->ifaces) {
@@ -2957,7 +2962,7 @@ port_refresh_bond_status(struct port *port, bool force_update)
return;
}
- if (bond_get_changed_active_slave(port->name, &mac, force_update)) {
+ if (bond_get_changed_active_member(port->name, &mac, force_update)) {
struct ds mac_s;
ds_init(&mac_s);
@@ -3013,10 +3018,10 @@ ofp12_controller_role_to_str(enum ofp12_controller_role role)
switch (role) {
case OFPCR12_ROLE_EQUAL:
return "other";
- case OFPCR12_ROLE_MASTER:
- return "master";
- case OFPCR12_ROLE_SLAVE:
- return "slave";
+ case OFPCR12_ROLE_PRIMARY:
+ return "primary";
+ case OFPCR12_ROLE_SECONDARY:
+ return "secondary";
case OFPCR12_ROLE_NOCHANGE:
default:
return NULL;
@@ -3905,49 +3910,48 @@ bridge_configure_remotes(struct bridge *br,
&& (!strncmp(c->target, "punix:", 6)
|| !strncmp(c->target, "unix:", 5))) {
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- char *whitelist;
+ char *allowed;
if (!strncmp(c->target, "unix:", 5)) {
/* Connect to a listening socket */
- whitelist = xasprintf("unix:%s/", ovs_rundir());
+ allowed = xasprintf("unix:%s/", ovs_rundir());
if (strchr(c->target, '/') &&
- !equal_pathnames(c->target, whitelist,
- strlen(whitelist))) {
+ !equal_pathnames(c->target, allowed, strlen(allowed))) {
/* Absolute path specified, but not in ovs_rundir */
VLOG_ERR_RL(&rl, "bridge %s: Not connecting to socket "
"controller \"%s\" due to possibility for "
"remote exploit. Instead, specify socket "
- "in whitelisted \"%s\" or connect to "
+ "in permitted directory \"%s\" or connect to "
"\"unix:%s/%s.mgmt\" (which is always "
"available without special configuration).",
- br->name, c->target, whitelist,
+ br->name, c->target, allowed,
ovs_rundir(), br->name);
- free(whitelist);
+ free(allowed);
continue;
}
} else {
- whitelist = xasprintf("punix:%s/%s.",
+ allowed = xasprintf("punix:%s/%s.",
ovs_rundir(), br->name);
- if (!equal_pathnames(c->target, whitelist, strlen(whitelist))
- || strchr(c->target + strlen(whitelist), '/')) {
+ if (!equal_pathnames(c->target, allowed, strlen(allowed))
+ || strchr(c->target + strlen(allowed), '/')) {
/* Prevent remote ovsdb-server users from accessing
* arbitrary Unix domain sockets and overwriting arbitrary
* local files. */
VLOG_ERR_RL(&rl, "bridge %s: Not adding Unix domain socket "
"controller \"%s\" due to possibility of "
"overwriting local files. Instead, specify "
- "path in whitelisted format \"%s*\" or "
+ "path in permitted format \"%s*\" or "
"connect to \"unix:%s/%s.mgmt\" (which is "
"always available without special "
"configuration).",
- br->name, c->target, whitelist,
+ br->name, c->target, allowed,
ovs_rundir(), br->name);
- free(whitelist);
+ free(allowed);
continue;
}
}
- free(whitelist);
+ free(allowed);
}
bridge_configure_local_iface_netdev(br, c);
@@ -4123,6 +4127,13 @@ bridge_configure_dp_desc(struct bridge *br)
smap_get(&br->cfg->other_config, "dp-desc"));
}
+static void
+bridge_configure_serial_desc(struct bridge *br)
+{
+ ofproto_set_serial_desc(br->ofproto,
+ smap_get(&br->cfg->other_config, "dp-sn"));
+}
+
static struct aa_mapping *
bridge_aa_mapping_find(struct bridge *br, const int64_t isid)
{
@@ -4495,7 +4506,7 @@ port_configure_lacp(struct port *port, struct lacp_settings *s)
}
static void
-iface_configure_lacp(struct iface *iface, struct lacp_slave_settings *s)
+iface_configure_lacp(struct iface *iface, struct lacp_member_settings *s)
{
int priority, portid, key;
@@ -4580,6 +4591,10 @@ port_configure_bond(struct port *port, struct bond_settings *s)
s->lacp_fallback_ab_cfg = smap_get_bool(&port->cfg->other_config,
"lacp-fallback-ab", false);
+ s->primary = NULL;
+ if (s->balance == BM_AB || s->lacp_fallback_ab_cfg) {
+ s->primary = smap_get(&port->cfg->other_config, "bond-primary");
+ }
LIST_FOR_EACH (iface, port_elem, &port->ifaces) {
netdev_set_miimon_interval(iface->netdev, miimon_interval);
@@ -4587,10 +4602,15 @@ port_configure_bond(struct port *port, struct bond_settings *s)
mac_s = port->cfg->bond_active_slave;
if (!mac_s || !ovs_scan(mac_s, ETH_ADDR_SCAN_FMT,
- ETH_ADDR_SCAN_ARGS(s->active_slave_mac))) {
+ ETH_ADDR_SCAN_ARGS(s->active_member_mac))) {
/* OVSDB did not store the last active interface */
- s->active_slave_mac = eth_addr_zero;
+ s->active_member_mac = eth_addr_zero;
}
+
+ /* lb_output action is disabled by default. */
+ s->use_lb_output_action = (s->balance == BM_TCP)
+ && smap_get_bool(&port->cfg->other_config,
+ "lb-output-action", false);
}
/* Returns true if 'port' is synthetic, that is, if we constructed it locally
diff --git a/vswitchd/ovs-vswitchd.8.in b/vswitchd/ovs-vswitchd.8.in
index a234771761ee9a30cc8bbcc884faeb852a730255..50dad7208e858f539596e6a4ffd75aac108a529f 100644
--- a/vswitchd/ovs-vswitchd.8.in
+++ b/vswitchd/ovs-vswitchd.8.in
@@ -107,12 +107,13 @@ how to configure Open vSwitch.
.SS "GENERAL COMMANDS"
.IP "\fBexit\fR \fI--cleanup\fR"
Causes \fBovs\-vswitchd\fR to gracefully terminate. If \fI--cleanup\fR
-is specified, release datapath resources configured by \fBovs\-vswitchd\fR.
-Otherwise, datapath flows and other resources remains undeleted.
-Resources of datapaths that are integrated into \fBovs\-vswitchd\fR (e.g.
-the \fBnetdev\fR datapath type) are always released regardless of
-\fI--cleanup\fR except for ports with \fBinternal\fR type. Use \fI--cleanup\fR
-to release \fBinternal\fR ports too.
+is specified, deletes flows from datapaths and releases other datapath
+resources configured by \fBovs\-vswitchd\fR. Otherwise, datapath
+flows and other resources remain undeleted.  Resources of datapaths
+that are integrated into \fBovs\-vswitchd\fR (e.g. the \fBnetdev\fR
+datapath type) are always released regardless of \fI--cleanup\fR
+except for ports with \fBinternal\fR type. Use \fI--cleanup\fR to
+release \fBinternal\fR ports too.
.
.IP "\fBqos/show-types\fR \fIinterface\fR"
Queries the interface for a list of Quality of Service types that are
@@ -197,46 +198,46 @@ These commands manage bonded ports on an Open vSwitch's bridges. To
understand some of these commands, it is important to understand a
detail of the bonding implementation called ``source load balancing''
(SLB). Instead of directly assigning Ethernet source addresses to
-slaves, the bonding implementation computes a function that maps an
+members, the bonding implementation computes a function that maps an
48-bit Ethernet source addresses into an 8-bit value (a ``MAC hash''
value). All of the Ethernet addresses that map to a single 8-bit
-value are then assigned to a single slave.
+value are then assigned to a single member.
.IP "\fBbond/list\fR"
-Lists all of the bonds, and their slaves, on each bridge.
+Lists all of the bonds, and their members, on each bridge.
.
.IP "\fBbond/show\fR [\fIport\fR]"
Lists all of the bond-specific information (updelay, downdelay, time
until the next rebalance) about the given bonded \fIport\fR, or all
bonded ports if no \fIport\fR is given. Also lists information about
-each slave: whether it is enabled or disabled, the time to completion
+each member: whether it is enabled or disabled, the time to completion
of an updelay or downdelay if one is in progress, whether it is the
-active slave, the hashes assigned to the slave. Any LACP information
+active member, the hashes assigned to the member. Any LACP information
related to this bond may be found using the \fBlacp/show\fR command.
.
-.IP "\fBbond/migrate\fR \fIport\fR \fIhash\fR \fIslave\fR"
-Only valid for SLB bonds. Assigns a given MAC hash to a new slave.
+.IP "\fBbond/migrate\fR \fIport\fR \fIhash\fR \fImember\fR"
+Only valid for SLB bonds. Assigns a given MAC hash to a new member.
\fIport\fR specifies the bond port, \fIhash\fR the MAC hash to be
-migrated (as a decimal number between 0 and 255), and \fIslave\fR the
-new slave to be assigned.
+migrated (as a decimal number between 0 and 255), and \fImember\fR the
+new member to be assigned.
.IP
The reassignment is not permanent: rebalancing or fail-over will
-cause the MAC hash to be shifted to a new slave in the usual
+cause the MAC hash to be shifted to a new member in the usual
manner.
.IP
-A MAC hash cannot be migrated to a disabled slave.
-.IP "\fBbond/set\-active\-slave\fR \fIport\fR \fIslave\fR"
-Sets \fIslave\fR as the active slave on \fIport\fR. \fIslave\fR must
+A MAC hash cannot be migrated to a disabled member.
+.IP "\fBbond/set\-active\-member\fR \fIport\fR \fImember\fR"
+Sets \fImember\fR as the active member on \fIport\fR. \fImember\fR must
currently be enabled.
.IP
-The setting is not permanent: a new active slave will be selected
-if \fIslave\fR becomes disabled.
-.IP "\fBbond/enable\-slave\fR \fIport\fR \fIslave\fR"
-.IQ "\fBbond/disable\-slave\fR \fIport\fR \fIslave\fR"
-Enables (or disables) \fIslave\fR on the given bond \fIport\fR, skipping any
+The setting is not permanent: a new active member will be selected
+if \fImember\fR becomes disabled.
+.IP "\fBbond/enable\-member\fR \fIport\fR \fImember\fR"
+.IQ "\fBbond/disable\-member\fR \fIport\fR \fImember\fR"
+Enables (or disables) \fImember\fR on the given bond \fIport\fR, skipping any
updelay (or downdelay).
.IP
This setting is not permanent: it persists only until the carrier
-status of \fIslave\fR changes.
+status of \fImember\fR changes.
.IP "\fBbond/hash\fR \fImac\fR [\fIvlan\fR] [\fIbasis\fR]"
Returns the hash value which would be used for \fImac\fR with \fIvlan\fR
and \fIbasis\fR if specified.
@@ -244,7 +245,7 @@ and \fIbasis\fR if specified.
.IP "\fBlacp/show\fR [\fIport\fR]"
Lists all of the LACP related information about the given \fIport\fR:
active or passive, aggregation key, system id, and system priority. Also
-lists information about each slave: whether it is enabled or disabled,
+lists information about each member: whether it is enabled or disabled,
whether it is attached or detached, port id and priority, actor
information, and partner information. If \fIport\fR is not specified,
then displays detailed information about all interfaces with CFM
@@ -252,7 +253,7 @@ enabled.
.
.IP "\fBlacp/stats-show\fR [\fIport\fR]"
Lists various stats about LACP PDUs (number of RX/TX PDUs, bad PDUs received)
-and slave state (number of time slave's state expired/defaulted and carrier
+and member state (number of times its state expired/defaulted and carrier
status changed) for the given \fIport\fR. If \fIport\fR is not specified,
then displays stats of all interfaces with LACP enabled.
.SS "DPCTL DATAPATH DEBUGGING COMMANDS"
@@ -271,6 +272,7 @@ type).
..
.so lib/dpctl.man
.
+.so lib/dpdk-unixctl.man
.so lib/dpif-netdev-unixctl.man
.so lib/netdev-dpdk-unixctl.man
.so ofproto/ofproto-dpif-unixctl.man
diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
index 1e72b628b1bdef26a40948a817d21ae13b2ee18b..f007f9c0bc06c1d6563e814880ee3525d3bfd07f 100644
--- a/vswitchd/ovs-vswitchd.c
+++ b/vswitchd/ovs-vswitchd.c
@@ -228,7 +228,7 @@ parse_options(int argc, char *argv[], char **unixctl_pathp)
break;
case OPT_DISABLE_SYSTEM:
- dp_blacklist_provider("system");
+ dp_disallow_provider("system");
break;
case OPT_DISABLE_SYSTEM_ROUTE:
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 3ddaaefda84974922b0ed05e660020d69d32113c..fee54b0fa058ac224f8b84814ba9f37fba405ac7 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -1072,7 +1072,7 @@
Drop all unencrypted tunneled packets in which the
least-significant bit of skb_mark is 0. This would
be a useful policy if no unencrypted tunneled traffic should exit
- the system without being specially whitelisted by setting
+ the system without being specially permitted by setting
skb_mark to 1.
@@ -1261,6 +1261,18 @@
Human readable description of datapath. It is a maximum 256
byte-long free-form string to describe the datapath for
debugging purposes, e.g. switch3 in room 3120.
+ The value is returned by the switch as a part of reply to OFPMP_DESC
+ request (ofp_desc). The OpenFlow specification (e.g. 1.3.5) describes
+ the ofp_desc structure to contain "NULL terminated ASCII strings".
+ For compatibility reasons, no more than 255 ASCII characters should be used.
+
+
+
+ Serial number. It is a maximum 32 byte-long free-form string to
+ provide an additional switch identification. The value is returned
+ by the switch as a part of reply to OFPMP_DESC request (ofp_desc).
+ Same as mentioned in the description of ,
+ the string should be no more than 31 ASCII characters for compatibility.
balance-slb
- Balances flows among slaves based on source MAC address and output
- VLAN, with periodic rebalancing as traffic patterns change.
+ Balances flows among members based on source MAC address and
+ output VLAN, with periodic rebalancing as traffic patterns change.
active-backup
- Assigns all flows to one slave, failing over to a backup slave when
- the active slave is disabled. This is the only bonding mode in which
- interfaces may be plugged into different upstream switches.
+ Assigns all flows to one member, failing over to a backup
+ member when the active member is disabled. This is the
+ only bonding mode in which interfaces may be plugged into different
+ upstream switches.
@@ -1959,8 +1972,8 @@
balance-tcp
-
- Balances flows among slaves based on L3 and L4 protocol information
- such as IP addresses and TCP/UDP ports.
+ Balances flows among members based on L3 and L4 protocol
+ information such as IP addresses and TCP/UDP ports.
@@ -1975,11 +1988,31 @@
- An integer hashed along with flows when choosing output slaves in load
- balanced bonds. When changed, all flows will be assigned different
- hash values possibly causing slave selection decisions to change. Does
- not affect bonding modes which do not employ load balancing such as
- active-backup.
+ An integer hashed along with flows when choosing output members
+ in load balanced bonds. When changed, all flows will be assigned
+ different hash values possibly causing member selection
+ decisions to change. Does not affect bonding modes which do not employ
+ load balancing such as active-backup.
+
+
+
+ Enable/disable usage of optimized lb_output action for
+ balancing flows among output members in load balanced bonds in
+ balance-tcp. When enabled, it uses an optimized path for
+ balance-tcp mode by using RSS hash and avoids recirculation. This knob
+ does not affect other balancing modes.
+
+
+
+ If a member interface with this name exists in the bond and
+ is up, it will be made active. Relevant only when is
+ active-backup or if balance-tcp falls back
+ to active-backup (e.g., LACP negotiation fails and
+ is
+ true).
@@ -2358,7 +2391,8 @@
- For a bonded port, record the mac address of the current active slave.
+ For a bonded port, record the MAC address of the current active
+ member.
@@ -2448,7 +2482,8 @@
For the local interface, the default is the lowest-numbered MAC
address among the other bridge ports, either the value of the
in its record,
- if set, or its actual MAC (for bonded ports, the MAC of its slave
+ if set, or its actual MAC (for bonded ports, the MAC of its
+ member
whose name is first in alphabetical order). Internal ports and
bridge ports that are used as port mirroring destinations (see the
table) are ignored.
@@ -2635,6 +2670,39 @@
A pair of virtual devices that act as a patch cable.
+
+ gtpu
+
+
+ GPRS Tunneling Protocol (GTP) is a group of IP-based communications
+ protocols used to carry general packet radio service (GPRS) within
+ GSM, UMTS and LTE networks. GTP-U is used for carrying user data
+ within the GPRS core network and between the radio access network
+ and the core network. The user data transported can be packets in
+ any of IPv4, IPv6, or PPP formats.
+
+
+
+ The protocol is documented at
+ http://www.3gpp.org/DynaReport/29281.htm
+
+
+
+ Open vSwitch uses UDP destination port 2152. The source port used
+ for GTP traffic varies on a per-flow basis and is in the ephemeral
+ port range.
+
+
+
+ Bareudp
+
+
+ The Bareudp tunnel provides generic L3 encapsulation support for
+ tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a
+ UDP tunnel.
+
+
+
@@ -2642,8 +2710,9 @@
These options apply to interfaces with of
- geneve, gre, ip6gre,
- vxlan, lisp and stt.
+ geneve, bareudp, gre,
+ ip6gre, vxlan, lisp and
+ stt.
@@ -2655,6 +2724,8 @@
one is matched first. is
considered more specific than if
a port defines one and another port defines the other.
+ is not applicable for bareudp
+ tunnels. Hence it is not considered while identifying a bareudp tunnel.
@@ -2721,7 +2792,10 @@
- Optional. The key that received packets must contain, one of:
+
+ Optional, not applicable for bareudp. The key that
+ received packets must contain, one of:
+
- Optional. The key to be set on outgoing packets, one of:
+
+ Optional, not applicable for bareudp. The key to be set
+ on outgoing packets, one of:
+
-
+
gre, ip6gre, geneve,
- and vxlan interfaces support these options.
+ bareudp and vxlan interfaces support these
+ options.
@@ -3033,6 +3111,17 @@
+
+
+
+ Specifies the ethertype of the L3 protocol the bareudp
+ device is tunnelling. For tunnels that support multiple
+ ethertypes of an L3 protocol (IP, MPLS), this field specifies the
+ protocol name as a string.
+
+
+
+
These options apply only to patch ports, that is, interfaces
@@ -3183,17 +3272,6 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
-
-
- The value specifies whether or not to enable dequeue zero copy on
- the given interface.
- Must be set before vhost-server-path is specified.
- Only supported by dpdkvhostuserclient interfaces.
- The feature is considered experimental.
-
-
-
@@ -3227,6 +3305,24 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
descriptors will be used by default.
+
+
+
+ Ethernet address to set for this VF interface. If unset then the
+ default MAC address is used:
+
+
+ -
+ For most drivers, the default MAC address assigned by their
+ hardware.
+
+ -
+ For bifurcated drivers, the MAC currently used by the kernel
+ netdevice.
+
+
+ This option may only be used with dpdk VF representors.
+
@@ -3617,6 +3713,13 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
optional Authentication or ``Echo Mode'' features.
+
+ OVS 2.13 and earlier intercepted and processed all BFD packets.
+ OVS 2.14 and later only intercept and process BFD packets destined
+ to a configured BFD instance, and other BFD packets are made available
+ to the OVS flow table for forwarding.
+
+
A controller sets up key-value pairs in the
@@ -5215,16 +5318,21 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
other
Allows the controller access to all OpenFlow features.
master
- Equivalent to other, except that there may be at
- most one master controller at a time. When a controller configures
- itself as master, any existing master is demoted to
- the slave role.
+
+ Equivalent to other, except that there may be at
+ most one such controller at a time. If a given controller
+ promotes itself to this role, ovs-vswitchd
+ demotes any existing controller with the role to slave.
+
+
slave
- Allows the controller read-only access to OpenFlow features.
- Attempts to modify the flow table will be rejected with an
- error. Slave controllers do not receive OFPT_PACKET_IN or
- OFPT_FLOW_REMOVED messages, but they do receive OFPT_PORT_STATUS
- messages.
+
+ Allows the controller read-only access to OpenFlow features.
+ Attempts to modify the flow table will be rejected with an
+ error. Such controllers do not receive OFPT_PACKET_IN or
+ OFPT_FLOW_REMOVED messages, but they do receive OFPT_PORT_STATUS
+ messages.
+
@@ -5776,6 +5884,19 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
higher performance for MPLS and active-active load balancing
bonding modes.
+
+ If this is true, then the datapath supports optimized balance-tcp
+ bond mode. This capability replaces existing hash and
+ recirc actions with new action lb_output
+ and avoids recirculation of packet in datapath. It is supported
+ only for balance-tcp bond mode in netdev datapath. The new action
+ gives higher performance by using bond buckets instead of post
+ recirculation flows for selection of member port from bond. By default
+ this new action is disabled, however it can be enabled by setting
+ in
+ table.
+
These capabilities are granular because Open vSwitch and its
@@ -6101,9 +6222,9 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 type=patch options:peer=p1 \
- The timeout of the connection after an ICMP error is replied in
- response to an ICMP packet. This timeout is only supported by
- the userspace datapath.
+ The timeout of the connection when ICMP packets have been seen in
+ both directions. This timeout is only supported by the userspace
+ datapath.
diff --git a/windows/automake.mk b/windows/automake.mk
index 80dca14673975412ed9a95311660224f28c3a3ca..48934302882685c0ff414716e76572cfb76ae3be 100644
--- a/windows/automake.mk
+++ b/windows/automake.mk
@@ -12,7 +12,6 @@
# License for the specific language governing permissions and limitations
# under the License.
-PTHREAD_TEMP_DIR=`echo "$(PTHREAD_LDFLAGS)" | sed 's|^.\(.*\).$:\1||'`
windows_installer: all
#Userspace files needed for the installer
cp -f $(top_srcdir)/datapath-windows/misc/OVS.psm1 windows/ovs-windows-installer/Services/OVS.psm1
@@ -27,14 +26,18 @@ windows_installer: all
cp -f $(top_srcdir)/ovsdb/ovsdb-tool.pdb windows/ovs-windows-installer/Symbols/
#Third party files needed by the installer
cp -f $(PTHREAD_WIN32_DIR_DLL_WIN_FORM)/*.dll windows/ovs-windows-installer/Binaries/
- cp -f "/c/Program Files (x86)/Common Files/Merge Modules/Microsoft_VC120_CRT_x86.msm" windows/ovs-windows-installer/Redist/Microsoft_VC120_CRT_x86.msm
+ cp -f "/c/Program Files (x86)/Common Files/Merge Modules/Microsoft_VC140_CRT_x86.msm" windows/ovs-windows-installer/Redist/Microsoft_VC140_CRT_x86.msm
+ cp -f "/c/Program Files (x86)/Common Files/Merge Modules/Microsoft_VC140_CRT_x64.msm" windows/ovs-windows-installer/Redist/Microsoft_VC140_CRT_x64.msm
#Forwarding extension files needed for the installer
cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/ovsext.cat windows/ovs-windows-installer/Driver/Win8/ovsext.cat
cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/ovsext.inf windows/ovs-windows-installer/Driver/Win8/ovsext.inf
- cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/OVSExt.sys windows/ovs-windows-installer/Driver/Win8/OVSExt.sys
+ cp -f $(top_srcdir)/datapath-windows/x64/Win8$(VSTUDIO_CONFIG)/package/OVSExt.sys windows/ovs-windows-installer/Driver/Win8/ovsext.sys
cp -f $(top_srcdir)/datapath-windows/x64/Win8.1$(VSTUDIO_CONFIG)/package/ovsext.cat windows/ovs-windows-installer/Driver/Win8.1/ovsext.cat
cp -f $(top_srcdir)/datapath-windows/x64/Win8.1$(VSTUDIO_CONFIG)/package/ovsext.inf windows/ovs-windows-installer/Driver/Win8.1/ovsext.inf
cp -f $(top_srcdir)/datapath-windows/x64/Win8.1$(VSTUDIO_CONFIG)/package/ovsext.sys windows/ovs-windows-installer/Driver/Win8.1/ovsext.sys
+ cp -f $(top_srcdir)/datapath-windows/x64/Win10$(VSTUDIO_CONFIG)/package/ovsext.cat windows/ovs-windows-installer/Driver/Win10/ovsext.cat
+ cp -f $(top_srcdir)/datapath-windows/x64/Win10$(VSTUDIO_CONFIG)/package/ovsext.inf windows/ovs-windows-installer/Driver/Win10/ovsext.inf
+ cp -f $(top_srcdir)/datapath-windows/x64/Win10$(VSTUDIO_CONFIG)/package/ovsext.sys windows/ovs-windows-installer/Driver/Win10/ovsext.sys
MSBuild.exe windows/ovs-windows-installer.sln //nologo //target:Build //p:Configuration="Release" //p:Version="$(PACKAGE_VERSION)" //p:Platform=$(PLATFORM)
EXTRA_DIST += \
diff --git a/windows/ovs-windows-installer/Driver/.gitignore b/windows/ovs-windows-installer/Driver/.gitignore
index e9994b37d8ec1b4665442280fcbb9ad42ddd9a36..3de51701639d5d177c0a54ba1325df7a15b50456 100644
--- a/windows/ovs-windows-installer/Driver/.gitignore
+++ b/windows/ovs-windows-installer/Driver/.gitignore
@@ -3,3 +3,4 @@
!.gitignore
!Win8
!Win8.1
+!Win10
diff --git a/windows/ovs-windows-installer/Driver/Win10/.gitignore b/windows/ovs-windows-installer/Driver/Win10/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..cec9082b6d68f3fdd70f2fef2818bf74ec6ef362
--- /dev/null
+++ b/windows/ovs-windows-installer/Driver/Win10/.gitignore
@@ -0,0 +1,3 @@
+*
+
+!.gitignore
diff --git a/windows/ovs-windows-installer/Product.wxs b/windows/ovs-windows-installer/Product.wxs
index ea1bc689687b4f6950ec6510e968efad507e8273..61289da6bdcd79ce29a44f3412e86f5efc015064 100644
--- a/windows/ovs-windows-installer/Product.wxs
+++ b/windows/ovs-windows-installer/Product.wxs
@@ -36,6 +36,9 @@
+
+
+
= 602)]]>
@@ -51,9 +54,13 @@
-
-
+
+
+
+
@@ -61,6 +68,7 @@
Description="Installs the Open vSwitch Hyper-V switch extension driver." Display="expand">
+
@@ -250,17 +258,24 @@
-
+
+
+ 10000]]>
+
+
+
+
-
+
+
diff --git a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
index e7404e3b0097f032935298360db629c85f735c20..b8db881949691f3613ca8e4ae292f9839bcb1fe4 100755
--- a/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
+++ b/xenserver/etc_xapi.d_plugins_openvswitch-cfg-update
@@ -1,10 +1,10 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
#
# xapi plugin script to update the cache of configuration items in the
# ovs-vswitchd configuration that are managed in the xapi database when
# integrated with Citrix management tools.
-# Copyright (C) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+# Copyright (C) 2009, 2010, 2011, 2012, 2013, 2020 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/xenserver/opt_xensource_libexec_interface-reconfigure b/xenserver/opt_xensource_libexec_interface-reconfigure
index a82043fb5b12e9eedf117c9be341332c3b62368d..9c20725de20e37d90e29160de82e5e54b2317e3b 100755
--- a/xenserver/opt_xensource_libexec_interface-reconfigure
+++ b/xenserver/opt_xensource_libexec_interface-reconfigure
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
#
# Copyright (c) 2008,2009 Citrix Systems, Inc.
#
diff --git a/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync b/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync
index cf89600253196aea20e56191af9bc4ad63747b66..bff85464ba8589da300ffa60029b27b86f8dd611 100755
--- a/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync
+++ b/xenserver/usr_share_openvswitch_scripts_ovs-xapi-sync
@@ -1,5 +1,5 @@
-#! /usr/bin/env python
-# Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
+#!/usr/bin/env python3
+# Copyright (c) 2009, 2010, 2011, 2012, 2013, 2020 Nicira, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.