Name:         ovn-controller-default-bin
Namespace:    openstack
Labels:       app.kubernetes.io/managed-by=Helm
Annotations:  meta.helm.sh/release-name: ovn
              meta.helm.sh/release-namespace: openstack

Data
====
ovn-bgp-agent-init.sh:
----
#!/bin/bash
set -ex

# Render the per-pod configuration fragments consumed by ovn-bgp-agent.
# See: https://bugs.launchpad.net/neutron/+bug/2028442
mkdir -p /tmp/pod-shared
tee > /tmp/pod-shared/ovn.ini << EOF
[ovn]
ovn_nb_connection=tcp:$OVN_OVSDB_NB_SERVICE_HOST:$OVN_OVSDB_NB_SERVICE_PORT_OVSDB
ovn_sb_connection=tcp:$OVN_OVSDB_SB_SERVICE_HOST:$OVN_OVSDB_SB_SERVICE_PORT_OVSDB
EOF

tee > /tmp/pod-shared/ovn-bgp-agent.ini << EOF
[DEFAULT]
bgp_router_id=$NODE_IP

[frr_k8s]
node_name=$NODE_NAME
EOF

ovn-bgp-agent.sh:
----
#!/bin/bash
set -x

# Replace this shell with the agent, layering the static configuration with
# the two fragments written to /tmp/pod-shared.
exec ovn-bgp-agent \
  --config-file /etc/ovn-bgp-agent/ovn-bgp-agent.conf \
  --config-file /tmp/pod-shared/ovn-bgp-agent.ini \
  --config-file /tmp/pod-shared/ovn.ini

ovn-controller-init.sh:
----
#!/bin/bash -xe

# Copyright 2023 VEXXHOST, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

ANNOTATION_KEY="atmosphere.cloud/ovn-system-id"

# Print the first IPv4 address (without prefix length) found on interface $1.
# Exits with status 1 and prints nothing on stdout when no address is found.
# Callers must capture the output in a variable and check the status: an
# `exit` inside a command substitution only terminates that subshell.
function get_ip_address_from_interface {
  local interface=$1
  local ip
  ip=$(ip -4 -o addr s "${interface}" | awk '{ print $4; exit }' | awk -F '/' 'NR==1 {print $1}')
  if [ -z "${ip}" ] ; then
    echo "no IPv4 address found on interface '${interface}'" >&2
    exit 1
  fi
  echo "${ip}"
}

# Print the system-id currently stored in OVS with the surrounding quotes
# removed.
function get_current_system_id {
  ovs-vsctl --if-exists get Open_vSwitch . external_ids:system-id | tr -d '"'
}

# Print the system-id recorded in this node's Kubernetes annotation.
function get_stored_system_id {
  kubectl get node "$NODE_NAME" -o "jsonpath={.metadata.annotations.atmosphere\.cloud/ovn-system-id}"
}

# Record system-id $1 in this node's Kubernetes annotation.
function store_system_id() {
  local system_id=$1
  kubectl annotate node "$NODE_NAME" "$ANNOTATION_KEY=$system_id"
}

# Detect tunnel interface
tunnel_interface=""
if [ -z "${tunnel_interface}" ] ; then
  # search for interface with tunnel network routing
  tunnel_network_cidr="0/0"
  if [ -z "${tunnel_network_cidr}" ] ; then
    tunnel_network_cidr="0/0"
  fi
  # If there is not tunnel network gateway, exit
  tunnel_interface=$(ip -4 route list ${tunnel_network_cidr} | awk -F 'dev' '{ print $2; exit }' \
    | awk '{ print $1 }') || exit 1
fi

# Resolve the encapsulation IP before handing it to ovs-vsctl. When the lookup
# is embedded directly in the ovs-vsctl argument, a failed lookup is not
# noticed and an empty ovn-encap-ip is written.
encap_ip="$(get_ip_address_from_interface "${tunnel_interface}")" || {
  echo "unable to determine OVN encap IP from interface '${tunnel_interface}'" >&2
  exit 1
}
ovs-vsctl set open . external_ids:ovn-encap-ip="${encap_ip}"

# Get the stored system-id from the Kubernetes node annotation
stored_system_id=$(get_stored_system_id)

# Get the current system-id set in OVS
current_system_id=$(get_current_system_id)

if [ -n "$stored_system_id" ] && [ "$stored_system_id" != "$current_system_id" ]; then
  # If the annotation exists and does not match the current system-id, set the system-id to the stored one
  ovs-vsctl set Open_vSwitch . external_ids:system-id="$stored_system_id"
elif [ -z "$current_system_id" ]; then
  # If no current system-id is set, generate a new one
  current_system_id=$(uuidgen)
  ovs-vsctl set Open_vSwitch . external_ids:system-id="$current_system_id"
  # Store the new system-id in the Kubernetes node annotation
  store_system_id "$current_system_id"
elif [ -z "$stored_system_id" ]; then
  # If there is no stored system-id, store the current one
  store_system_id "$current_system_id"
fi

# Configure OVN remote
ovs-vsctl set open . external-ids:ovn-remote="tcp:ovn-ovsdb-sb-0.ovn-ovsdb-sb.openstack.svc.cluster.local:6642"

# Configure OVN values
ovs-vsctl set open . external-ids:rundir="/var/run/openvswitch"
ovs-vsctl set open . external-ids:ovn-encap-type="geneve"
ovs-vsctl set open . external-ids:ovn-bridge="br-int"
ovs-vsctl set open . external-ids:ovn-bridge-mappings="external:br-ex"

GW_ENABLED=$(cat /tmp/gw-enabled/gw-enabled)
if [[ ${GW_ENABLED} == enabled ]]; then
  ovs-vsctl set open . external-ids:ovn-cms-options=enable-chassis-as-gw,availability-zones=nova
else
  ovs-vsctl set open . external-ids:ovn-cms-options=availability-zones=nova
fi

# Configure hostname
ovs-vsctl set open . external-ids:hostname="$(hostname -f)"

# Create bridges and create ports
# handle any bridge mappings
# /tmp/auto_bridge_add is one line json file: {"br-ex1":"eth1","br-ex2":"eth2"}
for bmap in $(sed 's/[{}"]//g' /tmp/auto_bridge_add | tr "," "\n")
do
  bridge=${bmap%:*}
  iface=${bmap#*:}
  ovs-vsctl --may-exist add-br "$bridge" -- set bridge "$bridge" protocols=OpenFlow13
  # Only attach the interface when one is named and it exists on this host.
  if [ -n "$iface" ] && [ "$iface" != "null" ] && ( ip link show "$iface" 1>/dev/null 2>&1 );
  then
    ovs-vsctl --may-exist add-port "$bridge" "$iface"
  fi
done

/usr/local/bin/ovsinit /tmp/auto_bridge_add

ovn-network-logging-parser.sh:
----
#!/bin/bash
set -ex

# First argument selects the action to run; defaults to "start".
COMMAND="${@:-start}"

function start () {
  exec uwsgi --ini /etc/neutron/neutron-ovn-network-logging-parser-uwsgi.ini
}

function stop () {
  kill -TERM 1
}

$COMMAND

ovn-stale-port-cleanup.sh:
----
#!/bin/bash

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Periodically remove stale OVS ports from the integration bridge.
#
# A "stale" port is an OVS Port whose attached Interface has no corresponding
# kernel netdev.
# This commonly happens because libvirt removes the tap device
# when a VM is destroyed but the matching OVS port entry is left behind on
# br-int. Accumulation of these orphans causes ovn-controller poll-loop
# stalls, OVS disconnections, and packet drops.
#
# Detection criteria (all must hold):
#   * Port lives on the configured integration bridge (default: br-int).
#   * Interface has the "iface-id" external_id (it is/was a Neutron VIF).
#   * Either the Interface "error" column is non-empty (typically
#     "could not open network device tapXXXX (No such device)") or the
#     kernel netdev for the Interface "name" is not visible to `ip link`.
#
# Anti-race protection:
#   The Neutron OVS agent creates the OVS port with iface-id set BEFORE
#   libvirt creates the tap device. To avoid deleting a brand-new VIF
#   that has not finished plugging, every candidate must remain stale
#   across MIN_STALE_OBSERVATIONS consecutive cycles (default: 2) before
#   deletion. The observation state lives in /var/run inside the pod
#   and is reset whenever the port becomes healthy again.
#
# Operator opt-out:
#   Interfaces with external_ids:skip_cleanup="true" are never deleted.

set -o pipefail

# Tunables; each may be overridden through the environment.
INTEGRATION_BRIDGE="${INTEGRATION_BRIDGE:-br-int}"
OVS_DB_SOCKET="${OVS_DB_SOCKET:-/run/openvswitch/db.sock}"
INTERVAL_SECONDS="${INTERVAL_SECONDS:-3600}"
MAX_DELETIONS_PER_CYCLE="${MAX_DELETIONS_PER_CYCLE:-200}"
MIN_STALE_OBSERVATIONS="${MIN_STALE_OBSERVATIONS:-2}"
DRY_RUN="${DRY_RUN:-0}"
STATE_DIR="${STATE_DIR:-/var/run/ovn-stale-port-cleanup}"

# Print a UTC-timestamped, tagged message on stdout.
log() {
  echo "$(date -u +%Y-%m-%dT%H:%M:%SZ) [stale-port-cleanup] $*"
}

# Ensure the global variable named $1 holds a non-negative decimal integer,
# replacing it with the fallback $2 (and logging a warning) otherwise.
# These values are compared inside (( )), where a non-integer either
# evaluates as 0 or raises an arithmetic error; in both cases the
# "not yet observed often enough" check stops deferring deletions.
require_uint() {
  local name="$1" fallback="$2"
  local value="${!name}"
  if [[ "${value}" =~ ^[0-9]+$ ]]; then
    # Force base 10 so values such as "08" are not rejected as bad octal.
    printf -v "${name}" '%d' "$((10#${value}))"
  else
    log "WARNING: ${name}='${value}' is not a non-negative integer; using ${fallback}"
    printf -v "${name}" '%s' "${fallback}"
  fi
}

require_uint MAX_DELETIONS_PER_CYCLE 200
require_uint MIN_STALE_OBSERVATIONS 2

# Without a writable state directory the observation counters cannot persist.
mkdir -p "${STATE_DIR}" \
  || log "WARNING: cannot create state directory ${STATE_DIR}; observation counts will not persist"

# Intentionally expanded unquoted by callers so it splits into command + flags.
OVS_VSCTL="ovs-vsctl --db=unix:${OVS_DB_SOCKET} --timeout=10"

# True (0) if the kernel netdev exists in the host network namespace.
# We rely on `ip link show` because the pod runs with hostNetwork=true,
# so netlink reflects the host. This is more accurate than reading
# /sys/class/net (which is a per-netns view that may not be mounted
# from the host).
netdev_exists() {
  # Succeeds only when `ip` can show the device named $1.
  ip -o link show dev "$1" &>/dev/null
}

# True (0) if `ip link` itself works. netdev_exists cannot tell a missing
# device from a failing `ip` command, so this is checked once per cycle
# before any "netdev missing" verdict is trusted.
netlink_usable() {
  ip -o link show &>/dev/null
}

# Persist that we observed `iface` as stale at least once. Returns the
# updated observation count via stdout.
record_stale_observation() {
  local iface="$1"
  local f="${STATE_DIR}/${iface}.count"
  local n=0
  [[ -r "${f}" ]] && n="$(cat "${f}" 2>/dev/null || echo 0)"
  # An empty or corrupt counter restarts at 1; base 10 is forced so a
  # leading zero cannot trigger an octal arithmetic error.
  if [[ "${n}" =~ ^[0-9]+$ ]]; then
    n=$((10#${n} + 1))
  else
    n=1
  fi
  echo "${n}" > "${f}"
  echo "${n}"
}

# Forget any recorded observations for interface $1.
clear_stale_observation() {
  rm -f "${STATE_DIR}/$1.count"
}

# Drop state files for interfaces no longer present so the directory
# does not grow unbounded.
# $1 - newline-separated iface names currently in OVSDB.
gc_state_dir() {
  local known="$1"
  local f base
  shopt -s nullglob
  for f in "${STATE_DIR}"/*.count; do
    base="$(basename "${f}" .count)"
    if ! grep -Fxq "${base}" <<< "${known}"; then
      rm -f "${f}"
    fi
  done
  shopt -u nullglob
}

# Run one detection/deletion pass over bridge $1.
# Returns 0 when the pass completed (including "nothing to do") and 1 when it
# was skipped because a prerequisite query failed; in the latter case the
# recorded observation state is left untouched.
cleanup_cycle() {
  local bridge="$1"
  local deletions=0
  local candidates=0
  local confirmed=0

  if ! ${OVS_VSCTL} br-exists "${bridge}" 2>/dev/null; then
    log "bridge ${bridge} does not exist; skipping cycle"
    return 0
  fi

  # If `ip` cannot run at all, every VIF would look like it lost its netdev.
  if ! netlink_usable; then
    log "WARNING: 'ip link' is not usable; cannot verify kernel netdevs, skipping cycle"
    return 1
  fi

  # Pre-fetch the set of interface names currently attached to br-int.
  # Doing this once avoids an O(n) iface-to-br call per Interface row.
  local br_ifaces
  if ! br_ifaces="$(${OVS_VSCTL} list-ports "${bridge}" 2>/dev/null)"; then
    # A failed query is not the same as an empty bridge: keep the state.
    log "WARNING: failed to list ports on ${bridge}; skipping cycle"
    return 1
  fi
  if [[ -z "${br_ifaces}" ]]; then
    log "no ports on ${bridge}; nothing to do"
    gc_state_dir ""
    return 0
  fi

  # Snapshot Interface rows that look like Neutron VIFs (iface-id set).
  # Only ask for the name column so we never have to parse the comma-
  # heavy external_ids map at the CSV layer.
  # The script enables `set -o pipefail`, so an ovs-vsctl failure surfaces
  # as the pipeline status here.
  local vif_ifaces
  if ! vif_ifaces="$(${OVS_VSCTL} --columns=name --no-headings --data=bare \
        find Interface external_ids:iface-id\!=\"\" 2>/dev/null \
        | awk 'NF>0 {print $1}')"; then
    log "WARNING: failed to query Interface table; skipping cycle"
    return 1
  fi
  if [[ -z "${vif_ifaces}" ]]; then
    log "no Neutron VIF interfaces in OVSDB"
    gc_state_dir ""
    return 0
  fi

  # Intersect: VIF interfaces actually attached to the integration bridge.
  local target_ifaces
  target_ifaces="$(grep -Fxf <(printf '%s\n' "${br_ifaces}") <<< "${vif_ifaces}" || true)"

  gc_state_dir "${target_ifaces}"

  [[ -z "${target_ifaces}" ]] && { log "no VIF interfaces on ${bridge}"; return 0; }

  local iface_name iface_error iface_ext stale_reason count
  while IFS= read -r iface_name; do
    [[ -z "${iface_name}" ]] && continue

    # Per-interface get is safe regardless of map content.
    iface_error="$(${OVS_VSCTL} --if-exists get Interface "${iface_name}" error 2>/dev/null || echo '[]')"
    iface_ext="$(${OVS_VSCTL} --if-exists get Interface "${iface_name}" external_ids 2>/dev/null || echo '{}')"

    stale_reason=""
    if [[ -n "${iface_error}" && "${iface_error}" != "[]" && "${iface_error}" != '""' ]]; then
      stale_reason="interface error: ${iface_error}"
    elif ! netdev_exists "${iface_name}"; then
      stale_reason="kernel netdev missing"
    else
      # Healthy now; clear any prior observation count.
      clear_stale_observation "${iface_name}"
      continue
    fi

    if [[ "${iface_ext}" == *'skip_cleanup="true"'* ]]; then
      log "skipping ${iface_name} (skip_cleanup=true)"
      continue
    fi

    candidates=$((candidates + 1))

    count="$(record_stale_observation "${iface_name}")"
    if (( count < MIN_STALE_OBSERVATIONS )); then
      log "candidate ${iface_name} (${stale_reason}); observation ${count}/${MIN_STALE_OBSERVATIONS}, deferring"
      continue
    fi

    confirmed=$((confirmed + 1))
    log "stale port confirmed on ${bridge}: ${iface_name} (${stale_reason}, observed ${count}x)"

    if [[ "${DRY_RUN}" == "1" ]]; then
      continue
    fi

    if (( deletions >= MAX_DELETIONS_PER_CYCLE )); then
      log "deletion cap (${MAX_DELETIONS_PER_CYCLE}) reached; deferring remainder to next cycle"
      break
    fi

    if ${OVS_VSCTL} --if-exists del-port "${bridge}" "${iface_name}"; then
      deletions=$((deletions + 1))
      clear_stale_observation "${iface_name}"
      log "deleted stale port ${iface_name} from ${bridge}"
    else
      log "WARNING: failed to delete port ${iface_name} from ${bridge}"
    fi
  done <<< "${target_ifaces}"

  log "cycle complete: candidates=${candidates} confirmed=${confirmed} deletions=${deletions} dry_run=${DRY_RUN}"
}

log "starting (bridge=${INTEGRATION_BRIDGE} interval=${INTERVAL_SECONDS}s max_per_cycle=${MAX_DELETIONS_PER_CYCLE} min_obs=${MIN_STALE_OBSERVATIONS} dry_run=${DRY_RUN})"

# PID of the in-flight background sleep, so the trap can reap it on shutdown.
sleep_pid=""
trap 'log "received termination signal, exiting"; [[ -n "${sleep_pid}" ]] && kill "${sleep_pid}" 2>/dev/null; exit 0' SIGTERM SIGINT

while true; do
  cleanup_cycle "${INTEGRATION_BRIDGE}" || log "cycle failed (continuing)"
  # Sleep in the background and wait on it so the trap fires promptly.
  sleep "${INTERVAL_SECONDS}" &
  sleep_pid=$!
  wait "${sleep_pid}"
done

BinaryData
====

Events: