Fix Octavia / Amphora VMs

Fix broken / out-of-sync Amphora VMs / Ports

Fix broken/out-of-sync amphorae
# check LB status
openstack loadbalancer show "${LB_ID}"
# check amphorae status
openstack loadbalancer amphora list --loadbalancer "${LB_ID}"
DB_PASS=$(grep octavia_database_password /etc/kolla/passwords.yml | cut -d " " -f2)
# check DB status
mysql --port=3306 --database=octavia --user=octavia --password="${DB_PASS}" \
    --execute="select * from amphora where load_balancer_id='${LB_ID}'"
# ensure LB provisioning_status is ERROR
mysql --port=3306 --database=octavia --user=octavia --password="${DB_PASS}" \
    --execute="update load_balancer set provisioning_status = 'ERROR' where id = '${LB_ID}'"
# OPTIONAL: delete standalone amphora entries
# delete from amphora where role = "STANDALONE" and load_balancer_id="${LB_ID}";
# check vrrp ports
for i in $(openstack port list -f value | grep octavia-lb-vrrp | awk '{ print $1 }'); do
    echo "$i"; openstack port show "$i" -c allowed_address_pairs -f value
done > /tmp/vrrp.txt
grep -B1 "${LB_HA_IP}" /tmp/vrrp.txt
# check whether the LB port exists
# (the id lines in /tmp/vrrp.txt are port UUIDs; [a-Z] is an invalid range, use [a-zA-Z0-9-])
grep -B1 "${LB_HA_IP}" /tmp/vrrp.txt | grep -E '^[a-zA-Z0-9-]+' | \
    xargs -L1 openstack port show
# check if both VRRP ports exist and got a MASTER and a BACKUP entry in the DB
# "recover" a DELETED master or backup amphora and make sure to use an unallocated fake IP for lb_network_ip
# NOTE(review): the amphora table is keyed by the amphora id, not the Neutron port id —
# confirm the id used here is the amphora id; port was 6033 in the original, normalized to 3306
# to match every other mysql call in this runbook
mysql --port=3306 --database=octavia --user=octavia --password="${DB_PASS}" \
    --execute="update amphora set status='ALLOCATED', lb_network_ip='' where id='${AMPHORA_ID}'"
# failover the amphora
openstack loadbalancer amphora failover "${LB_AMPHORA_ID}"
# OPTIONAL: restore/recreate vrrp port (network/subnet UUIDs are environment specific)
openstack port create --network 6808046d-9f65-41ab-a8c1-5386d64043d1 octavia-lb-vrrp-$(uuidgen) \
    --fixed-ip ip-address='',subnet='997bc4ce-ae60-4133-9419-c1220cd6347a' --allowed-address ip-address=

Find broken Octavia Loadbalancer / Amphora VMs
The "loadbalancer amphora failover" command is available since the OSC Stein release, but the python-octaviaclient 1.7 OSC also works with the OpenStack Rocky release. (pip install -U "python-octaviaclient==1.7.0")

# Fail over every amphora currently in ERROR, one load balancer at a time.
DB_PASS=$(grep octavia_database_password /etc/kolla/passwords.yml | cut -d " " -f2)
LB_IDS=$(openstack loadbalancer amphora list --status ERROR -c loadbalancer_id -f value | sort -u)
for LB_ID in ${LB_IDS}; do
    # update LB status to ACTIVE (failover refuses LBs stuck in ERROR)
    mysql --port=3306 --database=octavia --user=octavia --password="${DB_PASS}" \
        --execute="update load_balancer set provisioning_status='ACTIVE' where id='${LB_ID}';"
    # get the broken amphora IDs of this LB (can be more than one)
    AMPHORA_IDS=$(openstack loadbalancer amphora list --status ERROR --loadbalancer "${LB_ID}" -c id -f value)
    # failover each broken amphora VM; sleep to let the new VM boot before the next failover
    for AMPHORA_ID in ${AMPHORA_IDS}; do
        openstack loadbalancer amphora failover "${AMPHORA_ID}"
        sleep 60
    done
done

Manual debug Amphora VMs

# List broken LoadBalancer instances
openstack loadbalancer list --provisioning-status ERROR
openstack loadbalancer list --provisioning-status PENDING_UPDATE
# List broken Loadbalancer VMs
# NOTE: "loadbalancer amphora list" filters by --status (not --provisioning-status),
# consistent with the other amphora-list calls in this runbook
openstack loadbalancer amphora list --status ERROR
openstack loadbalancer amphora list --role STANDALONE

Show Loadbalancer state

# List LoadBalancer details (provisioning/operating status, VIP, provider)
openstack loadbalancer show ${LB_ID}
# List LoadBalancer VMs details (amphora role, status, lb_network_ip)
openstack loadbalancer amphora list --loadbalancer ${LB_ID}
# show project owning the LB (useful to notify the tenant before a failover)
PROJECT_ID=$(openstack loadbalancer show -c project_id -f value ${LB_ID})
openstack project show -c name -f value ${PROJECT_ID}
# show the domain that project belongs to
DOMAIN_ID=$(openstack project show -c domain_id -f value ${PROJECT_ID})
openstack domain show -c name -f value ${DOMAIN_ID}

Manual update provisioning_status from PENDING_UPDATE / ERROR state to ACTIVE in Octavia Database
Currently there is no OpenStack CLI command to change the provisioning_status, so it must be updated directly in the Octavia database.

# read the Octavia DB password from the kolla-ansible passwords file
DB_PASS=$(grep octavia_database_password /etc/kolla/passwords.yml | cut -d " " -f2)
# force the LB back to ACTIVE so that CLI operations (e.g. failover) are accepted again
mysql --port=3306 --database=octavia --user=octavia --password=${DB_PASS} \
    --execute="update load_balancer set provisioning_status = 'ACTIVE' where id = '${LB_ID}'"

Trigger manual LoadBalancer failover
This will cause a short interruption of the LB service since both VMs will be recreated

# manually failover the loadbalancer (recreates its amphora VMs)
openstack loadbalancer failover "${LB_ID}"
# manually failover broken amphora VMs
# NOTE: the list can return several ids (one per ERROR amphora) and
# "amphora failover" accepts only a single id, so loop over them
for AMPHORA_ID in $(openstack loadbalancer amphora list --loadbalancer "${LB_ID}" --status ERROR -c id -f value); do
    openstack loadbalancer amphora failover "${AMPHORA_ID}"
done

Check LoadBalancer state

# check LoadBalancer state (wait for provisioning_status back to ACTIVE after a failover)
openstack loadbalancer show ${LB_ID}
# check LoadBalancer VMs state (expect a MASTER and a BACKUP amphora in ALLOCATED status)
openstack loadbalancer amphora list --loadbalancer ${LB_ID}


# list all loadbalancer instances
openstack loadbalancer list
# list all Amphora instances (Octavia DB view)
openstack loadbalancer amphora list
# List all Amphora VMs (Nova view; names are "amphora-<amphora_id>")
openstack server list --all --long --name amphora
# get LoadBalancer count
openstack loadbalancer list -c id -f value | wc -l
# get Amphora VMs count (compare the Octavia DB entries against the running Nova servers)
openstack loadbalancer amphora list -c loadbalancer_id -f value | wc -l
openstack server list --all --long --name amphora -c Name -f value | wc -l
# restart LB VM (soft reboot of the Nova server backing the amphora)
openstack server reboot --soft amphora-${AMPHORA_ID}
# show LB VM console
openstack console log show amphora-${AMPHORA_ID}
# check amphora vs. server VM: strip the "amphora-" prefix from the Nova name
# to get the amphora id, then diff against the ALLOCATED amphorae known to Octavia
openstack server list --all-projects --name amphora -c Name -f value | cut -d "-" -f2- | sort > /tmp/amphora_vms
openstack loadbalancer amphora list --status ALLOCATED -c id -f value | sort > /tmp/amphora_lb
diff /tmp/amphora_vms /tmp/amphora_lb
# example: inspect and fail over one out-of-sync amphora (replace the example UUID)
openstack loadbalancer amphora list | grep 558196ab-6215-4f4d-a4e8-9eee2a300d33
openstack server show amphora-558196ab-6215-4f4d-a4e8-9eee2a300d33
openstack loadbalancer amphora failover 558196ab-6215-4f4d-a4e8-9eee2a300d33