Fix Octavia / Amphora VMs

Fix broken/out-of-sync Amphora VMs / Ports
http://eavesdrop.openstack.org/irclogs/%23openstack-lbaas/%23openstack-lbaas.2019-05-17.log.html

Fix broken/out-of-sync amphorae
# check LB status
LB_ID=b98ea4bb-0631-4be9-a9e8-8841bf56bf2f
openstack loadbalancer show "${LB_ID}"

# check amphorae status
openstack loadbalancer amphora list --loadbalancer "${LB_ID}"

# Read the Octavia DB password from the kolla password file
# (second space-separated field of the matching line).
DB_PASS=$(grep octavia_database_password /etc/kolla/passwords.yml | cut -d " " -f2)

# check DB status
# The password is quoted so that special characters in it survive word
# splitting and globbing.
# NOTE(review): it is still passed as a command-line argument and may be
#   visible in the process list while mysql starts.
mysql --host=db.service.i.example.com --port=3306 --database=octavia --user=octavia --password="${DB_PASS}" \
    --execute="select * from amphora where load_balancer_id='${LB_ID}'"

# ensure LB provisioning_status is ERROR
mysql --host=db.service.i.example.com --port=3306 --database=octavia --user=octavia --password="${DB_PASS}" \
    --execute="update load_balancer set provisioning_status = 'ERROR' where id = '${LB_ID}'"

# OPTIONAL: delete standalone amphora entries
# delete from amphora where role = "STANDALONE" and load_balancer_id="${LB_ID}";
 
# check vrrp ports
# Dump every octavia-lb-vrrp port into a private temp file: one line with the
# port ID, followed by that port's allowed_address_pairs.
VRRP_FILE=$(mktemp /tmp/vrrp.XXXXXX)
openstack port list -f value | awk '/octavia-lb-vrrp/ { print $1 }' |
while IFS= read -r port_id; do
    printf '%s\n' "${port_id}"
    # </dev/null keeps the command from consuming the ID list on stdin.
    openstack port show "${port_id}" -c allowed_address_pairs -f value </dev/null
done > "${VRRP_FILE}"

LB_HA_IP=10.0.0.26
# -F/-w: match the IP literally and as a whole word, so 10.0.0.26 does not
# also hit 10.0.0.260 or 110.0.0.26.
grep -F -w -B1 -- "${LB_HA_IP}" "${VRRP_FILE}"

#######################################
# Print the IDs of the ports whose allowed_address_pairs contain an IP.
# Arguments: $1 - IP address to look for (matched literally, whole word)
#            $2 - file with alternating "port ID" / "allowed_address_pairs" lines
# Outputs:   one port ID (UUID) per line on stdout
#######################################
vrrp_port_ids() {
    # Keep only lines that consist of a UUID; this drops the address-pair
    # lines and the "--" group separators that grep -B1 emits.
    grep -F -w -B1 -- "$1" "$2" |
        grep -E -x '[0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}'
}

# check whether LB port exists
# -r: do not run "openstack port show" at all when no port matched.
vrrp_port_ids "${LB_HA_IP}" "${VRRP_FILE}" | xargs -r -L1 openstack port show
 
# check if both VRRP ports exist and have a MASTER and a BACKUP entry in the DB
# "recover" a DELETED master or backup amphora; make sure to use an unallocated
# fake IP for lb_network_ip
# NOTE(review): PORT_ID is compared against amphora.id, so it has to hold the
#   ID of the amphora row to recover — confirm the value/variable name.
# NOTE(review): this statement connects to port 6033 while the other mysql
#   calls in this document use 3306 — confirm which port is intended.
PORT_ID=ad73e7f0-bf78-442e-adf3-8cbf98dd805c
mysql --host=db.service.i.example.com --port=6033 --database=octavia --user=octavia --password="${DB_PASS}" \
    --execute="update amphora set status='ALLOCATED', lb_network_ip='172.16.200.85' where id='${PORT_ID}'"
 
# Trigger a failover for one specific amphora.
LB_AMPHORA_ID=3db411b1-4b20-429f-80a5-3cb7c451876b
openstack loadbalancer amphora failover "${LB_AMPHORA_ID}"

# OPTIONAL: restore/recreate the vrrp port.
# The port name gets a freshly generated UUID suffix.
openstack port create \
    --network 6808046d-9f65-41ab-a8c1-5386d64043d1 \
    --fixed-ip ip-address='10.0.0.40',subnet='997bc4ce-ae60-4133-9419-c1220cd6347a' \
    --allowed-address ip-address=10.0.0.18 \
    "octavia-lb-vrrp-$(uuidgen)"

Find broken Octavia Loadbalancer / Amphora VMs
The "loadbalancer amphora failover" command is available since the OSC Stein release (see https://docs.openstack.org/releasenotes/python-octaviaclient/stein.html ), but python-octaviaclient 1.7 also works with the OpenStack Rocky release. (pip install -U "python-octaviaclient==1.7.0")

# Fail over every amphora that is in ERROR state, load balancer by load balancer.
DB_PASS=$(grep octavia_database_password /etc/kolla/passwords.yml | cut -d " " -f2)

# IDs of all load balancers that own at least one amphora in ERROR state.
mapfile -t LB_IDS < <(openstack loadbalancer amphora list --status ERROR -c loadbalancer_id -f value | sort -u)

for LB_ID in "${LB_IDS[@]}"; do
    # Skip amphorae that are not attached to a load balancer.
    # NOTE(review): presumably these show up as an empty value or "None" in
    #   the loadbalancer_id column — confirm against the client output.
    [[ -n "${LB_ID}" && "${LB_ID}" != "None" ]] || continue

    # update LB status to ACTIVE
    mysql --host=db.service.stage.i.example.com --port=3306 --database=octavia --user=octavia --password="${DB_PASS}" \
        --execute="update load_balancer set provisioning_status='ACTIVE' where id='${LB_ID}';"

    # get the IDs of this load balancer's amphorae that are in ERROR state
    mapfile -t AMPHORA_IDS < <(openstack loadbalancer amphora list --status ERROR --loadbalancer "${LB_ID}" -c id -f value)

    for AMPHORA_ID in "${AMPHORA_IDS[@]}"; do
        [[ -n "${AMPHORA_ID}" ]] || continue

        # failover broken amphora VMs
        openstack loadbalancer amphora failover "${AMPHORA_ID}"

        # wait a minute before triggering the next failover
        sleep 60
    done
done

Manually debug Amphora VMs

# List broken LoadBalancer instances: one listing per provisioning status,
# first ERROR, then PENDING_UPDATE.
for lb_status in ERROR PENDING_UPDATE; do
    openstack loadbalancer list --provisioning-status "${lb_status}"
done

# List broken Loadbalancer VMs: amphorae in ERROR state, then all amphorae
# with the STANDALONE role.
openstack loadbalancer amphora list --provisioning-status ERROR
openstack loadbalancer amphora list --role STANDALONE

Show Loadbalancer state

# Load balancer to inspect; the commands below all refer to it.
LB_ID=0ce30f0e-1d75-486c-a09f-79125abf44b8

# List LoadBalancer details
openstack loadbalancer show "${LB_ID}"

# List LoadBalancer VMs details
openstack loadbalancer amphora list --loadbalancer "${LB_ID}"

# show project (name of the project that owns the load balancer)
PROJECT_ID=$(openstack loadbalancer show -c project_id -f value "${LB_ID}")
openstack project show -c name -f value "${PROJECT_ID}"

# show domain (name of the domain the project belongs to)
DOMAIN_ID=$(openstack project show -c domain_id -f value "${PROJECT_ID}")
openstack domain show -c name -f value "${DOMAIN_ID}"

Manually update provisioning_status from PENDING_UPDATE / ERROR state to ACTIVE in the Octavia database
Currently there is no OpenStack CLI command to change the provisioning_status

# Read the Octavia DB password from the kolla password file.
DB_PASS=$(grep octavia_database_password /etc/kolla/passwords.yml | cut -d " " -f2)

# Set the load balancer selected by LB_ID back to ACTIVE directly in the DB.
# LB_ID is not assigned in this snippet; ${LB_ID:?} aborts the command with a
# message instead of running the UPDATE with an empty ID.
mysql --host=db.service.example.com --port=3306 --database=octavia --user=octavia --password="${DB_PASS}" \
    --execute="update load_balancer set provisioning_status = 'ACTIVE' where id = '${LB_ID:?set LB_ID first}'"

Trigger manual LoadBalancer failover
This will cause a short interruption of the LB service since both VMs will be recreated

# manually failover loadbalancer
# LB_ID is not assigned in this snippet; ${LB_ID:?} aborts with a message if
# it is unset or empty.
openstack loadbalancer failover "${LB_ID:?set LB_ID first}"

# manually failover amphora vm
# NOTE(review): this looks like an alternative to the load balancer failover
#   above rather than a follow-up step — confirm before running both.
# The listing can return zero, one or several amphorae in ERROR state, so
# each ID is failed over individually.
mapfile -t AMPHORA_IDS < <(openstack loadbalancer amphora list --loadbalancer "${LB_ID:?set LB_ID first}" --status ERROR -c id -f value)
for AMPHORA_ID in "${AMPHORA_IDS[@]}"; do
    [[ -n "${AMPHORA_ID}" ]] || continue
    openstack loadbalancer amphora failover "${AMPHORA_ID}"
done

Check LoadBalancer state

# check LoadBalancer state
# LB_ID is not assigned in this snippet; ${LB_ID:?} aborts with a message if
# it is unset or empty.
openstack loadbalancer show "${LB_ID:?set LB_ID first}"

# check LoadBalancer VMs state
openstack loadbalancer amphora list --loadbalancer "${LB_ID:?set LB_ID first}"

Debug

# --- Listings ---------------------------------------------------------------

# all loadbalancer instances
openstack loadbalancer list

# all Amphora instances
openstack loadbalancer amphora list

# all Amphora VMs (servers whose name matches "amphora")
openstack server list --all --long --name amphora

# --- Counts -----------------------------------------------------------------

# number of LoadBalancers (one ID per output line)
openstack loadbalancer list -f value -c id \
    | wc -l

# number of Amphora VMs: first as seen by Octavia, then as seen by the
# server list
openstack loadbalancer amphora list -f value -c loadbalancer_id \
    | wc -l
openstack server list --all --long --name amphora -f value -c Name \
    | wc -l
 
# restart LB VM
# The server name is "amphora-" followed by the amphora ID. AMPHORA_ID is not
# assigned in this snippet; ${AMPHORA_ID:?} aborts with a message if it is
# unset or empty.
openstack server reboot --soft "amphora-${AMPHORA_ID:?set AMPHORA_ID first}"

# show LB VM console
openstack console log show "amphora-${AMPHORA_ID:?set AMPHORA_ID first}"
 
# check amphora vs. server VM
# Left side:  amphora IDs derived from the server names (everything after the
#             first "-", i.e. the name without its "amphora-" prefix).
# Right side: IDs of the amphorae that Octavia lists as ALLOCATED.
# Both lists are sorted, so every diff line is an ID present on one side only.
# Process substitution avoids predictable temp files in /tmp.
diff \
    <(openstack server list --all-projects --name amphora -c Name -f value | cut -d "-" -f2- | sort) \
    <(openstack loadbalancer amphora list --status ALLOCATED -c id -f value | sort)

# Look at one amphora on both sides (presumably one reported by the diff),
# then fail it over.
CHECK_AMPHORA_ID=558196ab-6215-4f4d-a4e8-9eee2a300d33
openstack loadbalancer amphora list | grep -F -- "${CHECK_AMPHORA_ID}"
openstack server show "amphora-${CHECK_AMPHORA_ID}"

openstack loadbalancer amphora failover "${CHECK_AMPHORA_ID}"