Files
rancher/cleanup/ad-guid-unmigration.sh
2023-08-30 08:52:02 -04:00

281 lines
8.0 KiB
Bash
Executable File

#!/bin/bash
# Applies a ServiceAccount, ClusterRole, ClusterRoleBinding and Job to the
# cluster with kubectl, follows the Job pod's logs, and deletes those
# resources again once the log stream ends successfully.
#
# Usage: <script> AGENT_IMAGE [OPTIONS]   (options are listed by show_usage)
# Requires: kubectl, and a getopt that supports long options (-l).
#
# Uncomment the next line to trace every command while debugging.
# set -x
# Abort on the first command that fails outside of a tested context.
set -e
# Text to display in the banner
banner_text="This utility will go through all Rancher users and perform an Active Directory lookup using
the configured service account to get the user's distinguished name. Next, it will perform lookups inside Rancher
for all the user's Tokens, ClusterRoleTemplateBindings, and ProjectRoleTemplateBindings. If any of those objects,
including the user object itself are referencing a principalID based on the GUID of that user, those objects will be
updated to reference the distinguished name-based principalID (unless the utility is run with -dry-run, in that case
the only results are log messages indicating the changes that would be made by a run without that flag).
This utility will also detect and correct the case where a single ActiveDirectory GUID is mapped to multiple Rancher
users. That condition was likely caused by a race in the original migration to use GUIDs and resulted in a second
Rancher user being created. This caused Rancher logins to fail for the duplicated user. The utility remedies
that situation by mapping any tokens and bindings to the original user before removing the newer user, which was
created in error.
It is also important to note that migration of ClusterRoleTemplateBindings and ProjectRoleTemplateBindings require
a delete/create operation rather than an update. This will result in new object names for the migrated bindings.
A label with the former object name will be included in the migrated bindings.
The Rancher Agent image to be used with this utility can be found at rancher/rancher-agent:v2.7.6
It is recommended that you perform a Rancher backup prior to running this utility."
# ANSI escape sequences (interpreted by `echo -e`) used to colour warnings
# and error messages; CLEAR resets the colour.
CLEAR='\033[0m'
RED='\033[0;31m'
# Cluster resources, including the service account used to run the script.
#
# The manifest is a template that is post-processed with sed before it is
# applied:
#   * the literal token `agent_image` is replaced by the image to run;
#   * lines prefixed with `#dryrun `, `#deletemissing ` or `#debug ` are
#     optional env vars. Stripping the prefix (marker plus ONE space) enables
#     them, so the whitespace that follows the prefix must match the
#     indentation of the other entries in the `env` list.
# The heredoc delimiter is quoted, so nothing inside is expanded by the shell.
cluster_resources_yaml=$(cat << 'EOF'
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cattle-cleanup-sa
  namespace: cattle-system
  labels:
    rancher-cleanup: "true"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cattle-cleanup-binding
  labels:
    rancher-cleanup: "true"
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cattle-cleanup-role
subjects:
  - kind: ServiceAccount
    name: cattle-cleanup-sa
    namespace: cattle-system
---
apiVersion: batch/v1
kind: Job
metadata:
  name: cattle-cleanup-job
  namespace: cattle-system
  labels:
    rancher-cleanup: "true"
spec:
  backoffLimit: 6
  completions: 1
  parallelism: 1
  selector:
  template:
    metadata:
      creationTimestamp: null
    spec:
      containers:
        - env:
            - name: AD_GUID_CLEANUP
              value: "true"
#dryrun             - name: DRY_RUN
#dryrun               value: "true"
#deletemissing             - name: AD_DELETE_MISSING_GUID_USERS
#deletemissing               value: "true"
#debug             - name: RANCHER_DEBUG
#debug               value: "true"
          image: agent_image
          imagePullPolicy: Always
          command: ["agent"]
          name: cleanup-agent
          resources: {}
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
      dnsPolicy: ClusterFirst
      restartPolicy: OnFailure
      schedulerName: default-scheduler
      securityContext: {}
      serviceAccountName: cattle-cleanup-sa
      terminationGracePeriodSeconds: 30
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cattle-cleanup-role
  labels:
    rancher-cleanup: "true"
rules:
  - apiGroups:
      - '*'
    resources:
      - '*'
    verbs:
      - '*'
  - nonResourceURLs:
      - '*'
    verbs:
      - '*'
EOF
)
# Agent image to use in the yaml file (first command-line argument)
agent_image="$1"

#######################################
# Prints the command-line help text.
# Globals:   RED, CLEAR (read) - colour escape sequences
# Arguments: $1 - optional error message, printed highlighted before the help
# Outputs:   writes the help text to stdout
#######################################
show_usage() {
  local message="$1"

  if [[ -n "$message" ]]; then
    echo -e "${RED}👉 ${message}${CLEAR}\n"
  fi

  printf 'Usage: %s AGENT_IMAGE [OPTIONS]\n' "$0"
  printf '\n'
  printf 'Options:\n'
  # One tab-indented line per supported option.
  printf '\t%s\n' \
    "-h, --help Display this help message" \
    "-n, --dry-run Display the resources that would be updated without making changes" \
    "-d, --delete-missing Permanently remove user objects whose GUID cannot be found in Active Directory" \
    "-v, --debug Run with extra debug verbosity"
}
#######################################
# Prints the banner text framed by full-width borders, followed by a summary
# of the selected run options.
# Globals:   dry_run, delete_missing, agent_image, debug_mode (read)
# Arguments: $1 - banner text
# Outputs:   writes the banner and option summary to stdout
#######################################
display_banner() {
  local text="$1"
  local border_char="="
  local text_width
  local border

  # tput fails (or prints nothing usable) when there is no terminal or TERM
  # is unset, e.g. in CI; fall back to a conventional 80-column width rather
  # than drawing zero-width borders.
  text_width=$(tput cols 2>/dev/null) || text_width=""
  if ! [[ "$text_width" =~ ^[1-9][0-9]*$ ]]; then
    text_width=80
  fi

  # A run of spaces of the requested width, turned into the border character.
  border=$(printf "%${text_width}s" | tr " " "$border_char")

  echo "$border"
  printf "%-${text_width}s \n" "$text"
  echo "$border"
  echo "Dry run: $dry_run"
  echo "Delete missing: $delete_missing"
  echo "Agent image: $agent_image"
  echo "Debug: $debug_mode"
  if [[ "$dry_run" = true && "$delete_missing" = true ]]; then
    echo "Setting the dry-run option to true overrides the delete-missing option. NO CHANGES WILL BE MADE."
  fi
  echo "$border"
}
# Prints the commands needed to remove the resources created by this script.
print_manual_cleanup_hint() {
  echo "To cleanup manually, you can run:"
  echo " kubectl --namespace=cattle-system delete serviceaccount,job -l rancher-cleanup=true"
  echo " kubectl delete clusterrole,clusterrolebinding -l rancher-cleanup=true"
}

# Parse the command line. The assignment is tested directly because, with
# `set -e` active, a failing `OPTS=$(getopt ...)` on its own line would abort
# the script before the usage message could be printed.
if ! OPTS=$(getopt -o hndv -l help,dry-run,delete-missing,debug -- "$@"); then
  show_usage "Invalid option"
  exit 1
fi
# getopt emits a shell-quoted, normalised argument list; load it into $@.
eval set -- "$OPTS"

dry_run=false
delete_missing=false
debug_mode=false
while true; do
  case "$1" in
    -h | --help)
      show_usage
      exit 0
      ;;
    -n | --dry-run)
      dry_run=true
      shift
      ;;
    -d | --delete-missing)
      delete_missing=true
      shift
      ;;
    -v | --debug)
      debug_mode=true
      shift
      ;;
    --)
      shift
      break
      ;;
    *)
      show_usage "Invalid option"
      exit 1
      ;;
  esac
done

# Ensure AGENT_IMAGE is provided
if [[ $# -lt 1 ]]; then
  show_usage "AGENT_IMAGE is a required argument"
  exit 1
fi
# getopt moved every non-option argument behind "--", so the image is now $1
# whether it was given before or after the options on the command line.
agent_image="$1"

display_banner "${banner_text}"

if [[ "$dry_run" != true ]]; then
  # Check the Rancher version before doing anything.
  # If it is v2.7.5, warn that running this utility against that version is
  # not recommended and ask for an explicit confirmation.
  rancher_version=$(kubectl get settings server-version --template='{{.value}}')
  if [[ "$rancher_version" = "v2.7.5" ]]; then
    echo -e "${RED}IT IS NOT RECOMMENDED TO RUN THIS UTILITY AGAINST RANCHER VERSION v2.7.5${CLEAR}"
    echo -e "${RED}IF RANCHER v.2.7.5 RESTARTS AFTER RUNNING THIS UTILITY, IT WILL UNDO THE EFFECTS OF THIS UTILITY.${CLEAR}"
    echo -e "${RED}IF YOU DO WANT TO RUN THIS UTILITY, IT IS RECOMMENDED THAT YOU MAKE A BACKUP PRIOR TO CONTINUING.${CLEAR}"
    read -r -p "Do you want to continue? (y/n): " choice
    if [[ ! $choice =~ ^[Yy]$ ]]; then
      echo "Exiting..."
      exit 0
    fi
  fi
fi

# General confirmation, asked for every run (including dry runs).
read -r -p "Do you want to continue? (y/n): " choice
if [[ ! $choice =~ ^[Yy]$ ]]; then
  echo "Exiting..."
  exit 0
fi

# apply the provided rancher agent
# The here-string is quoted so the manifest's newlines and indentation are
# preserved on every bash version.
yaml=$(sed -e 's=agent_image='"$agent_image"'=' <<< "$cluster_resources_yaml")
if [[ "$dry_run" = true ]]; then
  # Uncomment the env var for dry-run mode
  yaml=$(sed -e 's/#dryrun //' <<< "$yaml")
elif [[ "$delete_missing" = true ]]; then
  # Uncomment the env var for missing user cleanup
  # (only when not in dry-run mode: dry-run overrides delete-missing)
  yaml=$(sed -e 's/#deletemissing //' <<< "$yaml")
fi
if [[ "$debug_mode" = true ]]; then
  # Uncomment the env var for debug logging
  yaml=$(sed -e 's/#debug //' <<< "$yaml")
fi
echo "$yaml" | kubectl apply -f -

# Get the pod ID to tail the logs
retry_interval=1
max_retries=10
retry_count=0
pod_id=""
while (( retry_count < max_retries )); do
  # The jsonpath lookup exits non-zero while the Job has not created a pod
  # yet (index 0 of an empty list); treat that as "not found yet" instead of
  # letting `set -e` abort the script.
  pod_id=$(kubectl --namespace=cattle-system get pod -l job-name=cattle-cleanup-job \
    -o jsonpath="{.items[0].metadata.name}") || pod_id=""
  if [[ -n "$pod_id" ]]; then
    break
  fi
  sleep "$retry_interval"
  # Plain assignment: `((retry_count++))` returns status 1 when the counter
  # is 0, which would terminate the script under `set -e`.
  retry_count=$((retry_count + 1))
done

# Without a pod name the log loop below could never succeed; stop right away.
if [[ -z "$pod_id" ]]; then
  echo "Could not find the pod for job cattle-cleanup-job, check the job by running kubectl --namespace=cattle-system get jobs"
  print_manual_cleanup_hint
  exit 1
fi

# 600 is equal to 5 minutes, because the sleep interval is 0.5 seconds
job_start_timeout=600
declare -i count=0
# `kubectl logs -f` fails until the container has started; keep retrying
# until it succeeds (it then blocks while streaming the logs) or we time out.
until kubectl --namespace=cattle-system logs "$pod_id" -f; do
  if (( count > job_start_timeout )); then
    echo "Timeout reached, check the job by running kubectl --namespace=cattle-system get jobs"
    print_manual_cleanup_hint
    exit 1
  fi
  sleep 0.5
  count+=1
done

# Cleanup after it completes successfully
echo "$yaml" | kubectl delete -f -