#!/bin/bash
#
# remctl-acl-update-flex -- Update remctl ACL files
#
# (Based on the remctl-acl-update script.)
#
# This script copies remctl ACL files into /etc/remctl/acl. The source of
# these files can either be AFS (like the original remctl-acl-update
# script) or from the newer HTTP-based remctl ACL file service. Hence the
# suffix "-flex".

# >> CHANGE: 2024-08-19 (adamhl)
# Historical note: the "systems" and "systems-root" ACL files
# -----------------------------------------------------------------------
# The "systems" ACL file is intended to control access to common non-root
# system-level remctl commands. This script also generates a
# root-principal-only version of the "systems" ACL file called
# "systems-root" to use for remctl commands that are equivalent to having
# root. In the distant past the remctl acl update script managed the two
# ACL files "systems" and "systems-root" and managed them differently if
# the script was run on a KDC server. Because these two ACL files are
# group- and organization-specific, and because we want to simplify
# things, this script no longer manages those two ACL files. If you want
# to manage these two files you must do so yourself using Puppet (or
# whatever configuration management system you use).

# Abort the script as soon as any command fails.
set -o errexit

# Base URLs of the HTTP remctl ACL file service (production and
# development) and the default directory the ACL files are copied into.
SERVICE_URL_PROD='https://aclfile-prod.infra.stanford.edu'
SERVICE_URL_DEV='https://aclfile-dev.infra.stanford.edu'
DEST_DIR='/etc/remctl/acl'

# Location of the canonical ACL files in AFS. Only consulted in "afs"
# mode; in "https" mode the ACL files come from the HTTP service instead.
# Files in this directory carry a ".acl" suffix, which is dropped when
# they are copied to local disk.
AFS_SOURCE_DIR='/afs/ir.stanford.edu/pubsw/Local/config/share/etc/acl'

## #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### ##
## Utility functions
# exit_with_error MESSAGE
# Print "error: MESSAGE" on standard output and terminate the script with
# exit status 1.
exit_with_error () {
    printf '%s\n' "error: $1"
    exit 1
}

# progress MESSAGE
# Print "progress: MESSAGE" on standard output, but only when the global
# VERBOSE is "1". Always returns 0 so that it is safe under "set -e".
progress () {
    [[ "$VERBOSE" == "1" ]] || return 0
    printf '%s\n' "progress: $1"
}

# dryrun MESSAGE
# Print "dryrun: MESSAGE" on standard output (unconditionally).
dryrun () {
    printf '%s\n' "dryrun: $1"
}

# show_help
# Print the three-line usage summary on standard output. The program name
# shown is the basename of $0, which is also stored in the global BASENAME.
show_help () {
    progress "showing help"
    BASENAME=$(basename "$0")
    printf '%s\n' \
        "$BASENAME -h|--help" \
        "$BASENAME -m|--manual" \
        "$BASENAME [-v] [--dry-run] [--root-acls] [--source=afs|https] [--dest-dir=<path>] [<acl> ...]"
}

# show_manual
# Render the POD documentation embedded at the end of this script as a man
# page (pod2man piped into "man -l -").
#
# Exits via exit_with_error when pod2man is not installed. The lookup is
# done inside an "if" condition on purpose: the script runs under "set -e",
# and a bare "var=$(command -v pod2man)" would abort the script silently
# before the explanatory error message could be printed.
show_manual () {
    progress "showing man page"
    if ! command -v pod2man > /dev/null 2>&1; then
        exit_with_error "cannot format man page: missing pod2man program"
    fi
    pod2man < "$0" | man -l -
}
## #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### ##
## Command-line parsing

# We use enhanced getopt from util-linux
# https://stackoverflow.com/questions/192249/how-do-i-parse-command-line-arguments-in-bash

# CP1. Exit if enhanced getopt is not available.
# Enhanced (util-linux) getopt identifies itself by exiting with status 4
# from "getopt --test". The leading "!" inverts the command's status so
# that "set -e" does not abort the script here; PIPESTATUS[0] is then read
# immediately afterwards to get at getopt's own exit status.
! getopt --test > /dev/null
if [[ ${PIPESTATUS[0]} -ne 4 ]]; then
    exit_with_error "I am sorry, 'getopt --test' failed in this environment."
else
    # NOTE: VERBOSE is only assigned further down (CP5), so this message is
    # shown only if VERBOSE=1 is already present in the environment.
    progress 'enhanced getopt is available so we can continue'
fi

# CP2. Define the options, both short and long:
# (a trailing ":" marks an option that requires an argument).
OPTIONS=vnmhs:p:
LONGOPTS=verbose,dry-run,all,root-acls,abort-when-missing,manual,help,source:,dest-dir:,dest-ext:,platform:,baseurl:

# CP3. Ensure there were no invalid options passed.
# Same "!" + PIPESTATUS idiom as in CP1. PARSED receives getopt's
# normalized, shell-quoted rendering of the command line.
! PARSED=$(getopt --options=$OPTIONS --longoptions=$LONGOPTS --name "$0" -- "$@")
if [[ ${PIPESTATUS[0]} -ne 0 ]]; then
    # e.g. return value is 1
    #  then getopt has complained about wrong arguments to stderr
    exit_with_error "at least one option is invalid"
fi

# CP4. Read getopt's output this way to handle the quoting right:
# (replaces the positional parameters with the normalized option list).
eval set -- "$PARSED"

# CP5. Get all the options.
# Set their defaults.
VERBOSE="0"
DRYRUN="0"
ALL_ACLS="0"
MANUAL="0"
HELP="0"
SOURCE="afs"
PLATFORM="prod"
BASEURL=""
ROOT_ACLS="0"
DEST_EXT=""
ABORT_WHEN_MISSING="0"

# parse_cli_options ARG...
#
# Walk the (getopt-normalized) argument list, storing each recognised
# option in its global variable, until the "--" separator is seen.
# Everything after "--" is stored in the global array REMAINING_ARGS.
#
# Globals written: VERBOSE HELP DRYRUN MANUAL ROOT_ACLS ALL_ACLS
#                  ABORT_WHEN_MISSING SOURCE DEST_DIR DEST_EXT PLATFORM
#                  BASEURL REMAINING_ARGS
#
# The short option "-p" is declared in the getopt option string but
# previously had no case arm, so "-p dev" ended in "Programming error".
# It is treated here as the short form of --platform (by analogy with
# -s/--source; --platform is the only argument-taking long option that
# starts with "p").
#
# Anything getopt let through that has no arm below is a bug in this
# script and ends in exit_with_error.
parse_cli_options () {
    # getopt always emits "--", so in normal operation the loop ends via
    # the "--" arm; the $# test merely guarantees termination.
    while (( $# > 0 )); do
        case "$1" in
            -v|--verbose)
                VERBOSE="1"
                shift
                ;;
            -h|--help)
                HELP="1"
                shift
                ;;
            -n|--dry-run)
                DRYRUN="1"
                shift
                ;;
            -m|--manual)
                MANUAL="1"
                shift
                ;;
            --root-acls)
                ROOT_ACLS="1"
                shift
                ;;
            --all)
                ALL_ACLS="1"
                shift
                ;;
            --abort-when-missing)
                ABORT_WHEN_MISSING="1"
                shift
                ;;
            -s|--source)
                SOURCE=$2
                shift 2
                ;;
            --dest-dir)
                DEST_DIR=$2
                shift 2
                ;;
            --dest-ext)
                DEST_EXT=$2
                shift 2
                ;;
            -p|--platform)
                PLATFORM=$2
                shift 2
                ;;
            --baseurl)
                BASEURL=$2
                shift 2
                ;;
            --)
                shift
                break
                ;;
            *)
                exit_with_error "Programming error"
                ;;
        esac
    done
    REMAINING_ARGS=("$@")
}

# Parse the options in order, then leave only the non-option arguments
# (the ACL names) in the positional parameters.
parse_cli_options "$@"
set -- "${REMAINING_ARGS[@]}"

# Report the parsed settings (only visible in verbose mode). Whatever is
# left in the positional parameters at this point are the non-option
# arguments.

progress "finished parsing command line options"
for setting_name in PLATFORM BASEURL SOURCE DEST_DIR VERBOSE MANUAL DRYRUN \
                    ALL_ACLS ROOT_ACLS DEST_EXT ABORT_WHEN_MISSING; do
    # Pad "NAME:" to 20 columns so that all the values line up.
    printf -v setting_line '%-20s%s' "${setting_name}:" "${!setting_name}"
    progress "$setting_line"
done
progress "number of arguments remaining: $#"

# --help takes precedence over --manual; either one ends the script with
# exit status 0 once its output has been shown.
if [[ "$HELP" == "1" ]]; then
    show_help
    exit 0
elif [[ "$MANUAL" == "1" ]]; then
    show_manual
    exit 0
fi

# SOURCE must be either "afs" or "https". Anything else is an error.
# The alternatives are grouped so that BOTH anchors apply to each of them:
# without the parentheses '^afs|https$' means "starts with afs OR ends
# with https" and lets values such as "afsfoo" or "xhttps" through.
src_regex='^(afs|https)$'
if [[ ! "$SOURCE" =~ $src_regex ]]; then
    exit_with_error "source must be one of 'afs' or 'https'"
fi

# PLATFORM must be either "dev" or "prod". Anything else is an error.
# (Grouped for the same reason as the SOURCE pattern above.)
platform_regex='^(dev|prod)$'
if [[ ! "$PLATFORM" =~ $platform_regex ]]; then
    exit_with_error "platform must be one of 'dev' or 'prod'"
fi

# The destination must already exist as a directory; this script never
# creates it.
[[ -d "$DEST_DIR" ]] || exit_with_error "$DEST_DIR does not exist (or is not a directory)"

# Pick the ACL file service URL: an explicit BASEURL beginning with "http"
# wins; otherwise the URL is derived from PLATFORM.
case "$BASEURL" in
    http*)
        SERVICE_URL="$BASEURL"
        ;;
    *)
        case "$PLATFORM" in
            prod)
                SERVICE_URL="$SERVICE_URL_PROD"
                ;;
            dev)
                SERVICE_URL="$SERVICE_URL_DEV"
                ;;
            *)
                # PLATFORM has already been validated, so this is a bug.
                exit_with_error "programming error"
                ;;
        esac
        ;;
esac

progress "SERVICE_URL is $SERVICE_URL"

## #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### ##
## Main processing functions

# Download the file "__all_files.json" from the HTTP remctl ACL file service
# into the supplied file.
# http_download_all_json FILE
# Fetch "${SERVICE_URL}/" with wget and store the response in FILE.
# wget runs with --quiet unless VERBOSE is "1".
http_download_all_json () {
    local download_file=$1
    local -a wget_args=(-nv -O "$download_file" "${SERVICE_URL}/")

    progress "downloading __all_files.json to $download_file..."
    if [ "$VERBOSE" != "1" ]; then
        wget_args=(--quiet "${wget_args[@]}")
    fi
    wget "${wget_args[@]}"

    progress "...finished downloading __all_files.json"
}

# http_process_all_json JSON_FILE SOURCE_DIR
#
# Split the downloaded JSON document into one ACL file per top-level key.
# The code treats JSON_FILE as a JSON object whose keys are ACL names and
# whose values are arrays of member strings. For every key NAME the file
# SOURCE_DIR/NAME.acl is (re)written with the sorted members, one per
# line; "@stanford.edu" is appended to any member that does not already
# end with it. A key with no members yields an empty file.
#
# Arguments: $1 - path of the downloaded JSON file
#            $2 - directory in which the .acl files are written
# Requires:  jq, sort
# Returns:   0 on success. If jq cannot list the keys (e.g. the download
#            is not valid JSON) the failing assignment aborts the script
#            under "set -e".
http_process_all_json () {
    local all_json_file=$1
    local source_dir=$2
    local -a names=()
    local -a members=()
    local names_raw name members_raw member file_name
    local counter=0

    progress "entering http_process_all_json"
    progress "parsing all_json file $all_json_file"
    progress "will write files to directory $source_dir"

    # Extract the remctl ACL file names into a bash array. The assignment
    # is kept separate from "local" so that a jq failure is not masked.
    names_raw=$(jq -r 'keys[]' < "$all_json_file")
    if [[ -n "$names_raw" ]]; then
        readarray -t names <<< "$names_raw"
    fi

    for name in "${names[@]}"; do
        # The name becomes part of a path below, so refuse anything that
        # is empty or could point outside of source_dir.
        if [[ -z "$name" || "$name" == */* ]]; then
            progress "skipping invalid ACL name '$name'"
            continue
        fi

        progress "extracting '$name' from the all JSON file"
        file_name="${source_dir}/${name}.acl"

        # Hand the name to jq as a variable instead of pasting it into the
        # filter text, so quotes or backslashes in it cannot break the
        # filter.
        members_raw=$(jq -r --arg name "$name" '.[$name][]' < "$all_json_file" | sort)

        # Start from an empty member list for EVERY ACL. Without this
        # reset an ACL with no members would inherit (and be written
        # with) the members of the ACL processed just before it.
        members=()
        if [[ -z "$members_raw" ]]; then
            progress "$name has no members so $file_name will be empty"
        else
            progress "members_raw is <$members_raw>"
            readarray -t members <<< "$members_raw"
        fi

        # Create the file, or truncate it if it is already there.
        : > "$file_name"
        for member in "${members[@]}"; do
            if [[ -n "$member" ]]; then
                if [[ "$member" != *@stanford.edu ]]; then
                    member="${member}@stanford.edu"
                fi
                echo "$member" >> "$file_name"
            else
                progress "skipping empty line"
            fi
        done
        progress "wrote members to file $file_name"
        counter=$((counter+1))
    done
    # NOTE: counter is the number of ACL files written, not members.
    progress "wrote $counter members to directory $source_dir"
}

# populate_from_https DIR
# Download the combined JSON document to DIR/all.json and expand it into
# individual ACL files inside DIR. The JSON path is left in the global
# all_json_file.
populate_from_https () {
    local http_download_dir=$1

    progress "download directory is $http_download_dir"

    all_json_file="${http_download_dir}/all.json"
    http_download_all_json "$all_json_file"
    http_process_all_json "$all_json_file" "$http_download_dir"
}

# cleanup
# Remove the temporary download directory ($HTTP_DOWNLOAD_DIR). Only the
# "https" source creates one, so nothing is removed for any other SOURCE.
cleanup () {
    progress "cleaning up"
    [[ "${SOURCE}" == "https" ]] || return 0
    progress "removing https download directory"
    rm -rf "$HTTP_DOWNLOAD_DIR"
}
## #### #### #### #### #### #### #### #### #### #### #### #### #### #### #### ##

## Get the list of ACL files to sync.
# The ACL names given on the command line are kept as one space-separated
# string.
acls="$*"
if [[ -z "$acls" ]]; then
    ACLS_FROM_COMMAND_LINE="0"
else
    ACLS_FROM_COMMAND_LINE="1"
fi

# --all and explicit ACL names are mutually exclusive.
if [[ "$ALL_ACLS" == "1" && "$ACLS_FROM_COMMAND_LINE" == "1" ]]; then
    exit_with_error "you cannot specify acls on the command line AND use the '--all' option"
fi

# Nothing to sync at all: show the help screen and exit.
if [[ -z "$acls" && "$ALL_ACLS" != "1" ]]; then
    show_help
    exit 0
fi

## Set SOURCE_DIR and, if downloading from https, populate SOURCE_DIR.
progress "setting SOURCE_DIR..."
if [[ "${SOURCE}" == "afs" ]]; then
    progress "proceeding with SOURCE afs"
    SOURCE_DIR="${AFS_SOURCE_DIR}"
elif [[ "${SOURCE}" == "https" ]]; then
    progress "proceeding with SOURCE https"
    HTTP_DOWNLOAD_DIR=$(mktemp -d -t raf-XXXXX)
    # The script runs under "set -e", so a failing download, JSON parse or
    # copy ends it immediately. Remove the temporary directory on EVERY
    # exit path so it is not left behind in that case. Removing it a
    # second time after a normal clean-up is a harmless no-op.
    trap 'rm -rf -- "$HTTP_DOWNLOAD_DIR"' EXIT
    populate_from_https "$HTTP_DOWNLOAD_DIR"
    SOURCE_DIR="${HTTP_DOWNLOAD_DIR}"
else
    exit_with_error "source '$SOURCE' not understood"
fi

progress "SOURCE_DIR is $SOURCE_DIR"

# Whatever the source, the directory has to exist before we go on.
if [[ ! -d "$SOURCE_DIR" ]]; then
    exit_with_error "source directory $SOURCE_DIR does not exist"
fi

## Build all_acl_files: the names (without directory or extension) of
## every *.acl file in SOURCE_DIR, separated by single spaces. It is used
## as the list of ACLs to copy when --all was given.
progress "setting all_acl_files..."
all_acl_files=""
for aclfile in "$SOURCE_DIR"/*.acl
do
    # When nothing matches, bash leaves the pattern itself in the list.
    # Skip it: otherwise all_acl_files would become "*", which the later
    # unquoted expansion of $acls would glob against the current directory.
    [[ -e "$aclfile" ]] || continue

    # Get basename of aclfile and add to list.
    aclfile_basename=${aclfile##*/}
    aclfile_basename_no_extension="${aclfile_basename%.*}"
    all_acl_files="${all_acl_files}${aclfile_basename_no_extension} "
done

# Remove trailing whitespace characters from all_acl_files.
all_acl_files="${all_acl_files%"${all_acl_files##*[![:space:]]}"}"
progress "all_acl_files is [${all_acl_files}]"

if [[ "$ALL_ACLS" == "1" ]]; then
    # Set the "acls" variable to be equal to the entire list of ACL files.
    acls="${all_acl_files}"
    progress "will copy ALL acls"
else
    progress "will copy these acls: $acls"
fi

##############################################################################
##############################################################################

# At this point we have a source directory where all the remctl ACL files
# can be found. We are now ready to copy them to the destination
# directory.

# >> CHANGE: 2024-08-19 (adamhl)
# >> We no longer have special logic for the Kerberos KDCs and other servers
# >> that might use the /admin root principal. Instead, if you want to include
# >> security ACL file you must do so yourself through Puppet (or whatever
# >> configuration management system you use).
# ------------------------------------------------------------------------
# THE FOLLOWING COMMENT IS NOW OBSOLETE (2024-08-19)
# Some systems, most notably the Kerberos KDCs, have stricter requirements
# for who has root access.  On those systems, we don't want to update all
# of the ACLs from AFS since this gives too many people access to
# root-equivalent commands like aptitude and puppet. The Kerberos KDCs
# also manage the "security" remctl ACL group, so we also skip that one.
# Detect those systems by seeing if there are admin instances in
# /root/.k5login and, if so, decline to update the operations and systems
# ACLs unless they're explicitly listed.
# END OF OBSOLETE COMMENT
# ------------------------------------------------------------------------


# With --abort-when-missing every requested ACL file has to be present in
# the source directory BEFORE anything is copied; the first missing one
# ends the script with exit status 1.
if [[ "$ABORT_WHEN_MISSING" == "1" ]]; then
    progress "--abort-when-missing set so checking existence of files first"
    for file in $acls ; do
        candidate="$SOURCE_DIR/$file.acl"
        if [[ -f "$candidate" ]]; then
            progress "file $candidate exists"
            continue
        fi
        echo "error: $candidate does not exist; will not copy ANY ACL files"
        cleanup
        exit 1
    done
fi

# From here on either every requested ACL file exists or the missing ones
# are simply skipped with a warning, so the copying can start.
progress "copying all ACL files to destination directory"

# Suffix appended to every destination file name: ".<DEST_EXT>" when an
# extension was requested, otherwise nothing.
dest_suffix="${DEST_EXT:+.${DEST_EXT}}"

for file in $acls ; do
    progress "starting to process ACL file '$file'"

    source_file="$SOURCE_DIR/$file.acl"
    dest_file="$DEST_DIR/${file}${dest_suffix}"

    # A missing source file is reported and skipped.
    if [[ ! -f "$source_file" ]]; then
        echo "warning: $source_file does not exist; skipping"
        progress "finished processing ACL file '$file'"
        continue
    fi

    # ACL FILE: copy only when source and destination differ (cmp also
    # reports a difference when the destination does not exist yet).
    if cmp -s "$source_file" "$dest_file" ; then
        file_changed="0"
        progress "not going to copy ACL file $file or ${file}-root: $file has not changed"
    else
        file_changed="1"
        progress "about to copy ACL file $file"
        if [[ "$DRYRUN" == "0" ]]; then
            cp --preserve=timestamps "$source_file" "$dest_file"
            progress "copied $source_file to $dest_file"
        else
            dryrun "would have copied $source_file to $dest_file"
        fi
    fi

    # ROOT ACL FILE: (re)create it when the ACL file changed or when the
    # root ACL file does not exist yet.
    if [[ "$ROOT_ACLS" != "1" ]]; then
        progress "--root-acls option not provided so not managing root principal version of the ACL file ..."
    else
        progress "--root-acls option provided so creating root principal version of the ACL file ..."

        acl_file_root_dest="${DEST_DIR}/${file}-root${dest_suffix}"

        if [[ "$file_changed" == "1" || ! -f "$acl_file_root_dest" ]]; then
            # grep exits non-zero when no line matches; that must not end
            # the script, so errexit is switched off around it.
            set +e
            if [[ "$DRYRUN" == "0" ]]; then
                grep '/root@' "$dest_file" > "$acl_file_root_dest"
                progress "created $acl_file_root_dest"
            else
                dryrun "would have created $acl_file_root_dest"
            fi
            set -e
        else
            progress "no need to update ${file}-root"
        fi
    fi

    progress "finished processing ACL file '$file'"
done

cleanup

exit 0

# Documentation.  Use a hack to hide this from the shell.  Because of the
# above exit line, this should never be executed.
DOCS=<<__END_OF_DOCS__

=for stopwords
ACL ACLs AFS Allbery acl remctl-acl-update helpdesk-all pubsw remctl
timestamps

=head1 NAME

remctl-acl-update-flex - Update remctl ACLs on local disk

=head1 SYNOPSIS

B<remctl-acl-update-flex> [options] I<acl> I<acl> ...

B<remctl-acl-update-flex> [options] --all

B<remctl-acl-update-flex> [-h|--help]

B<remctl-acl-update-flex> [-m|--manual]

=head1 DESCRIPTION

For many years Stanford has made available a collection of remctl ACL
files corresponding to workgroups within Stanford. These files contain
lists of Kerberos principals of three types: regular user names, root user
names, and non-user service principal names. The advantage of this is that
rather than requiring each system and service admin to maintain their own
ACL files corresponding to a list of users in a workgroup, we manage this
centrally leveraging Stanford's preferred grouping service Stanford
Workgroups.

The collection of these remctl ACL files is maintained centrally and is
accessible via AFS (for on-campus servers and services) and via HTTPS (for
all servers and services, either on- or off-premise). The
B<remctl-acl-update-flex> script's purpose is to copy some (or all) of
these files to a local directory.

The default action is to sync the list of ACL files specified as a
space-delimited list of arguments to this shell script. This is done by
comparing the contents of the source version with the version on local
disk. If they differ (or there is no local version yet), the source
version is copied to the directory F</etc/remctl/acl>, preserving
timestamps. The destination directory can be changed with the
B<--dest-dir> option.

The source is either AFS (the default) or the HTTPS remctl ACL file
service (used when B<--source> is set to "https").

This script only adds/updates files; no remctl files are ever I<removed>.

If the B<--all> option is given then all the ACL files are copied.

If any ACLs cannot be found (e.g., they were provided with I<acl> ...
but misspelled) then the script will skip those that are missing and write
a warning to standard output. If you want the script to abort before copying
if any ACL files are missing use the B<--abort-when-missing> option.

=head1 ROOT ACL FILES

ACL files created by the upstream ACL builder service contain a list of
service and user principals. For every regular user principal in the file
there will also be a corresponding root principal (provided that the user
I<has> a root principal). For example, the B<iedo> ACL file contains these
principals

    adamhl@stanford.edu
    adamhl/root@stanford.edu
    aguzhavi@stanford.edu
    aguzhavi/root@stanford.edu
    alanxge@stanford.edu
    bnbarnes@stanford.edu
    bnbarnes/root@stanford.edu
    lonlone@stanford.edu
    lonlone/root@stanford.edu
    pjudd@stanford.edu
    spinto@stanford.edu
    spinto/root@stanford.edu

Depending on how this script is called an extra ACL file based on the
regular ACL file, called the "root" ACL file, will be created. For the above
example, the root ACL file will be called B<iedo-root> and will contain
I<only> the root principals:

    adamhl/root@stanford.edu
    aguzhavi/root@stanford.edu
    bnbarnes/root@stanford.edu
    lonlone/root@stanford.edu
    spinto/root@stanford.edu

To get root ACL files for the supplied ACL files provide the
B<--root-acls> option. If you want some of your ACL files to get root ACL
file versions and some to I<not> get root ACL files run
B<remctl-acl-update-flex> twice, once with the B<--root-acls> option, and
once without. See the EXAMPLES section for an example of this.

=head1 OPTIONS

=over 4

=item B<--root-acls>

If this option is provided then both the non-root and root version of the
ACL file is written to B<dest-dir>. For example, the result of
C<remctl-acl-update-flex --root-acls tcg-staff> will be that both the
F<tcg-staff> and F<tcg-staff-root> ACL files will be written to
B<dest-dir>.

=item B<--source>=afs|https

Use the B<--source> option to specify which source you want to use for the
ACL file downloads: AFS or the HTTPS service. Both sources have the same
files. The default is C<afs>. Supplying any other value than "afs" or
"https" will result in an error.

=item B<--dest-dir>=<path>

Normally this script will write the remctl ACL files to the directory
B</etc/remctl/acl>. To override this use the B<--dest-dir> option. For
example, to write the remctl ACL files to F</tmp/acl> use
"B<--dest-dir>=F</tmp/acl>".  Note that if the destination directory does
not already exist the script will exit with an error.

=item B<--dest-ext>=<ext>

When this script creates the remctl ACL files the files have no extension.
For example, the F<its-idg> ACL file has the name "its-idg". However,
there may be times when you want the ACL files to have a common extension.
To do this supply the B<--dest-ext> option. For example, to have all
generated ACL files have the extension ".acl" use the option
C<--dest-ext=acl>.

=item B<--all>

Copy I<all> remctl ACL files. Do not use this option and also specify ACL
files on the command line; doing so will result in an error. Furthermore,
when using the B<--all> option the filenames created will have the ".acl"
extension. This is in order to be compatible with the legacy aclbuilder
process.

=item B<--abort-when-missing>

Normally this script skips any missing ACL files. If you supply
B<--abort-when-missing> then the script first checks that all the ACL
files exists before copying; if any are missing it aborts without copying
any files.

=item B<--dry-run|-n>

Run in "dry-run" mode. This mode downloads the file to a temporary
directory (just like normal mode) but does B<not> write them to the
destination directory. Use with the B<--verbose> option when debugging.

=item B<--platform>=dev|prod

(Only relevant when the source is "https".)
Normally this program will download the remctl ACL files from the
production remctl ACL file storage bucket. However, if you want to
download them from the I<non-production> bucket, use this option like so:
B<--platform=dev>. This option only recognizes two values: "prod" or "dev".
See also EXAMPLES below.

=item B<--baseurl>=<url>

(Only relevant when the source is "https".)
The script normally infers the remctl ACL file service URL from the
B<--platform> option, but you can override this and specify the URL
yourself by using the B<--baseurl> option.

=item B<--verbose|-v>

Run in verbose mode.

=item B<--help|-h>

Show a help summary and exit.

=item B<--manual|-m>

Show the man page and exit. Will only work if pod2man is installed.

=back

=head1 EXAMPLES

To update the ACL files "coreinfra", "its-idg", and "as-cia":

    remctl-acl-update-flex coreinfra its-idg as-cia

To update the ACL files "coreinfra", "its-idg", and "as-cia" using HTTPS
as the source:

    remctl-acl-update-flex coreinfra its-idg as-cia --source=https

To update the ACL files "coreinfra", "its-idg", and "as-cia" and to generate
the corresponding root ACL files as well:

    remctl-acl-update-flex coreinfra its-idg as-cia --root-acls

The above will update/generate the following six ACL files: "coreinfra", "its-idg", "as-cia",
"coreinfra-root", "its-idg-root", and "as-cia-root".

If you want to update the ACL files "coreinfra", "its-idg", and "as-cia"
but only generate the corresponding root ACL files for "coreinfra" and
"its-idg" and not for "as-cia" run the script twice:

    remctl-acl-update-flex coreinfra its-idg --root-acls
    remctl-acl-update-flex as-cia

To update from https using the non-production storage bucket:

    remctl-acl-update-flex coreinfra its-idg as-cia --root-acls --source=https --platform=dev

To update ALL the remctl files:

    remctl-acl-update-flex --all


=head1 FILES

=over 4

=item F</afs/ir.stanford.edu/pubsw/Local/config/share/etc/acl>

The directory in pubsw that holds the canonical copies of the ACLs. If
this directory doesn't exist, the script exits with an error without
changing anything. The ACLs in this directory are expected to end in
C<.acl> but are copied over without that extension. Note that this
directory is *not* used when the B<--source> option is set to "https".

=item F</etc/remctl/acl>

The local directory where the ACL files are stored (unless overridden by
the B<--dest-dir> option).

=back

=head1 AUTHOR

Russ Allbery <rra@stanford.edu>

Updated by Adam H. Lewenberg <adamhl@stanford.edu>

=cut

__END_OF_DOCS__
