Not one of my own; rather, a colleague came up with the following to improve the speed of my slow `while` loop. It takes advantage of GNU `parallel` by turning the two steps of the task, extracting the target path and making the actual copy, into a single function, which can then be farmed out to multiple processes that all read from the same input file.

```bash
#!/bin/bash
#
# Description : This script takes a single file that lists loadable sources on s3 and exports
#               the compiled text files to the directory in which it is invoked, preserving
#               the scope/user_id/imei directory structure.
#               The copy is parallelised with the GNU command "parallel". The number of
#               parallel jobs used for the copy is 16 by default and can be modified with
#               the -j option.
#               See https://www.gnu.org/software/parallel/ for more information.
#               Note that parallel can be installed in a conda environment with:
#                   conda install parallel
#
set -e
set -o pipefail

usage() {
    echo "usage: aws-export.sh [ -f LOADABLE_SOURCE_FILE ] [ -o OUTPUT_DIR ] [ -j JOBS ]"
}

while getopts 'f:o:j:?h' option
do
    case $option in
        f) LOADABLE_SRC=${OPTARG};;
        o) OUTPUT_DIR=${OPTARG};;
        j) JOBS=${OPTARG};;
        h|?) usage
             exit 0;;
    esac
done

echo "########################################"
echo "Exporting compiled text files from AWS"
echo "########################################"
echo "Date : $(date)"
echo "Host : $(hostname)"
echo "########################################"
echo ""
echo ""

if [ -z "$LOADABLE_SRC" ]; then
    echo "You have not specified a file that lists compiled text with the '-f' flag."
    echo "Please enter the path to such a file : "
    read -r LOADABLE_SRC
fi

if [ -z "$OUTPUT_DIR" ]; then
    OUTPUT_DIR='.'
    echo "########################################"
    echo "Exporting files to current directory"
    echo "########################################"
    echo ""
fi

if [ -z "$JOBS" ]; then
    JOBS=16
fi

echo "*** Parallel execution using $JOBS CPU threads ***"
echo ""

aws_copy() {
    echo "Copying file $1"
    # Rebuild the scope/user_id/imei subdirectory from fields 4-6 of the
    # slash-separated s3 key, so the directory structure is preserved.
    SUB_DIR=$(echo "$1" | awk '{split($0, path, "/"); print path[4]"/"path[5]"/"path[6]"/"}')
    echo "Target is : $2/$SUB_DIR"
    aws s3 cp "$1" "$2/$SUB_DIR" 2>/dev/null
    echo "########################################"
}

# Set timer to zero
SECONDS=0

# Export the function so the shells spawned by parallel can see it.
export -f aws_copy
parallel -j "$JOBS" aws_copy {} "$OUTPUT_DIR" < "$LOADABLE_SRC"

DURATION=$SECONDS
echo ""
echo ""
echo "Export took : $DURATION seconds"
echo "Files exported to : $OUTPUT_DIR"
echo "########################################"
```
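
The core of the trick is the `export -f` / `parallel` combination near the end. Here is a minimal sketch of that pattern in isolation (the names `process_line` and `input.txt` are placeholders for this illustration, not part of the script above): define the per-item work as a function, export it so the shells that `parallel` spawns can see it, and feed one input line to each job.

```bash
#!/bin/bash
# Minimal sketch of the export-a-function pattern used above.
# "process_line" and "input.txt" are illustrative names only.

process_line() {
    # $$ shows that different input lines are handled by different shells.
    echo "worker $$ handling: $1"
    sleep 1   # stand-in for the real work, e.g. an aws s3 cp
}

# Without this export, the bash instances spawned by parallel would
# not know the function and every job would fail.
export -f process_line

# {} is replaced by one line of input per job; -j 4 caps concurrency at 4.
parallel -j 4 process_line {} < input.txt
```

Run against a ten-line `input.txt`, this finishes in roughly 3 seconds instead of 10, which is exactly the kind of speed-up the script above gets from replacing the sequential `while` loop with `parallel -j 16`.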