Efficiently update (copy) only a few files to Google Drive

Thanks. I wrote a bash script that determines the list of files to upload by loading the local and the remote listings into sqlite and comparing them there, as I had originally planned. (Filtering with --max-age=24h, as you suggested, would not work in my case; see my previous reply.)

The script will likely need further tweaking. I will update as appropriate.

To use the script, copy the below to a file (I named the script fastsync.sh), give it execute permission, and run it as ./fastsync.sh --help. The script should print instructions.

#!/bin/bash
# fastsync - compares the file listings of a source and a destination in a
# temporary sqlite database and passes only the differing files to rclone.
set -o errexit
set -o nounset

### user modifiable ###
# set aliases to use alternative program versions
# (bash expands aliases in a script only when expand_aliases is switched on;
#  without it the alias lines below would be silently ignored)
shopt -s expand_aliases
#alias rclone="/opt/rclone/rclone"
#alias tar=
#alias sqlite3=
### end user modifiable ###

# Prints the help text and terminates the script.
# Arguments: $1 - exit code; values above 0 are explained on the first line
# Globals:   supportedflags (read) - pass-through flags listed in the help
# Outputs:   help text on stdout
usage() {
   local -a -r meanings=( "no error"
                          "commandline error"
                          "unconfigured backend" )
   local -r self="$( basename "$0" )"
   # one supported flag per line, indented like the hand-written flag lines
   local -r flaglines="$( printf "%s\n      " "${supportedflags[@]}" )"
   if [ "$1" -gt 0 ] ; then
      echo "Exit code $1 - ${meanings[$1]}"
   fi
   echo -e "
   usage: ${self} [ <flags> ] copy|sync [--] source destination
      Supported flags:
      --help             This usage text
      --progress         Prints stages of script progress to stdout
      -v, -vv, -vvv      Prints debug messages to errout; more v -> more details
      --version          Prints version of this script and of used tools
      --filter-from <filename>
      ${flaglines}"
   printf '%s\n' \
      "See rclone documentation for meaning of flags and commands." \
      "   " \
      "   2021-04-05 - script released into the public domain. No warranty." \
      "   Send bug reports and requests for additional flags to:" \
      "   <software at quantentunnel dot de>" \
      "   and mention fastsync in the subject line to bypass spam filter."
   exit "$1"
}

# Prints one line each for this script, rclone, sqlite3 and tar, giving the
# version that is in use.
# Outputs: four lines on stdout
version() {
   # rcs version number (major.2-digit minor)
   local -r release='1.0.4'
   printf '%s %s\n' "$( basename "$0" )" "${release}"
   # rclone and tar print several lines; only the first one is shown
   rclone version | head -n 1
   printf 'sqlite3 ' && sqlite3 --version
   tar --version | head -n 1
}

# parse commandline
declare -r -a supportedflags=( --skip-links --dry-run )
declare -i progress=0
declare -i verbose=0
declare -a parameters=( )
declare -a flags=( )
declare filterfrom=''

# Sorts the given commandline words into the globals declared above.
# Arguments: the script's commandline words
# Globals:   supportedflags (read); progress, verbose, parameters, flags,
#            filterfrom (written)
# Outputs:   progress banner or error text on stdout
# Exits:     through usage() for --help and for commandline errors, and
#            directly after printing the versions for --version
parse_commandline() {
   local known
   local -i supported
   while [ $# -gt 0 ] ; do
      case "$1" in
         --help)
            usage 0
            ;;
         --progress)
            date
            echo "Parsing and validating commandline ..."
            progress=1
            ;;
         -v)
            verbose=1
            ;;
         -vv)
            verbose=2
            ;;
         -vvv)
            verbose=3
            ;;
         --filter-from)
            # without this check a missing filename would abort the script
            # in 'shift' or on the unset \$1 without any explanation
            if [ $# -lt 2 ] ; then
               echo "Missing filename after --filter-from"
               usage 1
            fi
            shift
            filterfrom="$1"
            ;;
         --version)
            version
            exit 0
            ;;
         --?*)
            # only whole words from supportedflags are passed on to rclone
            supported=0
            for known in "${supportedflags[@]}" ; do
               if [[ "$1" == "${known}" ]] ; then
                  supported=1
               fi
            done
            if [ "${supported}" -eq 0 ] ; then
               echo "Unsupported flag: $1"
               usage 1
            fi
            flags+=( "$1" )
            ;;
         --)
            shift
            break
            ;;
         *)
            # quoted, so that paths containing blanks or glob characters
            # remain a single positional parameter
            parameters+=( "$1" )
            ;;
      esac
      shift
   done
   # everything after '--' is positional
   parameters+=( "$@" )
}
parse_commandline "$@"
# From -vv on, report the versions of the script and of the tools it uses.
if [ "${verbose}" -ge 2 ] ; then
   version
fi
# Exactly three positional parameters are needed: command, source and
# destination. usage() terminates the script.
if [ "${#parameters[@]}" -gt 3 ] ; then
   echo -e "Too many positional parameters: ${parameters[*]}\n" && usage 1
fi
if [ "${#parameters[@]}" -lt 3 ] ; then
   echo -e "Too few positional parameters: ${parameters[*]}\n" && usage 1
fi
declare -r command="${parameters[0]}"
declare -r source="${parameters[1]}"
declare -r destination="${parameters[2]}"
# Only the two commands that have a matching sqlite view are accepted.
case "${command}" in
   copy|sync)
      ;;
   *)
      usage 1
      ;;
esac
# Validate source and destination: each one has to be an existing local
# directory or has to start with the name of a configured rclone remote
# ("name:"). The remote list is fetched once for both checks; stderr is
# captured with it and a failing rclone is tolerated here, because an
# endpoint that matches nothing is rejected below anyway.
configured_remotes="$( rclone listremotes 2>&1 )" || true
for endpoint in "${source}" "${destination}" ; do
   endpoint_is_dir=0
   if [[ -d "${endpoint}" ]] ; then
      endpoint_is_dir=1
   fi
   endpoint_is_remote=0
   # -F -x compares the remote name literally against whole lines, so that
   # characters such as '.' in a remote name are not read as a regex
   if grep -F -x -q -e "${endpoint%%:*}:" <<<"${configured_remotes}" ; then
      endpoint_is_remote=1
   fi
   # debug messages go to stderr, like all other -v output of the script
   if [ "${verbose}" -ge 2 ] ; then
      if [ "${endpoint_is_dir}" -eq 1 ] ; then
         >&2 echo "${endpoint} is a local directory"
      fi
      if [ "${endpoint_is_remote}" -eq 1 ] ; then
         >&2 echo "${endpoint} is a configured rclone backend"
      fi
   fi
   if [ "${endpoint_is_dir}" -eq 0 ] && [ "${endpoint_is_remote}" -eq 0 ] ; then
      echo "${endpoint} is unknown"
      usage 2
   fi
   if [ "${verbose}" -gt 1 ] ; then
      >&2 echo "'${endpoint}' (or '${endpoint%%:*}:') is valid"
   fi
done
# Summary of the effective settings for -v and above.
if [ "${verbose}" -gt 0 ] ; then
   >&2 echo "${command} from '${source}' to '${destination}'"
   >&2 echo -e "progress=${progress} verbose=${verbose} ${flags[*]}\n   filter-from='${filterfrom}'"
fi

# Create the temporary sqlite database that caches the two file listings.
#   source / dest : one row per file (size, modification time, path)
#   copy          : paths that are missing, differ in size or are older on dest
#   sync          : copy plus the paths that exist on dest only
if (( progress != 0 )) ; then
   echo "Prepare sqlite db as cache ..."
fi
# Declaration and assignment are separate so that a failing mktemp is not
# hidden behind the exit status of 'declare'.
declare db
db="$( mktemp --suffix='.db' --tmpdir fastsync-XXXXXX )"
declare -r db

# Removes the temporary files when the script stops, also after an error.
# From -vv on they are kept for inspection.
cleanup_tempfiles() {
   if (( verbose < 2 )) ; then
      rm -f -- "${db}" ${filelist:+"${filelist}"}
   fi
}
trap cleanup_tempfiles EXIT

if (( verbose > 0 )) ; then
   >&2 echo "Metadata cache will be in '${db}'"
fi
sqlite3 -batch "${db}" <<'EOF'
   CREATE TABLE source(filesize INTEGER NOT NULL,
                       filetime TEXT NOT NULL,
                       filepath TEXT NOT NULL UNIQUE);
   CREATE TABLE dest  (filesize INTEGER NOT NULL,
                       filetime TEXT NOT NULL,
                       filepath TEXT NOT NULL UNIQUE);
   CREATE VIEW copy AS          -- files that require source -> dest
      SELECT s.filepath
         FROM source s LEFT JOIN dest d ON s.filepath=d.filepath
         WHERE s.filesize != d.filesize
            OR d.filesize IS NULL
            OR s.filetime > d.filetime;
   CREATE VIEW sync AS          -- also files that need to be deleted on dest
      SELECT filepath FROM copy
      UNION ALL
      -- take the path from dest: the source side of these rows is NULL
      SELECT d.filepath
         FROM dest d LEFT JOIN source s ON d.filepath=s.filepath
         WHERE s.filepath IS NULL;
EOF

# Runs 'rclone lsf' and imports its CSV output into one table of the cache.
# Arguments: $1  - table to fill (source or dest)
#            $2  - 1 to accept rclone exit code 3 (directory not found) as
#                  an empty listing, 0 to treat it as an error
#            $3+ - arguments for 'rclone lsf'
# Globals:   db (read), verbose (read)
# Exits:     with code 3 when rclone reports a failure; without this check
#            the pipe would hide it and an empty or partial listing would be
#            compared as if it were complete
import_listing() {
   local -r table="$1"
   local -r tolerate_missing="$2"
   shift 2
   local -a stage_status
   rclone lsf "$@" | sqlite3 -csv "${db}" ".import '|cat -' ${table}"
   stage_status=( "${PIPESTATUS[@]}" )
   if [ "${stage_status[0]}" -eq 0 ] ; then
      return 0
   fi
   if [ "${stage_status[0]}" -eq 3 ] && [ "${tolerate_missing}" -eq 1 ] ; then
      if [ "${verbose}" -ge 1 ] ; then
         >&2 echo "Directory not found; table '${table}' stays empty"
      fi
      return 0
   fi
   >&2 echo "rclone lsf failed with exit code ${stage_status[0]} (table '${table}')"
   exit 3
}

if [ "${progress}" -ne 0 ] ; then
   echo "Retrieving metadata from source (${source}) ..."
fi
# The arguments are collected in an array so that a filter file whose name
# contains blanks stays one word.
declare -a source_listing_args=( ${flags[@]+"${flags[@]}"} )
if [ -n "${filterfrom}" ] ; then
   source_listing_args+=( --filter-from "${filterfrom}" )
fi
source_listing_args+=( --recursive --csv --files-only --format "stp" "${source}" )
import_listing source 0 "${source_listing_args[@]}"
if [ "${verbose}" -ge 1 ] ; then
   source_count="$( sqlite3 "${db}" 'SELECT count(*) FROM source' )"
   >&2 echo "Metadata for ${source_count}   files retrieved from ${source}"
fi
if [ "${verbose}" -ge 3 ] ; then
   >&2 sqlite3 -separator ' ' -header "${db}" 'SELECT * FROM source'
fi

if [ "${progress}" -ne 0 ] ; then
   echo "Retrieving metadata from destination (${destination}) ..."
fi
# NOTE(review): neither the pass-through flags nor --filter-from are applied
# to the destination listing - confirm that this is intended for 'sync'.
# A destination directory that does not exist yet counts as empty.
import_listing dest 1 \
   --format "stp" --csv --recursive --files-only "${destination}"
if [ "${verbose}" -ge 1 ] ; then
   dest_count="$( sqlite3 "${db}" 'SELECT count(*) FROM dest' )"
   >&2 echo "Metadata for ${dest_count}   files retrieved from ${destination}"
fi
if [ "${verbose}" -ge 3 ] ; then
   >&2 sqlite3 -separator ' ' -header "${db}" 'SELECT * FROM dest'
fi

# Write the paths selected by the view that is named like the command
# (copy or sync) into a temporary list file for 'rclone --files-from'.
if [ "${progress}" -ne 0 ] ; then
   echo "Identifying the files requiring update for ${command} ..."
fi
# Declaration and assignment are separate so that a failing mktemp is not
# hidden behind the exit status of 'declare'.
declare filelist
filelist="$( mktemp --suffix='.list' --tmpdir fastsync-XXXXXX )"
declare -r filelist
sqlite3 "${db}" "SELECT * FROM ${command}" > "${filelist}"
if [ "${verbose}" -ge 1 ] ; then
   update_count="$( sqlite3 "${db}" "SELECT count(*) FROM ${command}" )"
   >&2 echo "${update_count}   files require updating (stored in '${filelist}')"
fi
if [ "${verbose}" -ge 2 ] ; then
   copy_count="$( sqlite3 "${db}" "SELECT count(*) FROM copy" )"
   sync_count="$( sqlite3 "${db}" "SELECT count(*) FROM sync" )"
   >&2 printf '%s\n' \
      "For reference:" \
      "   ${copy_count} for 'rclone copy'" \
      "   ${sync_count} for 'rclone sync'"
fi

# Run the actual transfer, restricted to the files in the list.
if [ "${progress}" -ne 0 ] ; then
   echo "Executing rclone ${command} ..."
fi
# The commandline is collected in an array instead of relying on unquoted
# expansions, so every argument reaches rclone as exactly one word.
declare -a transfer_args=( "${command}" ${flags[@]+"${flags[@]}"} )
transfer_args+=( --no-traverse --files-from "${filelist}" )
if [ "${progress}" -ne 0 ] ; then
   transfer_args+=( --progress )
fi
# rclone itself becomes verbose from -vv on
if [ "${verbose}" -ge 2 ] ; then
   transfer_args+=( -v )
fi
transfer_args+=( -- "${source}" "${destination}" )
rclone "${transfer_args[@]}"

# Remove the temporary files; from -vv on they are kept and their names are
# reported instead.
if [ "${progress}" -ne 0 ] ; then
   echo "Cleaning up ..."
fi
if [ "${verbose}" -ge 2 ] ; then
   >&2 echo "Keeping '${db}' and '${filelist}' due to selected verbosity"
else
   rm "${db}"
   rm "${filelist}"
fi
if [ "${progress}" -ne 0 ] ; then
   date
fi
exit 0