Force Download & Hash

I've implemented the single threaded version of HashSum and HashLister as suggested. There are several comments where I am uncertain about the best way to implement in this code base mainly due to differences between the methods I used as references.

// hashSum returns the human readable hash for ht passed in.  This may
// be UNSUPPORTED or ERROR. If it isn't returning a valid hash it will
// return an error.
func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object) (string, error) {
	var sum string
	var err error

	// If downloadFlag is true, download and hash the file.
	// If downloadFlag is false, call o.Hash asking the backend for the hash
	if downloadFlag {
		
		// Setup accounting as NewTransfer.  Same approach used in Cat
		tr := accounting.Stats(ctx).NewTransfer(o)
		defer func() {
			tr.Done(ctx, err)
		}()

		// Setup download options.  Similar approach to Cat.
		// We do not have a start / end, so use only fs.Config.DownloadHeaders.
		// Unsure if copying to a new OpenOption object is required.
		// Copy has a very similar setup but includes hashOption
		var options []fs.OpenOption
		for _, option := range fs.Config.DownloadHeaders {
			options = append(options, option)
		}

		// Using NewReOpen.  Error handling seems to be done differently in each function.  This version is from Cat
		in, err := NewReOpen(ctx, o, fs.Config.LowLevelRetries, options...)
		if err != nil {
			err = fs.CountError(err)
			fs.Errorf(o, "Failed to open: %v", err)
			return "ERROR", err
		}
		in = tr.Account(ctx, in).WithBuffer() // account and buffer the transfer

		// Unclear if we need to close in.
		// Copy, which uses NewReOpen does.
		// Cat, rcat, and other methods use their own readCloser but don't appear to close
		// If the error does not occur until close, we log the error but return the hash as okay
		defer func() {
			err = in.Close()
			if err != nil {
				err = fs.CountError(err)
				fs.Errorf(o, "Failed to close: %v", err)
			}
		}()

		// Create a multihasher to receive the transfer
		// Should use hash.StreamTypes here instead of creating the hasher and copying?
		// Not doing fs.CountError or fs.Errorf here because other functions that create MultiHashers do not.  Should we?
		hasher, err := hash.NewMultiHasherTypes(hash.NewHashSet(ht))
		if err != nil {
			return "UNSUPPORTED", err
		}

		// Using fs.CountError based on Cat
		// StreamTypes does not do this, if copy fails it just returns the error
		_, err = io.Copy(hasher, in)
		if err != nil {
			err = fs.CountError(err)
			fs.Errorf(o, "Failed to copy file to hasher: %v", err)
			return "ERROR", err
		}

		// hasher.Sum returns an error, but I don't believe it's possible to error here.
		// It appears the only error is unsupported.
		// If NewMultiHasherTypes succeeds and ht has not changed, we should not receive this error.
		// Sum returns a byte array, and we want to return a hex string, so encode
		byteSum, err := hasher.Sum(ht)
		if err != nil {
			return "ERROR", err
		}
		sum = hex.EncodeToString(byteSum)
	} else {
		// If not downloadFlag, do the original version of HashSum

		tr := accounting.Stats(ctx).NewCheckingTransfer(o)
		defer func() {
			tr.Done(ctx, err)
		}()

		sum, err = o.Hash(ctx, ht)
		if err == hash.ErrUnsupported {
			return "UNSUPPORTED", err
		} else if err != nil {
			// This was previously at the debug level.
			// Changed to the error level to be consistent with the above
			fs.Errorf(o, "Failed to read %v: %v", ht, err)
			return "ERROR", err
		}
	}

And HashLister:

// HashLister does an md5sum equivalent for the hash type passed in
// Updated to handle both standard hex encoding and base64
// HashListerBase64 appears to have an odd case for non nil errors where the sum is not base64 encoded but the width is set based on base64 encoding
// In this new function, the standard width is instead used for the outputBase64 == True && err != nil case
func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFlag bool, f fs.Fs, w io.Writer) error {
	return ListFn(ctx, f, func(o fs.Object) {
		sum, err := hashSum(ctx, ht, downloadFlag, o)
		if outputBase64 && err == nil {
			hexBytes, _ := hex.DecodeString(sum)
			sum = base64.URLEncoding.EncodeToString(hexBytes)
			width := base64.URLEncoding.EncodedLen(hash.Width(ht) / 2)
			syncFprintf(w, "%*s  %s\n", width, sum, o.Remote())
		} else {
			syncFprintf(w, "%*s  %s\n", hash.Width(ht), sum, o.Remote())	
		}
	})
}

I have modified the HashSum unit tests as follows. This test is passing. Is more required here?

func TestHashSums(t *testing.T) {
	ctx := context.Background()
	r := fstest.NewRun(t)
	defer r.Finalise()
	file1 := r.WriteBoth(ctx, "potato2", "------------------------------------------------------------", t1)
	file2 := r.WriteBoth(ctx, "empty space", "-", t2)

	fstest.CheckItems(t, r.Fremote, file1, file2)

	// MD5 Sum

	var buf bytes.Buffer
	err := operations.HashLister(ctx, hash.MD5, false, true, r.Fremote, &buf)
	require.NoError(t, err)
	res := buf.String()
	if !strings.Contains(res, "336d5ebc5436534e61d16e63ddfca327  empty space\n") &&
		!strings.Contains(res, "                     UNSUPPORTED  empty space\n") &&
		!strings.Contains(res, "                                  empty space\n") {
		t.Errorf("empty space missing: %q", res)
	}
	if !strings.Contains(res, "d6548b156ea68a4e003e786df99eee76  potato2\n") &&
		!strings.Contains(res, "                     UNSUPPORTED  potato2\n") &&
		!strings.Contains(res, "                                  potato2\n") {
		t.Errorf("potato2 missing: %q", res)
	}

	// SHA1 Sum

	buf.Reset()
	err = operations.HashLister(ctx, hash.SHA1, false, false, r.Fremote, &buf)
	require.NoError(t, err)
	res = buf.String()
	if !strings.Contains(res, "3bc15c8aae3e4124dd409035f32ea2fd6835efc9  empty space\n") &&
		!strings.Contains(res, "                             UNSUPPORTED  empty space\n") &&
		!strings.Contains(res, "                                          empty space\n") {
		t.Errorf("empty space missing: %q", res)
	}
	if !strings.Contains(res, "9dc7f7d3279715991a22853f5981df582b7f9f6d  potato2\n") &&
		!strings.Contains(res, "                             UNSUPPORTED  potato2\n") &&
		!strings.Contains(res, "                                          potato2\n") {
		t.Errorf("potato2 missing: %q", res)
	}

	// QuickXorHash Sum

	buf.Reset()
	var ht hash.Type
	err = ht.Set("QuickXorHash")
	require.NoError(t, err)
	err = operations.HashLister(ctx, ht, false, true, r.Fremote, &buf)
	require.NoError(t, err)
	res = buf.String()
	if !strings.Contains(res, "2d00000000000000000000000100000000000000  empty space\n") &&
		!strings.Contains(res, "                             UNSUPPORTED  empty space\n") &&
		!strings.Contains(res, "                                          empty space\n") {
		t.Errorf("empty space missing: %q", res)
	}
	if !strings.Contains(res, "4001dad296b6b4a52d6d694b67dad296b6b4a52d  potato2\n") &&
		!strings.Contains(res, "                             UNSUPPORTED  potato2\n") &&
		!strings.Contains(res, "                                          potato2\n") {
		t.Errorf("potato2 missing: %q", res)
	}

	// QuickXorHash Sum with Base64 Encoded

	buf.Reset()
	err = operations.HashLister(ctx, ht, true, false, r.Fremote, &buf)
	require.NoError(t, err)
	res = buf.String()
	if !strings.Contains(res, "LQAAAAAAAAAAAAAAAQAAAAAAAAA=  empty space\n") &&
		!strings.Contains(res, "                 UNSUPPORTED  empty space\n") &&
		!strings.Contains(res, "                              empty space\n") {
		t.Errorf("empty space missing: %q", res)
	}
	if !strings.Contains(res, "QAHa0pa2tKUtbWlLZ9rSlra0pS0=  potato2\n") &&
		!strings.Contains(res, "                 UNSUPPORTED  potato2\n") &&
		!strings.Contains(res, "                              potato2\n") {
		t.Errorf("potato2 missing: %q", res)
	}
}

I have redirected the commands md5sum and sha1sum to HashLister. I have implemented the download flag and base64 flag (where required) for those two commands as well as hashsum and dbhashsum. I have removed operations.Md5sum, operations.Sha1sum, and operations.HashListerBase64.

  • I wanted to replace the commands other than hashsum with an alias to hashsum + the appropriate argument to avoid implementing the same flags multiple times. I could not determine how to do this.
  • Are there unit tests for the commands?