I've implemented the single threaded version of HashSum and HashLister as suggested. There are several comments where I am uncertain about the best way to implement in this code base mainly due to differences between the methods I used as references.
// hashSum returns the human readable hash for ht passed in. This may
// be UNSUPPORTED or ERROR. If it isn't returning a valid hash it will
// return an error.
func hashSum(ctx context.Context, ht hash.Type, downloadFlag bool, o fs.Object) (string, error) {
var sum string
var err error
// If downloadFlag is true, download and hash the file.
// If downloadFlag is false, call o.Hash asking the backend for the hash
if downloadFlag {
// Setup accounting as NewTransfer. Same approach used in Cat
tr := accounting.Stats(ctx).NewTransfer(o)
defer func() {
tr.Done(ctx, err)
}()
// Setup download options. Similar approach to Cat.
// We do not have a start / end, so use only fs.Config.DownloadHeaders.
// Unsure if copying to a new OpenOption object is required.
// Copy has a very similar setup but includes hashOption
var options []fs.OpenOption
for _, option := range fs.Config.DownloadHeaders {
options = append(options, option)
}
// Using NewReOpen. Error handling seems to be done differently in each function. This version is from Cat
in, err := NewReOpen(ctx, o, fs.Config.LowLevelRetries, options...)
if err != nil {
err = fs.CountError(err)
fs.Errorf(o, "Failed to open: %v", err)
return "ERROR", err
}
in = tr.Account(ctx, in).WithBuffer() // account and buffer the transfer
// Unclear if we need to close in.
// Copy, which uses NewReOpen does.
// Cat, rcat, and other methods use their own readCloser but don't appear to close
// If the error does not occur until close, we log the error but return the hash as okay
defer func() {
err = in.Close()
if err != nil {
err = fs.CountError(err)
fs.Errorf(o, "Failed to close: %v", err)
}
}()
// Create a multihasher to receive the transfer
// Should use hash.StreamTypes here instead of creating the hasher and copying?
// Not doing fs.CountError or fs.Errorf here because other functions that create MultiHashers do not. Should we?
hasher, err := hash.NewMultiHasherTypes(hash.NewHashSet(ht))
if err != nil {
return "UNSUPPORTED", err
}
// Using fs.CountError based on Cat
// StreamTypes does not do this, if copy fails it just returns the error
_, err = io.Copy(hasher, in)
if err != nil {
err = fs.CountError(err)
fs.Errorf(o, "Failed to copy file to hasher: %v", err)
return "ERROR", err
}
// hasher.Sum returns an error, but I don't believe it's possible to error here.
// It appears the only error is unsupported.
// If NewMultiHasherTypes succeeds and ht has not changed, we should not receive this error.
// Sum returns a byte array, and we want to return a hex string, so encode
byteSum, err := hasher.Sum(ht)
if err != nil {
return "ERROR", err
}
sum = hex.EncodeToString(byteSum)
} else {
// If not downloadFlag, do the original version of HashSum
tr := accounting.Stats(ctx).NewCheckingTransfer(o)
defer func() {
tr.Done(ctx, err)
}()
sum, err = o.Hash(ctx, ht)
if err == hash.ErrUnsupported {
return "UNSUPPORTED", err
} else if err != nil {
// This was previously at the debug level.
// Changed to the error level to be consistent with the above
fs.Errorf(o, "Failed to read %v: %v", ht, err)
return "ERROR", err
}
}
And HashLister:
// HashLister does an md5sum equivalent for the hash type passed in
// Updated to handle both standard hex encoding and base64
// HashListerBase64 appears to have an odd case for non nil errors where the sum is not base64 encoded but the width is set based on base64 encoding
// In this new function, the standard width is instead used for the outputBase64 == True && err != nil case
func HashLister(ctx context.Context, ht hash.Type, outputBase64 bool, downloadFlag bool, f fs.Fs, w io.Writer) error {
return ListFn(ctx, f, func(o fs.Object) {
sum, err := hashSum(ctx, ht, downloadFlag, o)
if outputBase64 && err == nil {
hexBytes, _ := hex.DecodeString(sum)
sum = base64.URLEncoding.EncodeToString(hexBytes)
width := base64.URLEncoding.EncodedLen(hash.Width(ht) / 2)
syncFprintf(w, "%*s %s\n", width, sum, o.Remote())
} else {
syncFprintf(w, "%*s %s\n", hash.Width(ht), sum, o.Remote())
}
})
}
I have modified the HashSum unit tests as follows. This test is passing. Is more required here?
func TestHashSums(t *testing.T) {
ctx := context.Background()
r := fstest.NewRun(t)
defer r.Finalise()
file1 := r.WriteBoth(ctx, "potato2", "------------------------------------------------------------", t1)
file2 := r.WriteBoth(ctx, "empty space", "-", t2)
fstest.CheckItems(t, r.Fremote, file1, file2)
// MD5 Sum
var buf bytes.Buffer
err := operations.HashLister(ctx, hash.MD5, false, true, r.Fremote, &buf)
require.NoError(t, err)
res := buf.String()
if !strings.Contains(res, "336d5ebc5436534e61d16e63ddfca327 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "d6548b156ea68a4e003e786df99eee76 potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
// SHA1 Sum
buf.Reset()
err = operations.HashLister(ctx, hash.SHA1, false, false, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "3bc15c8aae3e4124dd409035f32ea2fd6835efc9 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "9dc7f7d3279715991a22853f5981df582b7f9f6d potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
// QuickXorHash Sum
buf.Reset()
var ht hash.Type
err = ht.Set("QuickXorHash")
require.NoError(t, err)
err = operations.HashLister(ctx, ht, false, true, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "2d00000000000000000000000100000000000000 empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "4001dad296b6b4a52d6d694b67dad296b6b4a52d potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
// QuickXorHash Sum with Base64 Encoded
buf.Reset()
err = operations.HashLister(ctx, ht, true, false, r.Fremote, &buf)
require.NoError(t, err)
res = buf.String()
if !strings.Contains(res, "LQAAAAAAAAAAAAAAAQAAAAAAAAA= empty space\n") &&
!strings.Contains(res, " UNSUPPORTED empty space\n") &&
!strings.Contains(res, " empty space\n") {
t.Errorf("empty space missing: %q", res)
}
if !strings.Contains(res, "QAHa0pa2tKUtbWlLZ9rSlra0pS0= potato2\n") &&
!strings.Contains(res, " UNSUPPORTED potato2\n") &&
!strings.Contains(res, " potato2\n") {
t.Errorf("potato2 missing: %q", res)
}
}
I have redirected the commands md5sum and sha1sum to HashLister. I have implemented the download flag and base64 flag (where required) for those two commands as well as hashsum and dbhashsum. I have removed operations.Md5sum, operations.Sha1sum, and operations.HashListerBase64.
- I wanted to replace the commands other than hashsum with an alias to hashsum + the appropriate argument to avoid implementing the same flags multiple times. I could not determine how to do this.
- Are there unit tests for the commands?