diff --git a/ais/target.go b/ais/target.go index c48fc6d62e..5242899a3f 100644 --- a/ais/target.go +++ b/ais/target.go @@ -762,7 +762,6 @@ func (t *target) getObject(w http.ResponseWriter, r *http.Request, dpq *dpq, bck // do if ecode, err := goi.getObject(); err != nil { if !goi.isIOErr { - debug.Assert(err != errSendingResp && !cos.IsRetriableConnErr(err), err) t.statsT.IncNonIOErr() } diff --git a/ais/tgtfshc.go b/ais/tgtfshc.go index ad5ae849a9..b5c72ab440 100644 --- a/ais/tgtfshc.go +++ b/ais/tgtfshc.go @@ -5,8 +5,6 @@ package ais import ( - "fmt" - "github.com/NVIDIA/aistore/cmn" "github.com/NVIDIA/aistore/cmn/cos" "github.com/NVIDIA/aistore/cmn/debug" @@ -23,17 +21,20 @@ func (t *target) SoftFSHC() { } func (t *target) FSHC(err error, mi *fs.Mountpath, fqn string) { - config := cmn.GCO.Get() + debug.Assert(mi != nil) + config := cmn.GCO.Get() if cmn.IsErrCapExceeded(err) { cs := t.oos(config) - nlog.Errorf("%s: OOS (%s) via FSHC", t, cs.String()) + nlog.Errorf("%s: OOS (%s) via FSHC, %s", t, cs.String(), mi) return } if !config.FSHC.Enabled { return } + + // NOTE: filter-out non-IO errors if !t.fshc.IsErr(err) { if cmn.Rom.FastV(4, cos.SmoduleAIS) { nlog.Warningln(err, "is not one of the error types to trigger FSHC, ignoring...") @@ -41,32 +42,16 @@ func (t *target) FSHC(err error, mi *fs.Mountpath, fqn string) { return } - s := fmt.Sprintf("waking up FSHC to check %s, err: %v", mi, err) // or maybe not (waking up) - - if mi == nil { - mi, _, err = fs.FQN2Mpath(fqn) - if err != nil { - if e, ok := err.(*cmn.ErrMpathNotFound); ok { - if e.Disabled() { - nlog.Errorf("%s: %s is disabled, not %s", t, e.Mpath(), s) - return - } - } - nlog.Errorf("%s: %v, %s", t, err, s) - return - } - debug.Assert(mi != nil) - } if !mi.IsAvail() { - nlog.Warningln(mi.String(), "is not available, skipping FSHC") + nlog.Warningln(mi.String(), "is not available (possibly disabled or detached), skipping FSHC") return } - // yes "waking up" - nlog.Errorln(t.String()+":", s) + nlog.Errorf("%s: waking up FSHC to check %s, err: %v", t, mi, err) // - // metrics: counting I/O errors on a per mountpath (`NameSuffix` below) basis + // counting I/O errors on a per mountpath + // TODO -- FIXME: remove `NameSuffix` // t.statsT.AddMany(cos.NamedVal64{Name: stats.ErrIOCount, NameSuffix: mi.Path, Value: 1}) t.fshc.OnErr(mi, fqn) diff --git a/ais/tgtobj.go b/ais/tgtobj.go index d0d09ae0d2..a6dc0eb533 100644 --- a/ais/tgtobj.go +++ b/ais/tgtobj.go @@ -1163,13 +1163,12 @@ func (goi *getOI) transmit(r io.Reader, buf []byte, fqn string) error { written, err := cos.CopyBuffer(goi.w, r, buf) if err != nil { if !cos.IsRetriableConnErr(err) { - mi := goi.lom.Mountpath() - if fqn != goi.lom.FQN { - mi = nil - } - goi.t.FSHC(err, mi, fqn) + nlog.Errorln("failed to GET", goi.lom.String()+":", err) + goi.t.FSHC(err, goi.lom.Mountpath(), fqn) + } else if cmn.Rom.FastV(4, cos.SmoduleAIS) { + nlog.Warningln("failed to GET", goi.lom.String()+":", err) } - nlog.Errorln(cmn.NewErrFailedTo(goi.t, "GET", fqn, err)) + // at this point, error is already written into the response - // return special code to indicate just that return errSendingResp diff --git a/core/lcopy.go b/core/lcopy.go index 3a0ecfeb02..bd37167237 100644 --- a/core/lcopy.go +++ b/core/lcopy.go @@ -156,7 +156,12 @@ func (lom *LOM) syncMetaWithCopies() (err error) { } lom.delCopyMd(copyFQN) if err1 := cos.Stat(copyFQN); err1 != nil && !os.IsNotExist(err1) { - T.FSHC(err, nil, copyFQN) // TODO: notify scrubber + mi, _, err2 := fs.FQN2Mpath(copyFQN) + if err2 != nil { + nlog.Errorln("nested err:", err2, "fqn:", copyFQN) + } else { + T.FSHC(err, mi, copyFQN) + } } } return