Skip to content

Commit

Permalink
never call FSHC with nil mountpath; reduce code
Browse files (browse the repository at this point in the history)
* also, remove erroneous assert

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Jul 24, 2024
1 parent bd927bc commit fdf575e
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 32 deletions.
1 change: 0 additions & 1 deletion ais/target.go
Original file line number Diff line number Diff line change
Expand Up @@ -762,7 +762,6 @@ func (t *target) getObject(w http.ResponseWriter, r *http.Request, dpq *dpq, bck
// do
if ecode, err := goi.getObject(); err != nil {
if !goi.isIOErr {
debug.Assert(err != errSendingResp && !cos.IsRetriableConnErr(err), err)
t.statsT.IncNonIOErr()
}

Expand Down
33 changes: 9 additions & 24 deletions ais/tgtfshc.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
package ais

import (
"fmt"

"github.com/NVIDIA/aistore/cmn"
"github.com/NVIDIA/aistore/cmn/cos"
"github.com/NVIDIA/aistore/cmn/debug"
Expand All @@ -23,50 +21,37 @@ func (t *target) SoftFSHC() {
}

func (t *target) FSHC(err error, mi *fs.Mountpath, fqn string) {
config := cmn.GCO.Get()
debug.Assert(mi != nil)

config := cmn.GCO.Get()
if cmn.IsErrCapExceeded(err) {
cs := t.oos(config)
nlog.Errorf("%s: OOS (%s) via FSHC", t, cs.String())
nlog.Errorf("%s: OOS (%s) via FSHC, %s", t, cs.String(), mi)
return
}

if !config.FSHC.Enabled {
return
}

// NOTE: filter-out non-IO errors
if !t.fshc.IsErr(err) {
if cmn.Rom.FastV(4, cos.SmoduleAIS) {
nlog.Warningln(err, "is not one of the error types to trigger FSHC, ignoring...")
}
return
}

s := fmt.Sprintf("waking up FSHC to check %s, err: %v", mi, err) // or maybe not (waking up)

if mi == nil {
mi, _, err = fs.FQN2Mpath(fqn)
if err != nil {
if e, ok := err.(*cmn.ErrMpathNotFound); ok {
if e.Disabled() {
nlog.Errorf("%s: %s is disabled, not %s", t, e.Mpath(), s)
return
}
}
nlog.Errorf("%s: %v, %s", t, err, s)
return
}
debug.Assert(mi != nil)
}
if !mi.IsAvail() {
nlog.Warningln(mi.String(), "is not available, skipping FSHC")
nlog.Warningln(mi.String(), "is not available (possibly disabled or detached), skipping FSHC")
return
}

// yes "waking up"
nlog.Errorln(t.String()+":", s)
nlog.Errorf("%s: waking up FSHC to check %s, err: %v", t, mi, err)

//
// metrics: counting I/O errors on a per mountpath (`NameSuffix` below) basis
// counting I/O errors on a per-mountpath basis
// TODO -- FIXME: remove `NameSuffix`
//
t.statsT.AddMany(cos.NamedVal64{Name: stats.ErrIOCount, NameSuffix: mi.Path, Value: 1})
t.fshc.OnErr(mi, fqn)
Expand Down
11 changes: 5 additions & 6 deletions ais/tgtobj.go
Original file line number Diff line number Diff line change
Expand Up @@ -1163,13 +1163,12 @@ func (goi *getOI) transmit(r io.Reader, buf []byte, fqn string) error {
written, err := cos.CopyBuffer(goi.w, r, buf)
if err != nil {
if !cos.IsRetriableConnErr(err) {
mi := goi.lom.Mountpath()
if fqn != goi.lom.FQN {
mi = nil
}
goi.t.FSHC(err, mi, fqn)
nlog.Errorln("failed to GET", goi.lom.String()+":", err)
goi.t.FSHC(err, goi.lom.Mountpath(), fqn)
} else if cmn.Rom.FastV(4, cos.SmoduleAIS) {
nlog.Warningln("failed to GET", goi.lom.String()+":", err)
}
nlog.Errorln(cmn.NewErrFailedTo(goi.t, "GET", fqn, err))

// at this point, error is already written into the response -
// return special code to indicate just that
return errSendingResp
Expand Down
7 changes: 6 additions & 1 deletion core/lcopy.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,12 @@ func (lom *LOM) syncMetaWithCopies() (err error) {
}
lom.delCopyMd(copyFQN)
if err1 := cos.Stat(copyFQN); err1 != nil && !os.IsNotExist(err1) {
T.FSHC(err, nil, copyFQN) // TODO: notify scrubber
mi, _, err2 := fs.FQN2Mpath(copyFQN)
if err2 != nil {
nlog.Errorln("nested err:", err2, "fqn:", copyFQN)
} else {
T.FSHC(err, mi, copyFQN)
}
}
}
return
Expand Down

0 comments on commit fdf575e

Please sign in to comment.