From 0277ff4239b6c80d854e17a4bb57134d6c497b6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Fri, 1 Jul 2022 21:24:22 +0200 Subject: [PATCH 1/6] wdpost: move fault/recover code to a separate file --- storage/wdpost/wdpost_run.go | 269 -------------------------- storage/wdpost/wdpost_run_faults.go | 289 ++++++++++++++++++++++++++++ 2 files changed, 289 insertions(+), 269 deletions(-) create mode 100644 storage/wdpost/wdpost_run_faults.go diff --git a/storage/wdpost/wdpost_run.go b/storage/wdpost/wdpost_run.go index d77181f4bbd..1a77831cb1c 100644 --- a/storage/wdpost/wdpost_run.go +++ b/storage/wdpost/wdpost_run.go @@ -241,275 +241,6 @@ func (s *WindowPoStScheduler) checkSectors(ctx context.Context, check bitfield.B return sbf, nil } -// declareRecoveries identifies sectors that were previously marked as faulty -// for our miner, but are now recovered (i.e. are now provable again) and -// still not reported as such. -// -// It then reports the recovery on chain via a `DeclareFaultsRecovered` -// message to our miner actor. -// -// This is always invoked ahead of time, before the deadline for the evaluated -// sectors arrives. That way, recoveries are declared in preparation for those -// sectors to be proven. -// -// If a declaration is made, it awaits for build.MessageConfidence confirmations -// on chain before returning. -// -// TODO: the waiting should happen in the background. Right now this -// is blocking/delaying the actual generation and submission of WindowPoSts in -// this deadline! -func (s *WindowPoStScheduler) declareRecoveries(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.RecoveryDeclaration, *types.SignedMessage, error) { - ctx, span := trace.StartSpan(ctx, "storage.declareRecoveries") - defer span.End() - - faulty := uint64(0) - params := &miner.DeclareFaultsRecoveredParams{ - Recoveries: []miner.RecoveryDeclaration{}, - } - - for partIdx, partition := range partitions { - unrecovered, err := bitfield.SubtractBitField(partition.FaultySectors, partition.RecoveringSectors) - if err != nil { - return nil, nil, xerrors.Errorf("subtracting recovered set from fault set: %w", err) - } - - uc, err := unrecovered.Count() - if err != nil { - return nil, nil, xerrors.Errorf("counting unrecovered sectors: %w", err) - } - - if uc == 0 { - continue - } - - faulty += uc - - recovered, err := s.checkSectors(ctx, unrecovered, tsk) - if err != nil { - return nil, nil, xerrors.Errorf("checking unrecovered sectors: %w", err) - } - - // if all sectors failed to recover, don't declare recoveries - recoveredCount, err := recovered.Count() - if err != nil { - return nil, nil, xerrors.Errorf("counting recovered sectors: %w", err) - } - - if recoveredCount == 0 { - continue - } - - params.Recoveries = append(params.Recoveries, miner.RecoveryDeclaration{ - Deadline: dlIdx, - Partition: uint64(partIdx), - Sectors: recovered, - }) - } - - recoveries := params.Recoveries - if len(recoveries) == 0 { - if faulty != 0 { - log.Warnw("No recoveries to declare", "deadline", dlIdx, "faulty", faulty) - } - - return recoveries, nil, nil - } - - enc, aerr := actors.SerializeParams(params) - if aerr != nil { - return recoveries, nil, xerrors.Errorf("could not serialize declare recoveries parameters: %w", aerr) - } - - msg := &types.Message{ - To: s.actor, - Method: builtin.MethodsMiner.DeclareFaultsRecovered, - Params: enc, - Value: types.NewInt(0), - } - spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)} - if 
err := s.prepareMessage(ctx, msg, spec); err != nil { - return recoveries, nil, err - } - - sm, err := s.api.MpoolPushMessage(ctx, msg, &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)}) - if err != nil { - return recoveries, sm, xerrors.Errorf("pushing message to mpool: %w", err) - } - - log.Warnw("declare faults recovered Message CID", "cid", sm.Cid()) - - rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true) - if err != nil { - return recoveries, sm, xerrors.Errorf("declare faults recovered wait error: %w", err) - } - - if rec.Receipt.ExitCode != 0 { - return recoveries, sm, xerrors.Errorf("declare faults recovered wait non-0 exit code: %d", rec.Receipt.ExitCode) - } - - return recoveries, sm, nil -} - -// declareFaults identifies the sectors on the specified proving deadline that -// are faulty, and reports the faults on chain via the `DeclareFaults` message -// to our miner actor. -// -// This is always invoked ahead of time, before the deadline for the evaluated -// sectors arrives. That way, faults are declared before a penalty is accrued. -// -// If a declaration is made, it awaits for build.MessageConfidence confirmations -// on chain before returning. -// -// TODO: the waiting should happen in the background. Right now this -// is blocking/delaying the actual generation and submission of WindowPoSts in -// this deadline! -func (s *WindowPoStScheduler) declareFaults(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.FaultDeclaration, *types.SignedMessage, error) { - ctx, span := trace.StartSpan(ctx, "storage.declareFaults") - defer span.End() - - bad := uint64(0) - params := &miner.DeclareFaultsParams{ - Faults: []miner.FaultDeclaration{}, - } - - for partIdx, partition := range partitions { - nonFaulty, err := bitfield.SubtractBitField(partition.LiveSectors, partition.FaultySectors) - if err != nil { - return nil, nil, xerrors.Errorf("determining non faulty sectors: %w", err) - } - - good, err := s.checkSectors(ctx, nonFaulty, tsk) - if err != nil { - return nil, nil, xerrors.Errorf("checking sectors: %w", err) - } - - newFaulty, err := bitfield.SubtractBitField(nonFaulty, good) - if err != nil { - return nil, nil, xerrors.Errorf("calculating faulty sector set: %w", err) - } - - c, err := newFaulty.Count() - if err != nil { - return nil, nil, xerrors.Errorf("counting faulty sectors: %w", err) - } - - if c == 0 { - continue - } - - bad += c - - params.Faults = append(params.Faults, miner.FaultDeclaration{ - Deadline: dlIdx, - Partition: uint64(partIdx), - Sectors: newFaulty, - }) - } - - faults := params.Faults - if len(faults) == 0 { - return faults, nil, nil - } - - log.Errorw("DETECTED FAULTY SECTORS, declaring faults", "count", bad) - - enc, aerr := actors.SerializeParams(params) - if aerr != nil { - return faults, nil, xerrors.Errorf("could not serialize declare faults parameters: %w", aerr) - } - - msg := &types.Message{ - To: s.actor, - Method: builtin.MethodsMiner.DeclareFaults, - Params: enc, - Value: types.NewInt(0), // TODO: Is there a fee? 
- } - spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)} - if err := s.prepareMessage(ctx, msg, spec); err != nil { - return faults, nil, err - } - - sm, err := s.api.MpoolPushMessage(ctx, msg, spec) - if err != nil { - return faults, sm, xerrors.Errorf("pushing message to mpool: %w", err) - } - - log.Warnw("declare faults Message CID", "cid", sm.Cid()) - - rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true) - if err != nil { - return faults, sm, xerrors.Errorf("declare faults wait error: %w", err) - } - - if rec.Receipt.ExitCode != 0 { - return faults, sm, xerrors.Errorf("declare faults wait non-0 exit code: %d", rec.Receipt.ExitCode) - } - - return faults, sm, nil -} - -func (s *WindowPoStScheduler) asyncFaultRecover(di dline.Info, ts *types.TipSet) { - go func() { - // check faults / recoveries for the *next* deadline. It's already too - // late to declare them for this deadline - declDeadline := (di.Index + 2) % di.WPoStPeriodDeadlines - - partitions, err := s.api.StateMinerPartitions(context.TODO(), s.actor, declDeadline, ts.Key()) - if err != nil { - log.Errorf("getting partitions: %v", err) - return - } - - var ( - sigmsg *types.SignedMessage - recoveries []miner.RecoveryDeclaration - faults []miner.FaultDeclaration - - // optionalCid returns the CID of the message, or cid.Undef is the - // message is nil. We don't need the argument (could capture the - // pointer), but it's clearer and purer like that. - optionalCid = func(sigmsg *types.SignedMessage) cid.Cid { - if sigmsg == nil { - return cid.Undef - } - return sigmsg.Cid() - } - ) - - if recoveries, sigmsg, err = s.declareRecoveries(context.TODO(), declDeadline, partitions, ts.Key()); err != nil { - // TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse - log.Errorf("checking sector recoveries: %v", err) - } - - s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStRecoveries], func() interface{} { - j := WdPoStRecoveriesProcessedEvt{ - evtCommon: s.getEvtCommon(err), - Declarations: recoveries, - MessageCID: optionalCid(sigmsg), - } - j.Error = err - return j - }) - - if ts.Height() > build.UpgradeIgnitionHeight { - return // FORK: declaring faults after ignition upgrade makes no sense - } - - if faults, sigmsg, err = s.declareFaults(context.TODO(), declDeadline, partitions, ts.Key()); err != nil { - // TODO: This is also potentially really bad, but we try to post anyways - log.Errorf("checking sector faults: %v", err) - } - - s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStFaults], func() interface{} { - return WdPoStFaultsProcessedEvt{ - evtCommon: s.getEvtCommon(err), - Declarations: faults, - MessageCID: optionalCid(sigmsg), - } - }) - }() -} - // runPoStCycle runs a full cycle of the PoSt process: // // 1. performs recovery declarations for the next deadline. 
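The move above is mechanical, but the core of declareRecoveries is worth spelling out: it is plain set arithmetic over partition bitfields. The following is a minimal standalone sketch of that first step, assuming only the go-bitfield package; the sector numbers are invented for illustration, and the real scheduler follows the subtraction with a checkSectors readability pass before declaring anything on chain.

package main

import (
	"fmt"

	"github.com/filecoin-project/go-bitfield"
)

func main() {
	// Partition state (illustrative sector numbers): currently faulty
	// sectors, and the subset already declared as recovering.
	faulty := bitfield.NewFromSet([]uint64{1, 2, 3, 4})
	recovering := bitfield.NewFromSet([]uint64{2})

	// unrecovered = faulty - recovering: candidates for a
	// DeclareFaultsRecovered message, before the provability check.
	unrecovered, err := bitfield.SubtractBitField(faulty, recovering)
	if err != nil {
		panic(err)
	}

	uc, _ := unrecovered.Count()
	sectors, _ := unrecovered.All(uc)
	fmt.Println(sectors) // [1 3 4]
}

declareFaults mirrors this with the sets swapped around: it subtracts FaultySectors from LiveSectors, checks what remains, and declares as faulty whatever fails the check.
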
diff --git a/storage/wdpost/wdpost_run_faults.go b/storage/wdpost/wdpost_run_faults.go new file mode 100644 index 00000000000..06d365fae3b --- /dev/null +++ b/storage/wdpost/wdpost_run_faults.go @@ -0,0 +1,289 @@ +package wdpost + +import ( + "context" + "github.com/ipfs/go-cid" + "go.opencensus.io/trace" + "golang.org/x/xerrors" + + "github.com/filecoin-project/go-bitfield" + "github.com/filecoin-project/go-state-types/abi" + "github.com/filecoin-project/go-state-types/builtin" + "github.com/filecoin-project/go-state-types/builtin/v8/miner" + "github.com/filecoin-project/go-state-types/dline" + "github.com/filecoin-project/lotus/api" + "github.com/filecoin-project/lotus/build" + "github.com/filecoin-project/lotus/chain/actors" + "github.com/filecoin-project/lotus/chain/types" +) + +// declareRecoveries identifies sectors that were previously marked as faulty +// for our miner, but are now recovered (i.e. are now provable again) and +// still not reported as such. +// +// It then reports the recovery on chain via a `DeclareFaultsRecovered` +// message to our miner actor. +// +// This is always invoked ahead of time, before the deadline for the evaluated +// sectors arrives. That way, recoveries are declared in preparation for those +// sectors to be proven. +// +// If a declaration is made, it awaits for build.MessageConfidence confirmations +// on chain before returning. +// +// TODO: the waiting should happen in the background. Right now this +// is blocking/delaying the actual generation and submission of WindowPoSts in +// this deadline! +func (s *WindowPoStScheduler) declareRecoveries(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.RecoveryDeclaration, *types.SignedMessage, error) { + ctx, span := trace.StartSpan(ctx, "storage.declareRecoveries") + defer span.End() + + faulty := uint64(0) + params := &miner.DeclareFaultsRecoveredParams{ + Recoveries: []miner.RecoveryDeclaration{}, + } + + for partIdx, partition := range partitions { + unrecovered, err := bitfield.SubtractBitField(partition.FaultySectors, partition.RecoveringSectors) + if err != nil { + return nil, nil, xerrors.Errorf("subtracting recovered set from fault set: %w", err) + } + + uc, err := unrecovered.Count() + if err != nil { + return nil, nil, xerrors.Errorf("counting unrecovered sectors: %w", err) + } + + if uc == 0 { + continue + } + + faulty += uc + + recovered, err := s.checkSectors(ctx, unrecovered, tsk) + if err != nil { + return nil, nil, xerrors.Errorf("checking unrecovered sectors: %w", err) + } + + // if all sectors failed to recover, don't declare recoveries + recoveredCount, err := recovered.Count() + if err != nil { + return nil, nil, xerrors.Errorf("counting recovered sectors: %w", err) + } + + if recoveredCount == 0 { + continue + } + + params.Recoveries = append(params.Recoveries, miner.RecoveryDeclaration{ + Deadline: dlIdx, + Partition: uint64(partIdx), + Sectors: recovered, + }) + } + + recoveries := params.Recoveries + if len(recoveries) == 0 { + if faulty != 0 { + log.Warnw("No recoveries to declare", "deadline", dlIdx, "faulty", faulty) + } + + return recoveries, nil, nil + } + + enc, aerr := actors.SerializeParams(params) + if aerr != nil { + return recoveries, nil, xerrors.Errorf("could not serialize declare recoveries parameters: %w", aerr) + } + + msg := &types.Message{ + To: s.actor, + Method: builtin.MethodsMiner.DeclareFaultsRecovered, + Params: enc, + Value: types.NewInt(0), + } + spec := 
&api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)} + if err := s.prepareMessage(ctx, msg, spec); err != nil { + return recoveries, nil, err + } + + sm, err := s.api.MpoolPushMessage(ctx, msg, &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)}) + if err != nil { + return recoveries, sm, xerrors.Errorf("pushing message to mpool: %w", err) + } + + log.Warnw("declare faults recovered Message CID", "cid", sm.Cid()) + + rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true) + if err != nil { + return recoveries, sm, xerrors.Errorf("declare faults recovered wait error: %w", err) + } + + if rec.Receipt.ExitCode != 0 { + return recoveries, sm, xerrors.Errorf("declare faults recovered wait non-0 exit code: %d", rec.Receipt.ExitCode) + } + + return recoveries, sm, nil +} + +// declareFaults identifies the sectors on the specified proving deadline that +// are faulty, and reports the faults on chain via the `DeclareFaults` message +// to our miner actor. +// +// NOTE: THIS CODE ISN'T INVOKED AFTER THE IGNITION UPGRADE +// +// This is always invoked ahead of time, before the deadline for the evaluated +// sectors arrives. That way, faults are declared before a penalty is accrued. +// +// If a declaration is made, it awaits for build.MessageConfidence confirmations +// on chain before returning. +// +// TODO: the waiting should happen in the background. Right now this +// is blocking/delaying the actual generation and submission of WindowPoSts in +// this deadline! +func (s *WindowPoStScheduler) declareFaults(ctx context.Context, dlIdx uint64, partitions []api.Partition, tsk types.TipSetKey) ([]miner.FaultDeclaration, *types.SignedMessage, error) { + ctx, span := trace.StartSpan(ctx, "storage.declareFaults") + defer span.End() + + bad := uint64(0) + params := &miner.DeclareFaultsParams{ + Faults: []miner.FaultDeclaration{}, + } + + for partIdx, partition := range partitions { + nonFaulty, err := bitfield.SubtractBitField(partition.LiveSectors, partition.FaultySectors) + if err != nil { + return nil, nil, xerrors.Errorf("determining non faulty sectors: %w", err) + } + + good, err := s.checkSectors(ctx, nonFaulty, tsk) + if err != nil { + return nil, nil, xerrors.Errorf("checking sectors: %w", err) + } + + newFaulty, err := bitfield.SubtractBitField(nonFaulty, good) + if err != nil { + return nil, nil, xerrors.Errorf("calculating faulty sector set: %w", err) + } + + c, err := newFaulty.Count() + if err != nil { + return nil, nil, xerrors.Errorf("counting faulty sectors: %w", err) + } + + if c == 0 { + continue + } + + bad += c + + params.Faults = append(params.Faults, miner.FaultDeclaration{ + Deadline: dlIdx, + Partition: uint64(partIdx), + Sectors: newFaulty, + }) + } + + faults := params.Faults + if len(faults) == 0 { + return faults, nil, nil + } + + log.Errorw("DETECTED FAULTY SECTORS, declaring faults", "count", bad) + + enc, aerr := actors.SerializeParams(params) + if aerr != nil { + return faults, nil, xerrors.Errorf("could not serialize declare faults parameters: %w", aerr) + } + + msg := &types.Message{ + To: s.actor, + Method: builtin.MethodsMiner.DeclareFaults, + Params: enc, + Value: types.NewInt(0), // TODO: Is there a fee? 
+ } + spec := &api.MessageSendSpec{MaxFee: abi.TokenAmount(s.feeCfg.MaxWindowPoStGasFee)} + if err := s.prepareMessage(ctx, msg, spec); err != nil { + return faults, nil, err + } + + sm, err := s.api.MpoolPushMessage(ctx, msg, spec) + if err != nil { + return faults, sm, xerrors.Errorf("pushing message to mpool: %w", err) + } + + log.Warnw("declare faults Message CID", "cid", sm.Cid()) + + rec, err := s.api.StateWaitMsg(context.TODO(), sm.Cid(), build.MessageConfidence, api.LookbackNoLimit, true) + if err != nil { + return faults, sm, xerrors.Errorf("declare faults wait error: %w", err) + } + + if rec.Receipt.ExitCode != 0 { + return faults, sm, xerrors.Errorf("declare faults wait non-0 exit code: %d", rec.Receipt.ExitCode) + } + + return faults, sm, nil +} + +func (s *WindowPoStScheduler) asyncFaultRecover(di dline.Info, ts *types.TipSet) { + go func() { + // check faults / recoveries for the *next* deadline. It's already too + // late to declare them for this deadline + declDeadline := (di.Index + 2) % di.WPoStPeriodDeadlines + + partitions, err := s.api.StateMinerPartitions(context.TODO(), s.actor, declDeadline, ts.Key()) + if err != nil { + log.Errorf("getting partitions: %v", err) + return + } + + var ( + sigmsg *types.SignedMessage + recoveries []miner.RecoveryDeclaration + faults []miner.FaultDeclaration + + // optionalCid returns the CID of the message, or cid.Undef is the + // message is nil. We don't need the argument (could capture the + // pointer), but it's clearer and purer like that. + optionalCid = func(sigmsg *types.SignedMessage) cid.Cid { + if sigmsg == nil { + return cid.Undef + } + return sigmsg.Cid() + } + ) + + if recoveries, sigmsg, err = s.declareRecoveries(context.TODO(), declDeadline, partitions, ts.Key()); err != nil { + // TODO: This is potentially quite bad, but not even trying to post when this fails is objectively worse + log.Errorf("checking sector recoveries: %v", err) + } + + s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStRecoveries], func() interface{} { + j := WdPoStRecoveriesProcessedEvt{ + evtCommon: s.getEvtCommon(err), + Declarations: recoveries, + MessageCID: optionalCid(sigmsg), + } + j.Error = err + return j + }) + + if ts.Height() > build.UpgradeIgnitionHeight { + return // FORK: declaring faults after ignition upgrade makes no sense + } + + if faults, sigmsg, err = s.declareFaults(context.TODO(), declDeadline, partitions, ts.Key()); err != nil { + // TODO: This is also potentially really bad, but we try to post anyways + log.Errorf("checking sector faults: %v", err) + } + + s.journal.RecordEvent(s.evtTypes[evtTypeWdPoStFaults], func() interface{} { + return WdPoStFaultsProcessedEvt{ + evtCommon: s.getEvtCommon(err), + Declarations: faults, + MessageCID: optionalCid(sigmsg), + } + }) + }() +} From 84881f64ad35a19d2b0d4eccdc8f8c39e5ac191d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Fri, 1 Jul 2022 21:24:54 +0200 Subject: [PATCH 2/6] sealer: Config for disabling builtin PoSt --- .../en/default-lotus-miner-config.toml | 18 +++++++++++++++++- node/config/doc_gen.go | 18 +++++++++++++++++- node/config/storage.go | 4 +++- node/config/types.go | 16 +++++++++++++++- storage/sealer/manager.go | 16 +++++++++++----- storage/sealer/manager_post.go | 14 ++++++++------ 6 files changed, 71 insertions(+), 15 deletions(-) diff --git a/documentation/en/default-lotus-miner-config.toml b/documentation/en/default-lotus-miner-config.toml index 44a9117e3e6..a88782d2b97 100644 --- a/documentation/en/default-lotus-miner-config.toml +++ 
b/documentation/en/default-lotus-miner-config.toml @@ -309,12 +309,28 @@ [Proving] - # Maximum number of sector checks to run in parallel. (0 = unlimited) + # WARNING: Setting this value too high may make the node crash by running out of stack + # WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due + # to late submission. # # type: int # env var: LOTUS_PROVING_PARALLELCHECKLIMIT #ParallelCheckLimit = 128 + # WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need + # to be recovered. Before enabling this option, make sure your PoSt workers work correctly. + # + # type: bool + # env var: LOTUS_PROVING_DISABLEBUILTINWINDOWPOST + #DisableBuiltinWindowPoSt = false + + # WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. + # Before enabling this option, make sure your PoSt workers work correctly. + # + # type: bool + # env var: LOTUS_PROVING_DISABLEBUILTINWINNINGPOST + #DisableBuiltinWinningPoSt = false + [Sealing] # Upper bound on how many sectors can be waiting for more deals to be packed in it before it begins sealing at any given time. diff --git a/node/config/doc_gen.go b/node/config/doc_gen.go index 68d81f80273..22c3bdbfa84 100644 --- a/node/config/doc_gen.go +++ b/node/config/doc_gen.go @@ -627,7 +627,23 @@ over the worker address if this flag is set.`, Name: "ParallelCheckLimit", Type: "int", - Comment: `Maximum number of sector checks to run in parallel. (0 = unlimited)`, + Comment: `WARNING: Setting this value too high may make the node crash by running out of stack +WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due +to late submission.`, + }, + { + Name: "DisableBuiltinWindowPoSt", + Type: "bool", + + Comment: `WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need +to be recovered. Before enabling this option, make sure your PoSt workers work correctly.`, + }, + { + Name: "DisableBuiltinWinningPoSt", + Type: "bool", + + Comment: `WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. +Before enabling this option, make sure your PoSt workers work correctly.`, }, }, "Pubsub": []DocField{ diff --git a/node/config/storage.go b/node/config/storage.go index 16f688075ee..99ca0df8708 100644 --- a/node/config/storage.go +++ b/node/config/storage.go @@ -67,6 +67,8 @@ func (c *StorageMiner) StorageManager() sealer.Config { Assigner: c.Storage.Assigner, - ParallelCheckLimit: c.Proving.ParallelCheckLimit, + ParallelCheckLimit: c.Proving.ParallelCheckLimit, + DisableBuiltinWindowPoSt: c.Proving.DisableBuiltinWindowPoSt, + DisableBuiltinWinningPoSt: c.Proving.DisableBuiltinWinningPoSt, } } diff --git a/node/config/types.go b/node/config/types.go index cd05c8ce5e7..beb9eb4951b 100644 --- a/node/config/types.go +++ b/node/config/types.go @@ -221,9 +221,23 @@ type RetrievalPricingDefault struct { type ProvingConfig struct { // Maximum number of sector checks to run in parallel. (0 = unlimited) + // + // WARNING: Setting this value too high may make the node crash by running out of stack + // WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due + // to late submission. 
ParallelCheckLimit int - // todo disable builtin post + // Disable Window PoSt computation on the lotus-miner process even if no window PoSt workers are present. + // + // WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need + // to be recovered. Before enabling this option, make sure your PoSt workers work correctly. + DisableBuiltinWindowPoSt bool + + // Disable Winning PoSt computation on the lotus-miner process even if no winning PoSt workers are present. + // + // WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. + // Before enabling this option, make sure your PoSt workers work correctly. + DisableBuiltinWinningPoSt bool } type SealingConfig struct { diff --git a/storage/sealer/manager.go b/storage/sealer/manager.go index 47112cbc0f3..3e5e6032bb6 100644 --- a/storage/sealer/manager.go +++ b/storage/sealer/manager.go @@ -69,8 +69,10 @@ type Manager struct { workLk sync.Mutex work *statestore.StateStore - parallelCheckLimit int - disallowRemoteFinalize bool + parallelCheckLimit int + disableBuiltinWindowPoSt bool + disableBuiltinWinningPoSt bool + disallowRemoteFinalize bool callToWork map[storiface.CallID]WorkID // used when we get an early return and there's no callToWork mapping @@ -120,7 +122,9 @@ type Config struct { ResourceFiltering ResourceFilteringStrategy // PoSt config - ParallelCheckLimit int + ParallelCheckLimit int + DisableBuiltinWindowPoSt bool + DisableBuiltinWinningPoSt bool DisallowRemoteFinalize bool @@ -156,8 +160,10 @@ func New(ctx context.Context, lstor *paths.Local, stor paths.Store, ls paths.Loc localProver: prover, - parallelCheckLimit: sc.ParallelCheckLimit, - disallowRemoteFinalize: sc.DisallowRemoteFinalize, + parallelCheckLimit: sc.ParallelCheckLimit, + disableBuiltinWindowPoSt: sc.DisableBuiltinWindowPoSt, + disableBuiltinWinningPoSt: sc.DisableBuiltinWinningPoSt, + disallowRemoteFinalize: sc.DisallowRemoteFinalize, work: mss, callToWork: map[storiface.CallID]WorkID{}, diff --git a/storage/sealer/manager_post.go b/storage/sealer/manager_post.go index e88807a29f0..a4b812f8fb6 100644 --- a/storage/sealer/manager_post.go +++ b/storage/sealer/manager_post.go @@ -17,7 +17,9 @@ import ( ) func (m *Manager) GenerateWinningPoSt(ctx context.Context, minerID abi.ActorID, sectorInfo []proof.ExtendedSectorInfo, randomness abi.PoStRandomness) ([]proof.PoStProof, error) { - if !m.winningPoStSched.CanSched(ctx) { + if !m.disableBuiltinWinningPoSt && !m.winningPoStSched.CanSched(ctx) { + // if builtin PoSt isn't disabled, and there are no workers, compute the PoSt locally + log.Info("GenerateWinningPoSt run at lotus-miner") return m.localProver.GenerateWinningPoSt(ctx, minerID, sectorInfo, randomness) } @@ -76,7 +78,9 @@ func (m *Manager) generateWinningPoSt(ctx context.Context, minerID abi.ActorID, } func (m *Manager) GenerateWindowPoSt(ctx context.Context, minerID abi.ActorID, sectorInfo []proof.ExtendedSectorInfo, randomness abi.PoStRandomness) (proof []proof.PoStProof, skipped []abi.SectorID, err error) { - if !m.windowPoStSched.CanSched(ctx) { + if !m.disableBuiltinWindowPoSt && !m.windowPoStSched.CanSched(ctx) { + // if builtin PoSt isn't disabled, and there are no workers, compute the PoSt locally + log.Info("GenerateWindowPoSt run at lotus-miner") return m.localProver.GenerateWindowPoSt(ctx, minerID, sectorInfo, randomness) } @@ -230,11 +234,9 @@ func (m *Manager) generatePartitionWindowPost(ctx context.Context, spt abi.Regis } func (m *Manager) 
GenerateWinningPoStWithVanilla(ctx context.Context, proofType abi.RegisteredPoStProof, minerID abi.ActorID, randomness abi.PoStRandomness, proofs [][]byte) ([]proof.PoStProof, error) { - //TODO implement me - panic("implement me") + panic("worker-level api shouldn't be called at this level") } func (m *Manager) GenerateWindowPoStWithVanilla(ctx context.Context, proofType abi.RegisteredPoStProof, minerID abi.ActorID, randomness abi.PoStRandomness, proofs [][]byte, partitionIdx int) (proof.PoStProof, error) { - //TODO implement me - panic("implement me") + panic("worker-level api shouldn't be called at this level") } From 59f3161fd63bfe3c048014d630cd9cb23de4f0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Fri, 1 Jul 2022 22:20:05 +0200 Subject: [PATCH 3/6] wdpost: Config for disabling sector prechecks --- node/builder_miner.go | 2 +- node/config/doc_gen.go | 16 +++++++++++----- node/config/types.go | 32 ++++++++++++++++++++++++++++++++ node/modules/storageminer.go | 4 ++-- storage/wdpost/wdpost_run.go | 10 ++++++++-- storage/wdpost/wdpost_sched.go | 3 +++ 6 files changed, 57 insertions(+), 10 deletions(-) diff --git a/node/builder_miner.go b/node/builder_miner.go index 8c78091dc54..b72b237618b 100644 --- a/node/builder_miner.go +++ b/node/builder_miner.go @@ -117,7 +117,7 @@ func ConfigStorageMiner(c interface{}) Option { Override(new(*miner.Miner), modules.SetupBlockProducer), Override(new(gen.WinningPoStProver), storage.NewWinningPoStProver), Override(new(*storage.Miner), modules.StorageMiner(cfg.Fees)), - Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(cfg.Fees)), + Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(cfg.Fees, cfg.Proving)), Override(new(sectorblocks.SectorBuilder), From(new(*storage.Miner))), ), diff --git a/node/config/doc_gen.go b/node/config/doc_gen.go index 22c3bdbfa84..bea7c197320 100644 --- a/node/config/doc_gen.go +++ b/node/config/doc_gen.go @@ -627,16 +627,15 @@ over the worker address if this flag is set.`, Name: "ParallelCheckLimit", Type: "int", - Comment: `WARNING: Setting this value too high may make the node crash by running out of stack -WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due -to late submission.`, + Comment: `After changing this option, confirm that the new value works in your setup by invoking +'lotus-miner proving compute window-post 0'`, }, { Name: "DisableBuiltinWindowPoSt", Type: "bool", - Comment: `WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need -to be recovered. Before enabling this option, make sure your PoSt workers work correctly.`, + Comment: `After changing this option, confirm that the new value works in your setup by invoking +'lotus-miner proving compute window-post 0'`, }, { Name: "DisableBuiltinWinningPoSt", @@ -645,6 +644,13 @@ to be recovered. Before enabling this option, make sure your PoSt workers work c Comment: `WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. 
Before enabling this option, make sure your PoSt workers work correctly.`, }, + { + Name: "DisableWDPoStPreChecks", + Type: "bool", + + Comment: `After changing this option, confirm that the new value works in your setup by invoking +'lotus-miner proving compute window-post 0'`, + }, }, "Pubsub": []DocField{ { diff --git a/node/config/types.go b/node/config/types.go index beb9eb4951b..9a1dcbe00a0 100644 --- a/node/config/types.go +++ b/node/config/types.go @@ -225,12 +225,18 @@ type ProvingConfig struct { // WARNING: Setting this value too high may make the node crash by running out of stack // WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due // to late submission. + // + // After changing this option, confirm that the new value works in your setup by invoking + // 'lotus-miner proving compute window-post 0' ParallelCheckLimit int // Disable Window PoSt computation on the lotus-miner process even if no window PoSt workers are present. // // WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need // to be recovered. Before enabling this option, make sure your PoSt workers work correctly. + // + // After changing this option, confirm that the new value works in your setup by invoking + // 'lotus-miner proving compute window-post 0' DisableBuiltinWindowPoSt bool // Disable Winning PoSt computation on the lotus-miner process even if no winning PoSt workers are present. @@ -238,6 +244,32 @@ type ProvingConfig struct { // WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. // Before enabling this option, make sure your PoSt workers work correctly. DisableBuiltinWinningPoSt bool + + // Disable WindowPoSt provable sector readability checks. + // + // In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges + // from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as + // we're only interested in checkdng that sector data can be read. + // + // When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process + // can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by + // the builtin logic not skipping snark computation when some sectors need to be skipped. + // + // When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and + // if challenges for some sectors aren't readable, those sectors will just get skipped. + // + // Disabling sector pre-checks will slightly requice IO load when proving sectors, possibly resulting in shorter + // time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should + // be negligible. + // + // NOTE: It likely is a bad idea to disable sector pre-checks in setups with no PoSt workers. 
+ // + // NOTE: Even when this option is enabled, recovering sectors will be checked before recovery declaration message is + // sent to the chain + // + // After changing this option, confirm that the new value works in your setup by invoking + // 'lotus-miner proving compute window-post 0' + DisableWDPoStPreChecks bool } type SealingConfig struct { diff --git a/node/modules/storageminer.go b/node/modules/storageminer.go index 3e538247771..94d36547454 100644 --- a/node/modules/storageminer.go +++ b/node/modules/storageminer.go @@ -254,7 +254,7 @@ func StorageMiner(fc config.MinerFeeConfig) func(params StorageMinerParams) (*st } } -func WindowPostScheduler(fc config.MinerFeeConfig) func(params StorageMinerParams) (*wdpost.WindowPoStScheduler, error) { +func WindowPostScheduler(fc config.MinerFeeConfig, pc config.ProvingConfig) func(params StorageMinerParams) (*wdpost.WindowPoStScheduler, error) { return func(params StorageMinerParams) (*wdpost.WindowPoStScheduler, error) { var ( mctx = params.MetricsCtx @@ -269,7 +269,7 @@ func WindowPostScheduler(fc config.MinerFeeConfig) func(params StorageMinerParam ctx := helpers.LifecycleCtx(mctx, lc) - fps, err := wdpost.NewWindowedPoStScheduler(api, fc, as, sealer, verif, sealer, j, maddr) + fps, err := wdpost.NewWindowedPoStScheduler(api, fc, pc, as, sealer, verif, sealer, j, maddr) if err != nil { return nil, err } diff --git a/storage/wdpost/wdpost_run.go b/storage/wdpost/wdpost_run.go index 1a77831cb1c..85464ce2ddf 100644 --- a/storage/wdpost/wdpost_run.go +++ b/storage/wdpost/wdpost_run.go @@ -335,9 +335,15 @@ func (s *WindowPoStScheduler) runPoStCycle(ctx context.Context, manual bool, di return nil, xerrors.Errorf("adding recoveries to set of sectors to prove: %w", err) } - good, err := s.checkSectors(ctx, toProve, ts.Key()) + good, err := toProve.Copy() if err != nil { - return nil, xerrors.Errorf("checking sectors to skip: %w", err) + return nil, xerrors.Errorf("copy toProve: %w", err) + } + if !s.disablePreChecks { + good, err = s.checkSectors(ctx, toProve, ts.Key()) + if err != nil { + return nil, xerrors.Errorf("checking sectors to skip: %w", err) + } } good, err = bitfield.SubtractBitField(good, postSkipped) diff --git a/storage/wdpost/wdpost_sched.go b/storage/wdpost/wdpost_sched.go index 6ac77d9ba91..66e8f9f1866 100644 --- a/storage/wdpost/wdpost_sched.go +++ b/storage/wdpost/wdpost_sched.go @@ -70,6 +70,7 @@ type WindowPoStScheduler struct { faultTracker sealer.FaultTracker proofType abi.RegisteredPoStProof partitionSectors uint64 + disablePreChecks bool ch *changeHandler actor address.Address @@ -84,6 +85,7 @@ type WindowPoStScheduler struct { // NewWindowedPoStScheduler creates a new WindowPoStScheduler scheduler. 
func NewWindowedPoStScheduler(api NodeAPI, cfg config.MinerFeeConfig, + pcfg config.ProvingConfig, as *ctladdr.AddressSelector, sp storiface.ProverPoSt, verif storiface.Verifier, @@ -104,6 +106,7 @@ func NewWindowedPoStScheduler(api NodeAPI, faultTracker: ft, proofType: mi.WindowPoStProofType, partitionSectors: mi.WindowPoStPartitionSectors, + disablePreChecks: pcfg.DisableWDPoStPreChecks, actor: actor, evtTypes: [...]journal.EventType{ From d5100f883d586b8a002c5878235d8cab7bd62061 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Fri, 1 Jul 2022 22:21:19 +0200 Subject: [PATCH 4/6] cfgdoc-gen: Handle empty lines in comments --- documentation/en/default-lotus-config.toml | 3 ++ .../en/default-lotus-miner-config.toml | 44 +++++++++++++++++ node/config/cfgdocgen/gen.go | 2 +- node/config/doc_gen.go | 48 +++++++++++++++++-- storage/wdpost/wdpost_run_faults.go | 2 + 5 files changed, 93 insertions(+), 6 deletions(-) diff --git a/documentation/en/default-lotus-config.toml b/documentation/en/default-lotus-config.toml index ff4be96c8af..d33abd3c251 100644 --- a/documentation/en/default-lotus-config.toml +++ b/documentation/en/default-lotus-config.toml @@ -15,6 +15,9 @@ [Backup] + # When set to true disables metadata log (.lotus/kvlog). This can save disk + # space by reducing metadata redundancy. + # # Note that in case of metadata corruption it might be much harder to recover # your node if metadata log is disabled # diff --git a/documentation/en/default-lotus-miner-config.toml b/documentation/en/default-lotus-miner-config.toml index a88782d2b97..984ec08fb7e 100644 --- a/documentation/en/default-lotus-miner-config.toml +++ b/documentation/en/default-lotus-miner-config.toml @@ -15,6 +15,9 @@ [Backup] + # When set to true disables metadata log (.lotus/kvlog). This can save disk + # space by reducing metadata redundancy. + # # Note that in case of metadata corruption it might be much harder to recover # your node if metadata log is disabled # @@ -309,21 +312,33 @@ [Proving] + # Maximum number of sector checks to run in parallel. (0 = unlimited) + # # WARNING: Setting this value too high may make the node crash by running out of stack # WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due # to late submission. + # + # After changing this option, confirm that the new value works in your setup by invoking + # 'lotus-miner proving compute window-post 0' # # type: int # env var: LOTUS_PROVING_PARALLELCHECKLIMIT #ParallelCheckLimit = 128 + # Disable Window PoSt computation on the lotus-miner process even if no window PoSt workers are present. + # # WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need # to be recovered. Before enabling this option, make sure your PoSt workers work correctly. + # + # After changing this option, confirm that the new value works in your setup by invoking + # 'lotus-miner proving compute window-post 0' # # type: bool # env var: LOTUS_PROVING_DISABLEBUILTINWINDOWPOST #DisableBuiltinWindowPoSt = false + # Disable Winning PoSt computation on the lotus-miner process even if no winning PoSt workers are present. + # # WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. # Before enabling this option, make sure your PoSt workers work correctly. 
# @@ -331,6 +346,35 @@ # env var: LOTUS_PROVING_DISABLEBUILTINWINNINGPOST #DisableBuiltinWinningPoSt = false + # Disable WindowPoSt provable sector readability checks. + # + # In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges + # from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as + # we're only interested in checkdng that sector data can be read. + # + # When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process + # can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by + # the builtin logic not skipping snark computation when some sectors need to be skipped. + # + # When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and + # if challenges for some sectors aren't readable, those sectors will just get skipped. + # + # Disabling sector pre-checks will slightly requice IO load when proving sectors, possibly resulting in shorter + # time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should + # be negligible. + # + # NOTE: It likely is a bad idea to disable sector pre-checks in setups with no PoSt workers. + # + # NOTE: Even when this option is enabled, recovering sectors will be checked before recovery declaration message is + # sent to the chain + # + # After changing this option, confirm that the new value works in your setup by invoking + # 'lotus-miner proving compute window-post 0' + # + # type: bool + # env var: LOTUS_PROVING_DISABLEWDPOSTPRECHECKS + #DisableWDPoStPreChecks = false + [Sealing] # Upper bound on how many sectors can be waiting for more deals to be packed in it before it begins sealing at any given time. diff --git a/node/config/cfgdocgen/gen.go b/node/config/cfgdocgen/gen.go index 8d0efb65e6b..5133501523c 100644 --- a/node/config/cfgdocgen/gen.go +++ b/node/config/cfgdocgen/gen.go @@ -53,7 +53,7 @@ func run() error { continue } case stType: - if strings.HasPrefix(line, "// ") { + if strings.HasPrefix(line, "//") { cline := strings.TrimSpace(strings.TrimPrefix(line, "//")) currentComment = append(currentComment, cline) continue diff --git a/node/config/doc_gen.go b/node/config/doc_gen.go index bea7c197320..4761ff75347 100644 --- a/node/config/doc_gen.go +++ b/node/config/doc_gen.go @@ -34,7 +34,10 @@ var Doc = map[string][]DocField{ Name: "DisableMetadataLog", Type: "bool", - Comment: `Note that in case of metadata corruption it might be much harder to recover + Comment: `When set to true disables metadata log (.lotus/kvlog). This can save disk +space by reducing metadata redundancy. + +Note that in case of metadata corruption it might be much harder to recover your node if metadata log is disabled`, }, }, @@ -627,28 +630,63 @@ over the worker address if this flag is set.`, Name: "ParallelCheckLimit", Type: "int", - Comment: `After changing this option, confirm that the new value works in your setup by invoking + Comment: `Maximum number of sector checks to run in parallel. (0 = unlimited) + +WARNING: Setting this value too high may make the node crash by running out of stack +WARNING: Setting this value too low may make sector challenge reading much slower, resulting in failed PoSt due +to late submission. 
+ +After changing this option, confirm that the new value works in your setup by invoking 'lotus-miner proving compute window-post 0'`, }, { Name: "DisableBuiltinWindowPoSt", Type: "bool", - Comment: `After changing this option, confirm that the new value works in your setup by invoking + Comment: `Disable Window PoSt computation on the lotus-miner process even if no window PoSt workers are present. + +WARNING: If no windowPoSt workers are connected, window PoSt WILL FAIL resulting in faulty sectors which will need +to be recovered. Before enabling this option, make sure your PoSt workers work correctly. + +After changing this option, confirm that the new value works in your setup by invoking 'lotus-miner proving compute window-post 0'`, }, { Name: "DisableBuiltinWinningPoSt", Type: "bool", - Comment: `WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. + Comment: `Disable Winning PoSt computation on the lotus-miner process even if no winning PoSt workers are present. + +WARNING: If no WinningPoSt workers are connected, Winning PoSt WILL FAIL resulting in lost block rewards. Before enabling this option, make sure your PoSt workers work correctly.`, }, { Name: "DisableWDPoStPreChecks", Type: "bool", - Comment: `After changing this option, confirm that the new value works in your setup by invoking + Comment: `Disable WindowPoSt provable sector readability checks. + +In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges +from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as +we're only interested in checkdng that sector data can be read. + +When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process +can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by +the builtin logic not skipping snark computation when some sectors need to be skipped. + +When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and +if challenges for some sectors aren't readable, those sectors will just get skipped. + +Disabling sector pre-checks will slightly requice IO load when proving sectors, possibly resulting in shorter +time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should +be negligible. + +NOTE: It likely is a bad idea to disable sector pre-checks in setups with no PoSt workers. 
+ +NOTE: Even when this option is enabled, recovering sectors will be checked before recovery declaration message is +sent to the chain + +After changing this option, confirm that the new value works in your setup by invoking 'lotus-miner proving compute window-post 0'`, }, }, diff --git a/storage/wdpost/wdpost_run_faults.go b/storage/wdpost/wdpost_run_faults.go index 06d365fae3b..0eda986fbd0 100644 --- a/storage/wdpost/wdpost_run_faults.go +++ b/storage/wdpost/wdpost_run_faults.go @@ -2,6 +2,7 @@ package wdpost import ( "context" + "github.com/ipfs/go-cid" "go.opencensus.io/trace" "golang.org/x/xerrors" @@ -11,6 +12,7 @@ import ( "github.com/filecoin-project/go-state-types/builtin" "github.com/filecoin-project/go-state-types/builtin/v8/miner" "github.com/filecoin-project/go-state-types/dline" + "github.com/filecoin-project/lotus/api" "github.com/filecoin-project/lotus/build" "github.com/filecoin-project/lotus/chain/actors" From 9a97d83889bea35ae519ddceaff33cd1e731503a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Magiera?= Date: Mon, 4 Jul 2022 16:15:23 +0200 Subject: [PATCH 5/6] wdpost: itests for new config fields --- .circleci/config.yml | 10 ++ itests/wdpost_check_config_test.go | 188 ++++++++++++++++++++++++++ itests/wdpost_worker_config_test.go | 201 ++++++++++++++++++++++++++++ 3 files changed, 399 insertions(+) create mode 100644 itests/wdpost_check_config_test.go create mode 100644 itests/wdpost_worker_config_test.go diff --git a/.circleci/config.yml b/.circleci/config.yml index 52a37a59fa3..914fa2ffddb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -958,6 +958,11 @@ workflows: suite: itest-verifreg target: "./itests/verifreg_test.go" + - test: + name: test-itest-wdpost_check_config + suite: itest-wdpost_check_config + target: "./itests/wdpost_check_config_test.go" + - test: name: test-itest-wdpost_dispute suite: itest-wdpost_dispute @@ -968,6 +973,11 @@ workflows: suite: itest-wdpost target: "./itests/wdpost_test.go" + - test: + name: test-itest-wdpost_worker_config + suite: itest-wdpost_worker_config + target: "./itests/wdpost_worker_config_test.go" + - test: name: test-itest-worker suite: itest-worker diff --git a/itests/wdpost_check_config_test.go b/itests/wdpost_check_config_test.go new file mode 100644 index 00000000000..cc10af04f9f --- /dev/null +++ b/itests/wdpost_check_config_test.go @@ -0,0 +1,188 @@ +package itests + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-state-types/abi" + + "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/itests/kit" + "github.com/filecoin-project/lotus/node" + "github.com/filecoin-project/lotus/node/config" + "github.com/filecoin-project/lotus/node/impl" + "github.com/filecoin-project/lotus/node/modules" + "github.com/filecoin-project/lotus/storage/sealer/mock" + "github.com/filecoin-project/lotus/storage/sealer/storiface" + "github.com/filecoin-project/lotus/storage/wdpost" +) + +func TestWindowPostNoPreChecks(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + client, miner, ens := kit.EnsembleMinimal(t, + kit.LatestActorsAt(-1), + kit.MockProofs(), + kit.ConstructorOpts( + node.Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler( + config.DefaultStorageMiner().Fees, + config.ProvingConfig{ + DisableWDPoStPreChecks: true, + }, + )))) + 
ens.InterconnectAll().BeginMining(2 * time.Millisecond) + + nSectors := 10 + + miner.PledgeSectors(ctx, nSectors, 0, nil) + + maddr, err := miner.ActorAddress(ctx) + require.NoError(t, err) + di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) + require.NoError(t, err) + + mid, err := address.IDFromAddress(maddr) + require.NoError(t, err) + + t.Log("Running one proving period") + waitUntil := di.Open + di.WPoStProvingPeriod + t.Logf("End for head.Height > %d", waitUntil) + + ts := client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil)) + t.Logf("Now head.Height = %d", ts.Height()) + + p, err := client.StateMinerPower(ctx, maddr, types.EmptyTSK) + require.NoError(t, err) + + ssz, err := miner.ActorSectorSize(ctx, maddr) + require.NoError(t, err) + + require.Equal(t, p.MinerPower, p.TotalPower) + require.Equal(t, p.MinerPower.RawBytePower, types.NewInt(uint64(ssz)*uint64(nSectors+kit.DefaultPresealsPerBootstrapMiner))) + + t.Log("Drop some sectors") + + // Drop 2 sectors from deadline 2 partition 0 (full partition / deadline) + { + parts, err := client.StateMinerPartitions(ctx, maddr, 2, types.EmptyTSK) + require.NoError(t, err) + require.Greater(t, len(parts), 0) + + secs := parts[0].AllSectors + n, err := secs.Count() + require.NoError(t, err) + require.Equal(t, uint64(2), n) + + // Drop the partition + err = secs.ForEach(func(sid uint64) error { + return miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkCorrupted(storiface.SectorRef{ + ID: abi.SectorID{ + Miner: abi.ActorID(mid), + Number: abi.SectorNumber(sid), + }, + }, true) + }) + require.NoError(t, err) + } + + var s storiface.SectorRef + + // Drop 1 sectors from deadline 3 partition 0 + { + parts, err := client.StateMinerPartitions(ctx, maddr, 3, types.EmptyTSK) + require.NoError(t, err) + require.Greater(t, len(parts), 0) + + secs := parts[0].AllSectors + n, err := secs.Count() + require.NoError(t, err) + require.Equal(t, uint64(2), n) + + // Drop the sector + sn, err := secs.First() + require.NoError(t, err) + + all, err := secs.All(2) + require.NoError(t, err) + t.Log("the sectors", all) + + s = storiface.SectorRef{ + ID: abi.SectorID{ + Miner: abi.ActorID(mid), + Number: abi.SectorNumber(sn), + }, + } + + err = miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkFailed(s, true) + require.NoError(t, err) + } + + di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) + require.NoError(t, err) + + t.Log("Go through another PP, wait for sectors to become faulty") + waitUntil = di.Open + di.WPoStProvingPeriod + t.Logf("End for head.Height > %d", waitUntil) + + ts = client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil)) + t.Logf("Now head.Height = %d", ts.Height()) + + p, err = client.StateMinerPower(ctx, maddr, types.EmptyTSK) + require.NoError(t, err) + + require.Equal(t, p.MinerPower, p.TotalPower) + + sectors := p.MinerPower.RawBytePower.Uint64() / uint64(ssz) + require.Equal(t, nSectors+kit.DefaultPresealsPerBootstrapMiner-3, int(sectors)) // -3 just removed sectors + + t.Log("Recover one sector") + + err = miner.StorageMiner.(*impl.StorageMinerAPI).IStorageMgr.(*mock.SectorMgr).MarkFailed(s, false) + require.NoError(t, err) + + di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) + require.NoError(t, err) + + waitUntil = di.Open + di.WPoStProvingPeriod + t.Logf("End for head.Height > %d", waitUntil) + + ts = client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil)) + t.Logf("Now head.Height = %d", ts.Height()) + + p, err = 
client.StateMinerPower(ctx, maddr, types.EmptyTSK) + require.NoError(t, err) + + require.Equal(t, p.MinerPower, p.TotalPower) + + sectors = p.MinerPower.RawBytePower.Uint64() / uint64(ssz) + require.Equal(t, nSectors+kit.DefaultPresealsPerBootstrapMiner-2, int(sectors)) // -2 not recovered sectors + + // pledge a sector after recovery + + miner.PledgeSectors(ctx, 1, nSectors, nil) + + { + // Wait until proven. + di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) + require.NoError(t, err) + + waitUntil := di.Open + di.WPoStProvingPeriod + t.Logf("End for head.Height > %d\n", waitUntil) + + ts := client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil)) + t.Logf("Now head.Height = %d", ts.Height()) + } + + p, err = client.StateMinerPower(ctx, maddr, types.EmptyTSK) + require.NoError(t, err) + + require.Equal(t, p.MinerPower, p.TotalPower) + + sectors = p.MinerPower.RawBytePower.Uint64() / uint64(ssz) + require.Equal(t, nSectors+kit.DefaultPresealsPerBootstrapMiner-2+1, int(sectors)) // -2 not recovered sectors + 1 just pledged +} diff --git a/itests/wdpost_worker_config_test.go b/itests/wdpost_worker_config_test.go new file mode 100644 index 00000000000..a84896eae1a --- /dev/null +++ b/itests/wdpost_worker_config_test.go @@ -0,0 +1,201 @@ +package itests + +import ( + "context" + "testing" + "time" + + logging "github.com/ipfs/go-log/v2" + "github.com/stretchr/testify/require" + + "github.com/filecoin-project/go-address" + "github.com/filecoin-project/go-state-types/abi" + + "github.com/filecoin-project/lotus/chain/types" + "github.com/filecoin-project/lotus/itests/kit" + "github.com/filecoin-project/lotus/node" + "github.com/filecoin-project/lotus/node/config" + "github.com/filecoin-project/lotus/node/impl" + "github.com/filecoin-project/lotus/node/modules" + "github.com/filecoin-project/lotus/storage/sealer" + "github.com/filecoin-project/lotus/storage/sealer/sealtasks" + "github.com/filecoin-project/lotus/storage/sealer/storiface" + "github.com/filecoin-project/lotus/storage/wdpost" +) + +func TestWindowPostNoBuiltinWindow(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + _ = logging.SetLogLevel("storageminer", "INFO") + + sectors := 2 * 48 * 2 + + client, miner, _, ens := kit.EnsembleWorker(t, + kit.PresealSectors(sectors), // 2 sectors per partition, 2 partitions in all 48 deadlines + kit.LatestActorsAt(-1), + kit.ConstructorOpts( + node.Override(new(sealer.Config), func() sealer.Config { + c := config.DefaultStorageMiner().StorageManager() + c.DisableBuiltinWindowPoSt = true + return c + }), + node.Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler( + config.DefaultStorageMiner().Fees, + config.ProvingConfig{ + DisableWDPoStPreChecks: false, + }, + ))), + kit.ThroughRPC()) // generic non-post worker + + maddr, err := miner.ActorAddress(ctx) + require.NoError(t, err) + + di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK) + require.NoError(t, err) + + bm := ens.InterconnectAll().BeginMining(2 * time.Millisecond)[0] + + di = di.NextNotElapsed() + + t.Log("Running one proving period") + waitUntil := di.Open + di.WPoStChallengeWindow*2 + wdpost.SubmitConfidence + client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil)) + + t.Log("Waiting for post message") + bm.Stop() + + var lastPending []*types.SignedMessage + for i := 0; i < 500; i++ { + lastPending, err = client.MpoolPending(ctx, types.EmptyTSK) + require.NoError(t, err) + + if len(lastPending) 
> 0 {
+			break
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+
+	// expect no post messages
+	require.Equal(t, len(lastPending), 0)
+}
+
+func TestWindowPostNoBuiltinWindowWithWorker(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	_ = logging.SetLogLevel("storageminer", "INFO")
+
+	sectors := 2 * 48 * 2
+
+	client, miner, _, ens := kit.EnsembleWorker(t,
+		kit.PresealSectors(sectors), // 2 sectors per partition, 2 partitions in all 48 deadlines
+		kit.LatestActorsAt(-1),
+		kit.ConstructorOpts(
+			node.Override(new(sealer.Config), func() sealer.Config {
+				c := config.DefaultStorageMiner().StorageManager()
+				c.DisableBuiltinWindowPoSt = true
+				return c
+			}),
+			node.Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(
+				config.DefaultStorageMiner().Fees,
+				config.ProvingConfig{
+					DisableBuiltinWindowPoSt:  true,
+					DisableBuiltinWinningPoSt: false,
+					DisableWDPoStPreChecks:    false,
+				},
+			))),
+		kit.ThroughRPC(),
+		kit.WithTaskTypes([]sealtasks.TaskType{sealtasks.TTGenerateWindowPoSt}))
+
+	maddr, err := miner.ActorAddress(ctx)
+	require.NoError(t, err)
+
+	di, err := client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
+	require.NoError(t, err)
+
+	bm := ens.InterconnectAll().BeginMining(2 * time.Millisecond)[0]
+
+	di = di.NextNotElapsed()
+
+	t.Log("Running one proving period")
+	waitUntil := di.Open + di.WPoStChallengeWindow*2 + wdpost.SubmitConfidence
+	client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
+
+	t.Log("Waiting for post message")
+	bm.Stop()
+
+	var lastPending []*types.SignedMessage
+	for i := 0; i < 500; i++ {
+		lastPending, err = client.MpoolPending(ctx, types.EmptyTSK)
+		require.NoError(t, err)
+
+		if len(lastPending) > 0 {
+			break
+		}
+		time.Sleep(40 * time.Millisecond)
+	}
+
+	require.Greater(t, len(lastPending), 0)
+
+	t.Log("post message landed")
+
+	bm.MineBlocks(ctx, 2*time.Millisecond)
+
+	waitUntil = di.Open + di.WPoStChallengeWindow*3
+	t.Logf("End for head.Height > %d", waitUntil)
+
+	ts := client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
+	t.Logf("Now head.Height = %d", ts.Height())
+
+	p, err := client.StateMinerPower(ctx, maddr, types.EmptyTSK)
+	require.NoError(t, err)
+
+	ssz, err := miner.ActorSectorSize(ctx, maddr)
+	require.NoError(t, err)
+
+	require.Equal(t, p.MinerPower, p.TotalPower)
+	require.Equal(t, p.MinerPower.RawBytePower, types.NewInt(uint64(ssz)*uint64(sectors)))
+
+	mid, err := address.IDFromAddress(maddr)
+	require.NoError(t, err)
+
+	di, err = client.StateMinerProvingDeadline(ctx, maddr, types.EmptyTSK)
+	require.NoError(t, err)
+
+	// Remove one sector in the next deadline (so it's skipped)
+	{
+		parts, err := client.StateMinerPartitions(ctx, maddr, di.Index+1, types.EmptyTSK)
+		require.NoError(t, err)
+		require.Greater(t, len(parts), 0)
+
+		secs := parts[0].AllSectors
+		n, err := secs.Count()
+		require.NoError(t, err)
+		require.Equal(t, uint64(2), n)
+
+		// Drop the sector
+		sid, err := secs.First()
+		require.NoError(t, err)
+
+		t.Logf("Drop sector %d; dl %d part %d", sid, di.Index+1, 0)
+
+		err = miner.BaseAPI.(*impl.StorageMinerAPI).IStorageMgr.Remove(ctx, storiface.SectorRef{
+			ID: abi.SectorID{
+				Miner:  abi.ActorID(mid),
+				Number: abi.SectorNumber(sid),
+			},
+		})
+		require.NoError(t, err)
+	}
+
+	waitUntil = di.Close + di.WPoStChallengeWindow
+	ts = client.WaitTillChain(ctx, kit.HeightAtLeast(waitUntil))
+	t.Logf("Now head.Height = %d", ts.Height())
+
+	p, err = client.StateMinerPower(ctx, maddr, types.EmptyTSK)
+	require.NoError(t, err)
+
+	require.Equal(t, p.MinerPower, p.TotalPower)
+	require.Equal(t, p.MinerPower.RawBytePower, types.NewInt(uint64(ssz)*uint64(sectors-1)))
+}

From b35a3258166cb6e9452211b95c2c8e6cfc0bdf69 Mon Sep 17 00:00:00 2001
From: Aayush Rajasekaran
Date: Mon, 4 Jul 2022 11:17:00 -0400
Subject: [PATCH 6/6] Fixup typos

---
 documentation/en/default-lotus-miner-config.toml | 4 ++--
 node/config/doc_gen.go                           | 4 ++--
 node/config/types.go                             | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/documentation/en/default-lotus-miner-config.toml b/documentation/en/default-lotus-miner-config.toml
index 984ec08fb7e..b3992690499 100644
--- a/documentation/en/default-lotus-miner-config.toml
+++ b/documentation/en/default-lotus-miner-config.toml
@@ -350,7 +350,7 @@
 #
 # In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges
 # from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as
-# we're only interested in checkdng that sector data can be read.
+# we're only interested in checking that sector data can be read.
 #
 # When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process
 # can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by
@@ -359,7 +359,7 @@
 # When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and
 # if challenges for some sectors aren't readable, those sectors will just get skipped.
 #
-# Disabling sector pre-checks will slightly requice IO load when proving sectors, possibly resulting in shorter
+# Disabling sector pre-checks will slightly reduce IO load when proving sectors, possibly resulting in shorter
 # time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should
 # be negligible.
 #
diff --git a/node/config/doc_gen.go b/node/config/doc_gen.go
index 4761ff75347..8e35af782f5 100644
--- a/node/config/doc_gen.go
+++ b/node/config/doc_gen.go
@@ -668,7 +668,7 @@ Before enabling this option, make sure your PoSt workers work correctly.`,
 In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges
 from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as
-we're only interested in checkdng that sector data can be read.
+we're only interested in checking that sector data can be read.
 
 When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process
 can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by
@@ -677,7 +677,7 @@ the builtin logic not skipping snark computation when some sectors need to be sk
 When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and
 if challenges for some sectors aren't readable, those sectors will just get skipped.
 
-Disabling sector pre-checks will slightly requice IO load when proving sectors, possibly resulting in shorter
+Disabling sector pre-checks will slightly reduce IO load when proving sectors, possibly resulting in shorter
 time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should
 be negligible.
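The pre-check option documented by this patch reduces to a single decision: read challenges from every live sector up front, or trust the PoSt workers to skip unreadable sectors during proving. A minimal sketch of that branch, assuming lotus' go-bitfield and node/config packages; provableSectors and sectorsReadable are hypothetical names for illustration, not the actual lotus code path:

	// Sketch only: with pre-checks enabled, a round of challenge reads
	// filters out unreadable sectors before any proof computation; with
	// them disabled, proving starts immediately and unreadable sectors
	// are skipped when workers read challenges during proof generation.
	func provableSectors(ctx context.Context, cfg config.ProvingConfig, live bitfield.BitField) (bitfield.BitField, error) {
		if cfg.DisableWDPoStPreChecks {
			return live, nil // save the extra IO round; workers handle skips
		}
		return sectorsReadable(ctx, live) // hypothetical read-check helper
	}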
diff --git a/node/config/types.go b/node/config/types.go
index 9a1dcbe00a0..f9272794226 100644
--- a/node/config/types.go
+++ b/node/config/types.go
@@ -249,7 +249,7 @@ type ProvingConfig struct {
 	//
 	// In normal operation, when preparing to compute WindowPoSt, lotus-miner will perform a round of reading challenges
 	// from all sectors to confirm that those sectors can be proven. Challenges read in this process are discarded, as
-	// we're only interested in checkdng that sector data can be read.
+	// we're only interested in checking that sector data can be read.
 	//
 	// When using builtin proof computation (no PoSt workers, and DisableBuiltinWindowPoSt is set to false), this process
 	// can save a lot of time and compute resources in the case that some sectors are not readable - this is caused by
@@ -258,7 +258,7 @@ type ProvingConfig struct {
 	// When using PoSt workers, this process is mostly redundant, with PoSt workers challenges will be read once, and
 	// if challenges for some sectors aren't readable, those sectors will just get skipped.
 	//
-	// Disabling sector pre-checks will slightly requice IO load when proving sectors, possibly resulting in shorter
+	// Disabling sector pre-checks will slightly reduce IO load when proving sectors, possibly resulting in shorter
 	// time to produce window PoSt. In setups with good IO capabilities the effect of this option on proving time should
 	// be negligible.
 	//
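For reference, the itest added earlier in this series exercises these options programmatically; a minimal sketch of that override, with constructors and field names as they appear in the test above (the flag values are simply the test's choices, not recommended defaults):

	// Builtin WindowPoSt handed off to a TTGenerateWindowPoSt worker;
	// winning PoSt and the sector pre-checks documented here stay enabled.
	node.Override(new(*wdpost.WindowPoStScheduler), modules.WindowPostScheduler(
		config.DefaultStorageMiner().Fees,
		config.ProvingConfig{
			DisableBuiltinWindowPoSt:  true,
			DisableBuiltinWinningPoSt: false,
			DisableWDPoStPreChecks:    false,
		},
	))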