Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

speedup SiStripClusterizer(FromRaw) using ThreeThresholdAlgorithm #47061

Merged
merged 4 commits into from
Jan 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,21 @@ class SiStripClusterizerConditions {

struct Det {
bool valid() const { return ind != invalidI; }
uint16_t rawNoise(const uint16_t strip) const { return SiStripNoises::getRawNoise(strip, noiseRange); }
float noise(const uint16_t strip) const { return SiStripNoises::getNoise(strip, noiseRange); }
uint16_t rawNoise(const uint16_t strip) const { return rawNoises[strip]; }
float noise(const uint16_t strip) const { return 0.1f * float(rawNoises[strip]); }
float weight(const uint16_t strip) const { return m_weight[strip / 128]; }
bool bad(const uint16_t strip) const { return quality->IsStripBad(qualityRange, strip); }
bool bad(const uint16_t strip) const { return qualityBits[strip]; }
bool allBadBetween(uint16_t L, const uint16_t& R) const {
while (++L < R && bad(L)) {
};
return L == R;
}
static constexpr uint16_t kMaxStrips = 768;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could one think about sparing some empty entries by using vectors and resizing at construction time with the correct number of strips (4 APVs vs 6 APVs)? The size is known via the detid.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mmusich
do you happen to know if the number of APVs is strictly connected to the already available variables in the SiStripClusterizerConditions::emplace_back method: invGains or connections ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think invGains.size() should know about it:

std::vector<float> invGains;
invGains.reserve(6);
std::transform(
gainRange.first, gainRange.second, std::back_inserter(invGains), [](auto gain) { return 1.f / gain; });

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

using vectors

I expect that access to vector<bool> is slower than array<bool. I guess I can use vector<uint8_t> or char to have a matching performance

SiStripQuality const* quality;
SiStripNoises::Range noiseRange;
SiStripQuality::Range qualityRange;
std::array<bool, kMaxStrips> qualityBits = {};
SiStripNoises::Range noiseRange;
std::array<uint16_t, kMaxStrips> rawNoises = {};
float m_weight[6];
uint32_t detId = 0;
unsigned short ind = invalidI;
Expand Down Expand Up @@ -72,7 +75,15 @@ class SiStripClusterizerConditions {
auto& det = m_dets.emplace_back();
det.quality = m_quality;
det.qualityRange = qualityRange;
for (uint16_t s = 0U; s < det.kMaxStrips; ++s)
det.qualityBits[s] = m_quality->IsStripBad(qualityRange, s);
det.noiseRange = noiseRange;
auto maxRange8 = (noiseRange.second - noiseRange.first) * 8;
for (uint16_t s = 0U; s < det.kMaxStrips; ++s) {
if (9 * s >= maxRange8)
break;
det.rawNoises[s] = SiStripNoises::getRawNoise(s, noiseRange);
}
for (uint32_t i = 0; i != invGains.size(); ++i) {
det.m_weight[i] = invGains[i];
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#ifndef RecoLocalTracker_SiStripClusterizer_ThreeThresholdAlgorithm_h
#define RecoLocalTracker_SiStripClusterizer_ThreeThresholdAlgorithm_h

#include "DataFormats/SiStripCluster/interface/SiStripCluster.h"
#include "DataFormats/SiStripCluster/interface/SiStripClusterTools.h"
#include "DataFormats/SiStripDigi/interface/SiStripDigi.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "RecoLocalTracker/SiStripClusterizer/interface/StripClusterizerAlgorithm.h"
#include "RecoLocalTracker/SiStripClusterizer/interface/SiStripApvShotCleaner.h"

#include <cmath>
#include <numeric>

class ThreeThresholdAlgorithm final : public StripClusterizerAlgorithm {
friend class StripClusterizerAlgorithmFactory;

Expand Down Expand Up @@ -64,4 +72,112 @@ class ThreeThresholdAlgorithm final : public StripClusterizerAlgorithm {
float minGoodCharge;
};

template <class digiDetSet>
inline void ThreeThresholdAlgorithm::clusterizeDetUnit_(const digiDetSet& digis, output_t::TSFastFiller& output) const {
const auto& cond = conditions();
if (cond.isModuleBad(digis.detId()))
return;

auto const& det = cond.findDetId(digis.detId());
if (!det.valid())
return;

#ifdef EDM_ML_DEBUG
if (!cond.isModuleUsable(digis.detId()))
edm::LogWarning("ThreeThresholdAlgorithm") << " id " << digis.detId() << " not usable???" << std::endl;
#endif

typename digiDetSet::const_iterator scan(digis.begin()), end(digis.end());

SiStripApvShotCleaner ApvCleaner;
if (RemoveApvShots) {
ApvCleaner.clean(digis, scan, end);
}

output.reserve(16);
State state(det);
while (scan != end) {
while (scan != end && !candidateEnded(state, scan->strip()))
addToCandidate(state, *scan++);
endCandidate(state, output);
}
}

inline bool ThreeThresholdAlgorithm::candidateEnded(State const& state, const uint16_t& testStrip) const {
uint16_t holes = testStrip - state.lastStrip - 1;
return (((!state.ADCs.empty()) & // a candidate exists, and
(holes > MaxSequentialHoles) // too many holes if not all are bad strips, and
) &&
(holes > MaxSequentialBad || // (too many bad strips anyway, or
!state.det().allBadBetween(state.lastStrip, testStrip) // not all holes are bad strips)
));
}

inline void ThreeThresholdAlgorithm::addToCandidate(State& state, uint16_t strip, uint8_t adc) const {
float Noise = state.det().noise(strip);
if (adc < static_cast<uint8_t>(Noise * ChannelThreshold) || state.det().bad(strip))
return;

if (state.candidateLacksSeed)
state.candidateLacksSeed = adc < static_cast<uint8_t>(Noise * SeedThreshold);
if (state.ADCs.empty())
state.lastStrip = strip - 1; // begin candidate
while (++state.lastStrip < strip)
state.ADCs.push_back(0); // pad holes

if (state.ADCs.size() <= MaxClusterSize)
state.ADCs.push_back(adc);
state.noiseSquared += Noise * Noise;
}

inline void ThreeThresholdAlgorithm::clusterizeDetUnit(const edmNew::DetSet<SiStripDigi>& digis,
output_t::TSFastFiller& output) const {
clusterizeDetUnit_(digis, output);
}

template <class T>
inline void ThreeThresholdAlgorithm::endCandidate(State& state, T& out) const {
if (candidateAccepted(state)) {
applyGains(state);
if (MaxAdjacentBad > 0)
appendBadNeighbors(state);
if (minGoodCharge <= 0 ||
siStripClusterTools::chargePerCM(state.det().detId, state.ADCs.begin(), state.ADCs.end()) > minGoodCharge)
out.push_back(std::move(SiStripCluster(firstStrip(state), state.ADCs.begin(), state.ADCs.end())));
}
clearCandidate(state);
}

inline bool ThreeThresholdAlgorithm::candidateAccepted(State const& state) const {
return (!state.candidateLacksSeed && state.ADCs.size() <= MaxClusterSize &&
state.noiseSquared * ClusterThresholdSquared <=
std::pow(float(std::accumulate(state.ADCs.begin(), state.ADCs.end(), int(0))), 2.f));
}

inline void ThreeThresholdAlgorithm::applyGains(State& state) const {
uint16_t strip = firstStrip(state);
for (auto& adc : state.ADCs) {
#ifdef EDM_ML_DEBUG
// if(adc > 255) throw InvalidChargeException( SiStripDigi(strip,adc) );
#endif
// if(adc > 253) continue; //saturated, do not scale
auto charge = int(float(adc) * state.det().weight(strip++) + 0.5f); //adding 0.5 turns truncation into rounding
if (adc < 254)
adc = (charge > 1022 ? 255 : (charge > 253 ? 254 : charge));
}
}

inline void ThreeThresholdAlgorithm::appendBadNeighbors(State& state) const {
uint8_t max = MaxAdjacentBad;
while (0 < max--) {
if (state.det().bad(firstStrip(state) - 1)) {
state.ADCs.insert(state.ADCs.begin(), 0);
}
if (state.det().bad(state.lastStrip + 1)) {
state.ADCs.push_back(0);
state.lastStrip++;
}
}
}

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "RecoLocalTracker/SiStripZeroSuppression/interface/SiStripRawProcessingFactory.h"

#include "RecoLocalTracker/SiStripClusterizer/interface/StripClusterizerAlgorithm.h"
#include "RecoLocalTracker/SiStripClusterizer/interface/ThreeThresholdAlgorithm.h"
#include "RecoLocalTracker/SiStripZeroSuppression/interface/SiStripRawProcessingAlgorithms.h"

#include "DataFormats/SiStripCluster/interface/SiStripCluster.h"
Expand Down Expand Up @@ -88,10 +89,11 @@ namespace {
return buffer;
}

template <class AlgoT>
class ClusterFiller final : public StripClusterizerAlgorithm::output_t::Getter {
public:
ClusterFiller(const FEDRawDataCollection& irawColl,
StripClusterizerAlgorithm& iclusterizer,
const AlgoT& iclusterizer,
SiStripRawProcessingAlgorithms& irawAlgos,
bool idoAPVEmulatorCheck,
bool legacy,
Expand All @@ -118,7 +120,7 @@ namespace {

const FEDRawDataCollection& rawColl;

StripClusterizerAlgorithm& clusterizer;
const AlgoT& clusterizer;
const SiStripClusterizerConditions& conditions;
SiStripRawProcessingAlgorithms& rawAlgos;

Expand Down Expand Up @@ -180,7 +182,7 @@ class SiStripClusterizerFromRaw final : public edm::stream::EDProducer<> {
legacy_(conf.getParameter<bool>("LegacyUnpacker")),
hybridZeroSuppressed_(conf.getParameter<bool>("HybridZeroSuppressed")) {
productToken_ = consumes<FEDRawDataCollection>(conf.getParameter<edm::InputTag>("ProductLabel"));
produces<edmNew::DetSetVector<SiStripCluster> >();
produces<edmNew::DetSetVector<SiStripCluster>>();
assert(clusterizer_.get());
assert(rawAlgos_.get());
}
Expand All @@ -192,12 +194,23 @@ class SiStripClusterizerFromRaw final : public edm::stream::EDProducer<> {
edm::Handle<FEDRawDataCollection> rawData;
ev.getByToken(productToken_, rawData);

std::unique_ptr<edmNew::DetSetVector<SiStripCluster> > output(
onDemand ? new edmNew::DetSetVector<SiStripCluster>(
std::shared_ptr<edmNew::DetSetVector<SiStripCluster>::Getter>(std::make_shared<ClusterFiller>(
*rawData, *clusterizer_, *rawAlgos_, doAPVEmulatorCheck_, legacy_, hybridZeroSuppressed_)),
clusterizer_->conditions().allDetIds())
: new edmNew::DetSetVector<SiStripCluster>());
const ThreeThresholdAlgorithm* clusterizer3 = dynamic_cast<const ThreeThresholdAlgorithm*>(clusterizer_.get());
std::unique_ptr<edmNew::DetSetVector<SiStripCluster>> output;
if (onDemand) {
if (clusterizer3 == nullptr)
output = std::make_unique<edmNew::DetSetVector<SiStripCluster>>(edmNew::DetSetVector<SiStripCluster>(
std::shared_ptr<edmNew::DetSetVector<SiStripCluster>::Getter>(
std::make_shared<ClusterFiller<StripClusterizerAlgorithm>>(
*rawData, *clusterizer_, *rawAlgos_, doAPVEmulatorCheck_, legacy_, hybridZeroSuppressed_)),
clusterizer_->conditions().allDetIds()));
else
output = std::make_unique<edmNew::DetSetVector<SiStripCluster>>(edmNew::DetSetVector<SiStripCluster>(
std::shared_ptr<edmNew::DetSetVector<SiStripCluster>::Getter>(
std::make_shared<ClusterFiller<ThreeThresholdAlgorithm>>(
*rawData, *clusterizer3, *rawAlgos_, doAPVEmulatorCheck_, legacy_, hybridZeroSuppressed_)),
clusterizer_->conditions().allDetIds()));
} else
output = std::make_unique<edmNew::DetSetVector<SiStripCluster>>(edmNew::DetSetVector<SiStripCluster>());

if (onDemand)
assert(output->onDemand());
Expand Down Expand Up @@ -265,20 +278,38 @@ void SiStripClusterizerFromRaw::initialize(const edm::EventSetup& es) {
}

void SiStripClusterizerFromRaw::run(const FEDRawDataCollection& rawColl, edmNew::DetSetVector<SiStripCluster>& output) {
ClusterFiller filler(rawColl, *clusterizer_, *rawAlgos_, doAPVEmulatorCheck_, legacy_, hybridZeroSuppressed_);
const ThreeThresholdAlgorithm* clusterizer3 = dynamic_cast<const ThreeThresholdAlgorithm*>(clusterizer_.get());
if (clusterizer3 == nullptr) {
ClusterFiller<StripClusterizerAlgorithm> filler(
rawColl, *clusterizer_, *rawAlgos_, doAPVEmulatorCheck_, legacy_, hybridZeroSuppressed_);

// loop over good det in cabling
for (auto idet : clusterizer_->conditions().allDetIds()) {
StripClusterizerAlgorithm::output_t::TSFastFiller record(output, idet);
// loop over good det in cabling
for (auto idet : clusterizer_->conditions().allDetIds()) {
StripClusterizerAlgorithm::output_t::TSFastFiller record(output, idet);

filler.fill(record);
filler.fill(record);

if (record.empty())
record.abort();
} // end loop over dets
if (record.empty())
record.abort();
} // end loop over dets
} else {
ClusterFiller<ThreeThresholdAlgorithm> filler(
rawColl, *clusterizer3, *rawAlgos_, doAPVEmulatorCheck_, legacy_, hybridZeroSuppressed_);

// loop over good det in cabling
for (auto idet : clusterizer_->conditions().allDetIds()) {
StripClusterizerAlgorithm::output_t::TSFastFiller record(output, idet);

filler.fill(record);

if (record.empty())
record.abort();
} // end loop over dets
}
}

namespace {
template <class AlgoT>
class StripByStripAdder {
public:
typedef std::output_iterator_tag iterator_category;
Expand All @@ -287,7 +318,7 @@ namespace {
typedef void pointer;
typedef void reference;

StripByStripAdder(StripClusterizerAlgorithm& clusterizer,
StripByStripAdder(const AlgoT& clusterizer,
StripClusterizerAlgorithm::State& state,
StripClusterizerAlgorithm::output_t::TSFastFiller& record)
: clusterizer_(clusterizer), state_(state), record_(record) {}
Expand All @@ -302,7 +333,7 @@ namespace {
StripByStripAdder& operator++(int) { return *this; }

private:
StripClusterizerAlgorithm& clusterizer_;
const AlgoT& clusterizer_;
StripClusterizerAlgorithm::State& state_;
StripClusterizerAlgorithm::output_t::TSFastFiller& record_;
};
Expand All @@ -325,7 +356,8 @@ namespace {
};
} // namespace

void ClusterFiller::fill(StripClusterizerAlgorithm::output_t::TSFastFiller& record) const {
template <class AlgoT>
void ClusterFiller<AlgoT>::fill(StripClusterizerAlgorithm::output_t::TSFastFiller& record) const {
try { // edmNew::CapacityExaustedException
incReady();

Expand Down Expand Up @@ -392,7 +424,7 @@ void ClusterFiller::fill(StripClusterizerAlgorithm::output_t::TSFastFiller& reco

using namespace sistrip;
if LIKELY (fedchannelunpacker::isZeroSuppressed(mode, legacy_, lmode)) {
auto perStripAdder = StripByStripAdder(clusterizer, state, record);
auto perStripAdder = StripByStripAdder<AlgoT>(clusterizer, state, record);
const auto isNonLite = fedchannelunpacker::isNonLiteZS(mode, legacy_, lmode);
const uint8_t pCode = (isNonLite ? buffer->packetCode(legacy_, fedCh) : 0);
auto st_ch = fedchannelunpacker::StatusCode::SUCCESS;
Expand Down
Loading