Skip to content

Commit 24690bd

Browse files
authored
Merge pull request #47269 from hqucms/dev/hgc-scaling-test
[HGCAL] Fix the implementation of scaling test in HGCalRecHitProducers
2 parents 7c570bf + fd7399c commit 24690bd

File tree

2 files changed

+69
-17
lines changed

2 files changed

+69
-17
lines changed

RecoLocalCalo/HGCalRecAlgos/plugins/alpaka/HGCalRecHitCalibrationAlgorithms.dev.cc

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,6 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
122122
HGCalCalibParamDevice const& device_calib,
123123
HGCalConfigParamDevice const& device_config) const {
124124
LogDebug("HGCalRecHitCalibrationAlgorithms") << "\n\nINFO -- Start of calibrate\n\n" << std::endl;
125-
LogDebug("HGCalRecHitCalibrationAlgorithms")
126-
<< "N blocks: " << n_blocks_ << "\tN threads: " << n_threads_ << std::endl;
127-
auto grid = make_workdiv<Acc1D>(n_blocks_, n_threads_);
128125

129126
LogDebug("HGCalRecHitCalibrationAlgorithms") << "\n\nINFO -- Copying the digis to the device\n\n" << std::endl;
130127
HGCalDigiDevice device_digis(host_digis.view().metadata().size(), queue);
@@ -134,6 +131,16 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
134131
<< "\n\nINFO -- Allocating rechits buffer and initiating values" << std::endl;
135132
HGCalRecHitDevice device_recHits(device_digis.view().metadata().size(), queue);
136133

134+
// number of items per group
135+
uint32_t items = n_threads_;
136+
// use as many groups as needed to cover the whole problem
137+
uint32_t groups = divide_up_by(device_digis.view().metadata().size(), items);
138+
// map items to
139+
// - threads with a single element per thread on a GPU backend
140+
// - elements within a single thread on a CPU backend
141+
auto grid = make_workdiv<Acc1D>(groups, items);
142+
LogDebug("HGCalRecHitCalibrationAlgorithms") << "N groups: " << groups << "\tN items: " << items << std::endl;
143+
137144
alpaka::exec<Acc1D>(queue,
138145
grid,
139146
HGCalRecHitCalibrationKernel_flagRecHits{},

RecoLocalCalo/HGCalRecAlgos/plugins/alpaka/HGCalRecHitProducers.cc

Lines changed: 59 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
#include "CondFormats/HGCalObjects/interface/alpaka/HGCalCalibrationParameterDevice.h"
3434
#include "RecoLocalCalo/HGCalRecAlgos/interface/alpaka/HGCalRecHitCalibrationAlgorithms.h"
3535

36+
// flag to assist the computational performance test
37+
// #define HGCAL_PERF_TEST
38+
3639
namespace ALPAKA_ACCELERATOR_NAMESPACE {
3740

3841
using namespace cms::alpakatools;
@@ -47,11 +50,11 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
4750
edm::ESWatcher<HGCalElectronicsMappingRcd> calibWatcher_;
4851
edm::ESWatcher<HGCalModuleConfigurationRcd> configWatcher_;
4952
const edm::EDGetTokenT<hgcaldigi::HGCalDigiHost> digisToken_;
50-
device::ESGetToken<hgcalrechit::HGCalCalibParamDevice, HGCalModuleConfigurationRcd> calibToken_;
53+
edm::ESGetToken<hgcalrechit::HGCalCalibParamHost, HGCalModuleConfigurationRcd> calibToken_;
5154
device::ESGetToken<hgcalrechit::HGCalConfigParamDevice, HGCalModuleConfigurationRcd> configToken_;
5255
const device::EDPutToken<hgcalrechit::HGCalRecHitDevice> recHitsToken_;
5356
const HGCalRecHitCalibrationAlgorithms calibrator_;
54-
int n_hits_scale;
57+
const int n_hits_scale;
5558
};
5659

5760
HGCalRecHitsProducer::HGCalRecHitsProducer(const edm::ParameterSet& iConfig)
@@ -61,7 +64,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
6164
configToken_{esConsumes(iConfig.getParameter<edm::ESInputTag>("configSource"))},
6265
recHitsToken_{produces()},
6366
calibrator_{iConfig.getParameter<int>("n_blocks"), iConfig.getParameter<int>("n_threads")},
64-
n_hits_scale{iConfig.getParameter<int>("n_hits_scale")} {}
67+
n_hits_scale{iConfig.getParameter<int>("n_hits_scale")} {
68+
#ifndef HGCAL_PERF_TEST
69+
if (n_hits_scale > 1) {
70+
throw cms::Exception("RuntimeError") << "Build with `HGCAL_PERF_TEST` flag to activate `n_hits_scale`.";
71+
}
72+
#endif
73+
}
6574

6675
void HGCalRecHitsProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
6776
edm::ParameterSetDescription desc;
@@ -78,7 +87,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
7887
auto& queue = iEvent.queue();
7988

8089
// Read digis
81-
auto const& deviceCalibParamProvider = iSetup.getData(calibToken_);
90+
auto const& hostCalibParamProvider = iSetup.getData(calibToken_);
8291
auto const& deviceConfigParamProvider = iSetup.getData(configToken_);
8392
auto const& hostDigisIn = iEvent.get(digisToken_);
8493

@@ -87,28 +96,53 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
8796
if (calibWatcher_.check(iSetup)) {
8897
for (int i = 0; i < deviceConfigParamProvider.view().metadata().size(); i++) {
8998
log << "idx = " << i << ", "
90-
<< "gain = " << deviceConfigParamProvider.view()[i].gain() << ","
91-
<< "ADC_ped = " << deviceCalibParamProvider.view()[i].ADC_ped() << ", "
92-
<< "CM_slope = " << deviceCalibParamProvider.view()[i].CM_slope() << ", "
93-
<< "CM_ped = " << deviceCalibParamProvider.view()[i].CM_ped() << ", "
94-
<< "BXm1_slope = " << deviceCalibParamProvider.view()[i].BXm1_slope() << ", ";
99+
<< "gain = " << deviceConfigParamProvider.view()[i].gain() << ", ";
100+
}
101+
for (int i = 0; i < hostCalibParamProvider.view().metadata().size(); i++) {
102+
log << "idx = " << i << ", "
103+
<< "ADC_ped = " << hostCalibParamProvider.view()[i].ADC_ped() << ", "
104+
<< "CM_slope = " << hostCalibParamProvider.view()[i].CM_slope() << ", "
105+
<< "CM_ped = " << hostCalibParamProvider.view()[i].CM_ped() << ", "
106+
<< "BXm1_slope = " << hostCalibParamProvider.view()[i].BXm1_slope() << ", ";
95107
}
96108
}
97109
});
98110

99-
int oldSize = hostDigisIn.view().metadata().size();
100-
int newSize = oldSize * n_hits_scale;
111+
#ifdef HGCAL_PERF_TEST
112+
uint32_t oldSize = hostDigisIn.view().metadata().size();
113+
uint32_t newSize = oldSize * (n_hits_scale > 0 ? (unsigned)n_hits_scale : 1);
101114
auto hostDigis = HGCalDigiHost(newSize, queue);
115+
auto hostCalibParam = HGCalCalibParamHost(newSize, queue);
102116
// TODO: replace with memcpy?
103-
for (int i = 0; i < newSize; i++) {
117+
for (uint32_t i = 0; i < newSize; i++) {
104118
hostDigis.view()[i].tctp() = hostDigisIn.view()[i % oldSize].tctp();
105119
hostDigis.view()[i].adcm1() = hostDigisIn.view()[i % oldSize].adcm1();
106120
hostDigis.view()[i].adc() = hostDigisIn.view()[i % oldSize].adc();
107121
hostDigis.view()[i].tot() = hostDigisIn.view()[i % oldSize].tot();
108122
hostDigis.view()[i].toa() = hostDigisIn.view()[i % oldSize].toa();
109123
hostDigis.view()[i].cm() = hostDigisIn.view()[i % oldSize].cm();
110124
hostDigis.view()[i].flags() = hostDigisIn.view()[i % oldSize].flags();
125+
126+
hostCalibParam.view()[i].ADC_ped() = hostCalibParamProvider.view()[i % oldSize].ADC_ped();
127+
hostCalibParam.view()[i].Noise() = hostCalibParamProvider.view()[i % oldSize].Noise();
128+
hostCalibParam.view()[i].CM_slope() = hostCalibParamProvider.view()[i % oldSize].CM_slope();
129+
hostCalibParam.view()[i].CM_ped() = hostCalibParamProvider.view()[i % oldSize].CM_ped();
130+
hostCalibParam.view()[i].BXm1_slope() = hostCalibParamProvider.view()[i % oldSize].BXm1_slope();
131+
hostCalibParam.view()[i].TOTtoADC() = hostCalibParamProvider.view()[i % oldSize].TOTtoADC();
132+
hostCalibParam.view()[i].TOT_ped() = hostCalibParamProvider.view()[i % oldSize].TOT_ped();
133+
hostCalibParam.view()[i].TOT_lin() = hostCalibParamProvider.view()[i % oldSize].TOT_lin();
134+
hostCalibParam.view()[i].TOT_P0() = hostCalibParamProvider.view()[i % oldSize].TOT_P0();
135+
hostCalibParam.view()[i].TOT_P1() = hostCalibParamProvider.view()[i % oldSize].TOT_P1();
136+
hostCalibParam.view()[i].TOT_P2() = hostCalibParamProvider.view()[i % oldSize].TOT_P2();
137+
hostCalibParam.view()[i].TOAtops() = hostCalibParamProvider.view()[i % oldSize].TOAtops();
138+
hostCalibParam.view()[i].MIPS_scale() = hostCalibParamProvider.view()[i % oldSize].MIPS_scale();
139+
hostCalibParam.view()[i].valid() = hostCalibParamProvider.view()[i % oldSize].valid();
111140
}
141+
#else
142+
const auto& hostDigis = hostDigisIn;
143+
const auto& hostCalibParam = hostCalibParamProvider;
144+
#endif
145+
112146
LogDebug("HGCalRecHitsProducer") << "Loaded host digis: " << hostDigis.view().metadata().size(); //<< std::endl;
113147

114148
LogDebug("HGCalRecHitsProducer") << "\n\nINFO -- calling calibrate method"; //<< std::endl;
@@ -118,13 +152,24 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE {
118152
auto start = std::chrono::steady_clock::now();
119153
#endif
120154

121-
auto recHits = calibrator_.calibrate(queue, hostDigis, deviceCalibParamProvider, deviceConfigParamProvider);
155+
LogDebug("HGCalRecHitsProducer") << "\n\nINFO -- Copying the calib to the device\n\n" << std::endl;
156+
HGCalCalibParamDevice deviceCalibParam(hostCalibParam.view().metadata().size(), queue);
157+
alpaka::memcpy(queue, deviceCalibParam.buffer(), hostCalibParam.const_buffer());
158+
159+
#ifdef HGCAL_PERF_TEST
160+
auto tmpRecHits = calibrator_.calibrate(queue, hostDigis, deviceCalibParam, deviceConfigParamProvider);
161+
HGCalRecHitDevice recHits(oldSize, queue);
162+
alpaka::memcpy(queue, recHits.buffer(), tmpRecHits.const_buffer(), oldSize);
163+
#else
164+
auto recHits = calibrator_.calibrate(queue, hostDigis, deviceCalibParam, deviceConfigParamProvider);
165+
#endif
122166

123167
#ifdef EDM_ML_DEBUG
124168
alpaka::wait(queue);
125169
auto stop = std::chrono::steady_clock::now();
126170
std::chrono::duration<float> elapsed = stop - start;
127-
LogDebug("HGCalRecHitsProducer") << "Time spent calibrating: " << elapsed.count(); //<< std::endl;
171+
LogDebug("HGCalRecHitsProducer") << "Time spent calibrating " << hostDigis.view().metadata().size()
172+
<< " digis: " << elapsed.count(); //<< std::endl;
128173
#endif
129174

130175
LogDebug("HGCalRecHitsProducer") << "\n\nINFO -- storing rec hits in the event"; //<< std::endl;

0 commit comments

Comments
 (0)