Skip to content

Feature/cerati ng2caf #532

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions sbncode/CAFMaker/CAFMakerParams.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,24 @@ namespace caf
"" //Empty by default, configured in icaruscode cafmaker_defs
};

Atom<art::InputTag> NuGraphSliceHitLabel {
Name("NuGraphSliceHitLabel"),
Comment("Label of NuGraph slice hit map."),
"" //Empty by default, please set to e.g. art::InputTag("nuslhits")
};

Atom<art::InputTag> NuGraphFilterLabel {
Name("NuGraphFilterLabel"),
Comment("Label of NuGraph filter."),
"" //Empty by default, please set to e.g. art::InputTag("NuGraph","filter")
};

Atom<art::InputTag> NuGraphSemanticLabel {
Name("NuGraphSemanticLabel"),
Comment("Label of NuGraph semantic."),
"" //Empty by default, please set to e.g. art::InputTag("NuGraph","semantic")
};

Atom<string> OpFlashLabel {
Name("OpFlashLabel"),
Comment("Label of PMT flash."),
Expand Down
47 changes: 47 additions & 0 deletions sbncode/CAFMaker/CAFMaker_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
#include "lardataobj/RecoBase/Vertex.h"
#include "lardataobj/RecoBase/Shower.h"
#include "lardataobj/RecoBase/MCSFitResult.h"
#include "lardataobj/RecoBase/Cluster.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the inclusion of FillReco prevents this from being an actual problem, but I am trying to recall if our style guidelines suggest that

#include "lardataobj/AnalysisBase/MVAOutput.h"

should still be included here too since FeatureVector is used directly in CAFMaker_module?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm only leaving this as comments for now as I have more pieces to look at and so don't want to leave an actual "review" result yet, but I'll put a couple thoughts here.

I guess the inclusion of FillReco prevents this from being an actual problem, but I'm trying to recall if our style guidelines suggest that you should actually put

#include "lardataobj/AnalysisBase/MVAOutput.h"

here as an include too since FeatureVector is directly used in the CAFMaker module.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure, I'll add this include!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding!


#include "nusimdata/SimulationBase/MCFlux.h"
#include "nusimdata/SimulationBase/MCTruth.h"
Expand Down Expand Up @@ -1696,6 +1697,17 @@ void CAFMaker::produce(art::Event& evt) noexcept {
}
}

// nu graph
std::vector< art::Handle<std::vector<unsigned int>> > ng2_slice_hit_map_handle(pandora_tag_suffixes.size());
std::vector< art::Handle<std::vector<anab::FeatureVector<1>>> > ng2_filter_handle(pandora_tag_suffixes.size());
std::vector< art::Handle<std::vector<anab::FeatureVector<5>>> > ng2_semantic_handle(pandora_tag_suffixes.size());
for (unsigned i_tag = 0; i_tag < pandora_tag_suffixes.size(); i_tag++) {
const std::string &pandora_tag_suffix = pandora_tag_suffixes[i_tag];
GetByLabelIfExists(evt, fParams.NuGraphSliceHitLabel().encode() + pandora_tag_suffix, ng2_slice_hit_map_handle[i_tag]);
GetByLabelIfExists(evt, fParams.NuGraphFilterLabel().label() + pandora_tag_suffix + ":" + fParams.NuGraphFilterLabel().instance(), ng2_filter_handle[i_tag]);
GetByLabelIfExists(evt, fParams.NuGraphSemanticLabel().label() + pandora_tag_suffix + ":" + fParams.NuGraphSemanticLabel().instance(), ng2_semantic_handle[i_tag]);
}

// The Standard Record
// Branch entry definition -- contains list of slices, CRT information, and truth information
StandardRecord rec;
Expand Down Expand Up @@ -1752,6 +1764,18 @@ void CAFMaker::produce(art::Event& evt) noexcept {
}
}

std::vector<art::Ptr<anab::FeatureVector<1>>> ng2_filter_vec;
std::vector<art::Ptr<anab::FeatureVector<5>>> ng2_semantic_vec;
if (ng2_filter_handle[producer].isValid()) {
art::fill_ptr_vector(ng2_filter_vec,ng2_filter_handle[producer]);
}
if (ng2_semantic_handle[producer].isValid()) {
art::fill_ptr_vector(ng2_semantic_vec,ng2_semantic_handle[producer]);
}
if (ng2_slice_hit_map_handle[producer].isValid()) {
FillSliceNuGraph(slcHits,*ng2_slice_hit_map_handle[producer],ng2_filter_vec,ng2_semantic_vec,recslc);
}

art::FindManyP<sbn::OpT0Finder> fmOpT0 =
FindManyPStrict<sbn::OpT0Finder>(sliceList, evt, fParams.OpT0Label() + slice_tag_suff);
std::vector<art::Ptr<sbn::OpT0Finder>> slcOpT0;
Expand Down Expand Up @@ -1798,6 +1822,25 @@ void CAFMaker::produce(art::Event& evt) noexcept {
art::FindManyP<recob::PFParticle> fmSpacePointPFPs =
FindManyPStrict<recob::PFParticle>(slcSpacePoints, evt, fParams.PFParticleLabel() + slice_tag_suff);

art::FindManyP<recob::Cluster> fmPFPClusters =
FindManyPStrict<recob::Cluster>(fmPFPart, evt, fParams.PFParticleLabel() + slice_tag_suff);

std::vector<std::vector<art::Ptr<recob::Hit>>> fmPFPartHits;
// make Ptr's to clusters for cluster -> other object associations
if (fmPFPClusters.isValid()) {
for (size_t ipf=0; ipf<fmPFPart.size();++ipf) {
std::vector<art::Ptr<recob::Hit>> pfphits;
std::vector<art::Ptr<recob::Cluster>> pfclusters = fmPFPClusters.at(ipf);
art::FindManyP<recob::Hit> fmCluHits = FindManyPStrict<recob::Hit>(pfclusters, evt, fParams.PFParticleLabel() + slice_tag_suff);
for (size_t icl=0; icl<fmCluHits.size();icl++) {
for (auto hit : fmCluHits.at(icl)) {
pfphits.push_back(hit);
}
}
fmPFPartHits.push_back(pfphits);
}
}

Comment on lines +1861 to +1879
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we had a quick chat once about using index directly vs using the art Ptr key() and that both of us are more used to key, but you found that index was working okay (and seems to be used elsewhere here). Please remind me if I'm missing details? I wanted to ask just to double check that this is guaranteed to work as desired!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when looping plainly over a collection, the way one would use key is to first create an art::Ptr based on the handle and the index, which guarantees that the key is the same as the index

art::FindManyP<recob::Shower> fmShower =
FindManyPStrict<recob::Shower>(fmPFPart, evt, fParams.RecoShowerLabel() + slice_tag_suff);

Expand Down Expand Up @@ -2105,6 +2148,10 @@ void CAFMaker::produce(art::Event& evt) noexcept {
FillCNNScores(thisParticle, cnnScores, pfp);
}

if (ng2_slice_hit_map_handle[producer].isValid()) {
FillPPFNuGraph(*ng2_slice_hit_map_handle[producer], ng2_filter_vec, ng2_semantic_vec, fmPFPartHits.at(iPart), pfp);
}

if (!thisTrack.empty()) { // it has a track!
assert(thisTrack.size() == 1);

Expand Down
73 changes: 73 additions & 0 deletions sbncode/CAFMaker/FillReco.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,26 @@ namespace caf
}
}


void FillSliceNuGraph(const std::vector<art::Ptr<recob::Hit>> &inputHits,
const std::vector<unsigned int> &sliceHitsMap,
const std::vector<art::Ptr<anab::FeatureVector<1>>> &ngFilterResult,
const std::vector<art::Ptr<anab::FeatureVector<5>>> &ngSemanticResult,
caf::SRSlice &slice)
{

//need to double check that the slice processed by NuGraph is the same under consideration
//std::cout << "sizes=" << inputHits.size() << " " << sliceHitsMap.size() << " " << ngFilterResult.size() << " " << ngSemanticResult.size() << std::endl;
unsigned int nHits = inputHits.size();
if (nHits==0 || nHits!=sliceHitsMap.size() || inputHits[0].key()!=sliceHitsMap[0]) return;//not the same slice!
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be a silent error or would it be a sign of a bigger problem?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not an error. We just need to skip the slices that were not considered by NuGraph

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right, since NuGraph only runs on one slice!


unsigned int npass = 0;
for ( unsigned int i = 0; i < nHits; i++ ) {
if (ngFilterResult.at(i)->at(0)>0.5) npass++;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hard-coded 0.5: can you document it and maybe turn its value into a C++ constant?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usually the cut value is included:

Suggested change
if (ngFilterResult.at(i)->at(0)>0.5) npass++;
if (ngFilterResult.at(i)->at(0)>=0.5) npass++;

Of course practically it seldom matters, but it would be good to pick a convention and stick with it (below there is a < 0.5).

}
slice.ng_filt_pass_frac = float(npass)/nHits;
}

//......................................................................

void FillTrackCRTHit(const std::vector<art::Ptr<anab::T0>> &t0match,
Expand Down Expand Up @@ -950,6 +970,59 @@ namespace caf
srpfp.cnnscore.nclusters = cnnscore->nClusters;
}

void FillPPFNuGraph(const std::vector<unsigned int> &sliceHitsMap,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just noticed this is PPF not PFP

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice catch!

const std::vector<art::Ptr<anab::FeatureVector<1>>> &ngFilterResult,
const std::vector<art::Ptr<anab::FeatureVector<5>>> &ngSemanticResult,
const std::vector<art::Ptr<recob::Hit>> &pfpHits,
caf::SRPFP& srpfp,
bool allowEmpty)
{

auto arg_max = [](std::vector<float> const& vec) {
return std::distance(vec.begin(), std::max_element(vec.begin(), vec.end()));
};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another question on the SBN coding conventions: are lambdas okay within other methods? (If @cerati himself isn't sure then maybe @PetrilloAtWork is the best person to answer this (especially as he is also tagged as the other reviewer))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is resolved by Giuseppe's recent commit, which just uses this one-liner in place of the 2 lines this function would have been used.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is nothing about this case in our guidelines.
Personally I do recommend the use of lambdas in methods where it makes sense (typically, to avoid the repetition of simple code). However, if the same lambda appears in different methods, then it should likely be a member function if possible.


// the nugraph elements are ordered the same as the sliceHitsMap
std::vector<size_t> mappedhits;
for (auto& hit : pfpHits) {
auto it = std::find(sliceHitsMap.begin(), sliceHitsMap.end(), hit.key());
if (it != sliceHitsMap.end()) {
size_t index = std::distance(sliceHitsMap.begin(), it);
mappedhits.push_back(index);
}
}
Comment on lines +1036 to +1043
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just double checking my understanding, I assume that the NuGraph producer is filling up the map that you grab above with the hit keys for the slice and so then this is why you grab things in this way from the map?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. See icaruscode/NuGraphIcarus/IcarusNuSliceHitsProducer_module.cc being added in the icaruscode PR. NuGraph runs over a hit collection. This hit collection is created based on the hits in the best neutrino slice. This means that NuGraph labels are applied to a hit collection which is a subset of the one used by Pandora. So in order to get the NuGraph label of hits in Pandora PFPs, a mapping is needed


if (mappedhits.size()>0) {
std::vector<float> ng2sempfpcounts(5,0);
size_t ng2bkgpfpcount = 0;
for (size_t pos : mappedhits) {
auto scores = ngSemanticResult.at(pos);
std::vector<float> ng2semscores;
for (size_t i=0;i<scores->size();i++) ng2semscores.push_back(scores->at(i));
size_t sem_label = arg_max(ng2semscores);
ng2sempfpcounts[sem_label]++;
auto bkgscore = ngFilterResult.at(pos);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoid unnecessary copies:

Suggested change
auto bkgscore = ngFilterResult.at(pos);
auto const& bkgscore = ngFilterResult.at(pos);

(this is probably still fine, since it's implemented as a static array).

if (bkgscore->at(0)<0.5) ng2bkgpfpcount++;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems fine, but I was thinking: is there an argument to be made that one should only fill the semantic label fractions for hits that are not tagged as background?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's a good point. Ideally this should be studied, to find what works best. In general, the semantic labels can be meaningful also when the filter does not pass (e.g. for cosmics). But since noise is a bigger issues than cosmics (unlike MicroBooNE), your suggestion makes sense and I will implement it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was thinking more about this. I think I agree with your point if the cosmic hits were considered background. Isn't the current training considering them signal though? Apologies if I'm misremembering?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, the current training is considering them signal but this is going to change soon!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah then I guess I'm okay with either way, though I suppose in this updated schema the background fraction would still be meaningful to distinguish a cosmic PFP from others? And the semantic label fractions would help distinguish if there is some overlapping particle what it was, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right. My point is that for ICARUS noise is likely more of an issue than cosmics, so the semantic labels for the background hits are more likely to be meaningless (as they are more likely to be noise and not real cosmic particles). So the new version makes more sense as we don't want to pollute the semantic counts with noise. But of course the background fraction is still useful to reject cosmics!

}
srpfp.ng_sem_cat = arg_max(ng2sempfpcounts);
srpfp.ng_mip_frac = float(ng2sempfpcounts[0])/pfpHits.size();
srpfp.ng_hip_frac = float(ng2sempfpcounts[1])/pfpHits.size();
srpfp.ng_shr_frac = float(ng2sempfpcounts[2])/pfpHits.size();
srpfp.ng_mhl_frac = float(ng2sempfpcounts[3])/pfpHits.size();
srpfp.ng_dif_frac = float(ng2sempfpcounts[4])/pfpHits.size();
srpfp.ng_bkg_frac = float(ng2bkgpfpcount)/pfpHits.size();
} else {
srpfp.ng_sem_cat = -1;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See the enum suggestion on the other PR.

srpfp.ng_mip_frac = -1.;
srpfp.ng_hip_frac = -1.;
srpfp.ng_shr_frac = -1.;
srpfp.ng_mhl_frac = -1.;
srpfp.ng_dif_frac = -1.;
srpfp.ng_bkg_frac = -1.;
}

}

void FillHitVars(const recob::Hit& hit,
unsigned producer,
const recob::SpacePoint& spacepoint,
Expand Down
14 changes: 14 additions & 0 deletions sbncode/CAFMaker/FillReco.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "lardataobj/RecoBase/OpFlash.h"
#include "lardataobj/RecoBase/OpHit.h"
#include "lardataobj/AnalysisBase/Calorimetry.h"
#include "lardataobj/AnalysisBase/MVAOutput.h"
#include "lardataobj/AnalysisBase/ParticleID.h"
#include "lardataobj/AnalysisBase/T0.h"
#include "lardataobj/RecoBase/PFParticleMetadata.h"
Expand Down Expand Up @@ -104,6 +105,12 @@ namespace caf
const std::vector<art::Ptr<recob::SpacePoint>> &inputPoints,
caf::SRSlice &slice);

void FillSliceNuGraph(const std::vector<art::Ptr<recob::Hit>> &inputHits,
const std::vector<unsigned int> &sliceHitsMap,
const std::vector<art::Ptr<anab::FeatureVector<1>>> &ngFilterResult,
const std::vector<art::Ptr<anab::FeatureVector<5>>> &ngSemanticResult,
caf::SRSlice &slice);

bool SelectSlice(const caf::SRSlice &slice, bool cut_clear_cosmic);

void FillTrackVars(const recob::Track& track,
Expand All @@ -130,6 +137,13 @@ namespace caf
caf::SRPFP& srpfp,
bool allowEmpty = false);

void FillPPFNuGraph(const std::vector<unsigned int> &sliceHitsMap,
const std::vector<art::Ptr<anab::FeatureVector<1>>> &ngFilterResult,
const std::vector<art::Ptr<anab::FeatureVector<5>>> &ngSemanticResult,
const std::vector<art::Ptr<recob::Hit>> &pfpHits,
caf::SRPFP& srpfp,
bool allowEmpty= false);

void FillTrackCRTHit(const std::vector<art::Ptr<anab::T0>> &t0match,
const std::vector<art::Ptr<sbn::crt::CRTHit>> &hitmatch,
bool use_ts0,
Expand Down