diff --git a/Makefile b/Makefile index 3b715fba..dd0b22bf 100644 --- a/Makefile +++ b/Makefile @@ -51,6 +51,14 @@ ifdef INCLUDE_CENTAURO FLAGS += -DINCLUDE_CENTAURO endif + +# ONNX plugin: ML model predictions +ifdef INCLUDE_ONNX + FLAGS += -DINCLUDE_ONNX + DEP_INCLUDES += -I${ONNX_HOME}/include + DEP_INCLUDES += -L${ONNX_HOME}/lib -lonnxruntime +endif + # MSTWPDF DEP_INCLUDES += -I${MSTWPDF_HOME} DEP_LIBRARIES += -L${MSTWPDF_HOME} -lmstwpdf diff --git a/deps/install_onnx.sh b/deps/install_onnx.sh new file mode 100755 index 00000000..96050eee --- /dev/null +++ b/deps/install_onnx.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# SPDX-License-Identifier: LGPL-3.0-or-later +# Copyright (C) 2023 Christopher Dilks + +# update or download and install delphes + +### download/update source +source environ.sh + +echo "[+] downloading onnx runtime source" +wget https://github.com/microsoft/onnxruntime/releases/download/v1.14.1/onnxruntime-linux-x64-1.14.1.tgz +mv onnxruntime-linux-x64-1.14.1.tgz deps +tar -zxvf deps/onnxruntime-linux-x64-1.14.1.tgz -C deps/ +rm deps/onnxruntime-linux-x64-1.14.1.tgz diff --git a/environ.sh b/environ.sh index ee09dc2d..deb1f31f 100644 --- a/environ.sh +++ b/environ.sh @@ -37,5 +37,9 @@ export ADAGE_HOME=$EPIC_ANALYSIS_HOME/deps/adage echo "ADAGE found at $ADAGE_HOME" export LD_LIBRARY_PATH=$ADAGE_HOME/lib:$LD_LIBRARY_PATH +### onnx +export ONNX_HOME=$EPIC_ANALYSIS_HOME/deps/onnxruntime-linux-x64-1.14.1 +export LD_LIBRARY_PATH=$ONNX_HOME/lib:$LD_LIBRARY_PATH + # prioritize EPIC_ANALYSIS libraries export LD_LIBRARY_PATH=$EPIC_ANALYSIS_HOME/lib:$LD_LIBRARY_PATH diff --git a/macro/analysis_testml.C b/macro/analysis_testml.C index 0d71cbce..378f9826 100644 --- a/macro/analysis_testml.C +++ b/macro/analysis_testml.C @@ -2,27 +2,25 @@ // Copyright (C) 2023 Connor Pecar R__LOAD_LIBRARY(EpicAnalysis) -#include -namespace py = pybind11; // using ML prediction for vecQ, and writing out tree of HFS four-vectors // requires pybind includes above void analysis_testml( TString configFile="datarec/in.config", /* delphes tree(s) */ - TString outfilePrefix="resolutions" /* output filename prefix*/ + TString outfilePrefix="resolutions", /* output filename prefix*/ + TString onnxFileName="pfn_epic22.11.2_d500_Q2_1_10_18x275_eleglobal_npartg3_bestValLoss.onnx" ) { - // object needed at start of script using pybind11 - py::scoped_interpreter guard{}; //outfilePrefix+="_DA"; // setup analysis ======================================== - AnalysisEcce *A = new AnalysisEcce( + AnalysisEpic *A = new AnalysisEpic( configFile, outfilePrefix ); - A->maxEvents = 10000; // use this to limit the number of events - A->writeSidisTree = true; // write SidisTree (for one bin) - A->writeHFSTree = true; // write HFSTree (for one bin) + //A->maxEvents = 1000; // use this to limit the number of events + //A->writeSimpleTree = true; // write SimpleTree (for one bin) + //A->writeHFSTree = true; // write HFSTree (for one bin) A->SetReconMethod("ML"); // set reconstruction method + A->SetModelName(onnxFileName); A->AddFinalState("pipTrack"); // pion final state //A->AddFinalState("KpTrack"); // kaon final state diff --git a/src/Analysis.cxx b/src/Analysis.cxx index c8637710..5c90fb3f 100644 --- a/src/Analysis.cxx +++ b/src/Analysis.cxx @@ -320,7 +320,7 @@ void Analysis::Prepare() { // instantiate shared objects kin = std::make_shared(eleBeamEn,ionBeamEn,crossingAngle); - kinTrue = std::make_shared(eleBeamEn, ionBeamEn, crossingAngle); + kinTrue = std::make_shared(eleBeamEn, ionBeamEn, crossingAngle); #ifndef EXCLUDE_DELPHES kinJet = std::make_shared(eleBeamEn, ionBeamEn, crossingAngle); kinJetTrue = std::make_shared(eleBeamEn, ionBeamEn, crossingAngle); @@ -329,6 +329,8 @@ void Analysis::Prepare() { HFST = std::make_unique("hfstree",kin,kinTrue); PT = std::make_unique("ptree"); + kin->modelname = nnFile; + // if including jets, define a `jet` final state #ifndef EXCLUDE_DELPHES if(includeOutputSet["jets"]) { diff --git a/src/Analysis.h b/src/Analysis.h index ebeaa248..342bbef6 100644 --- a/src/Analysis.h +++ b/src/Analysis.h @@ -70,7 +70,9 @@ class Analysis */ Bool_t useBreitJets; // if true, use Breit jets, if using finalState `jets` (requires centauro) // set kinematics reconstruction method; see constructor for available methods - void SetReconMethod(TString reconMethod_) { reconMethod=reconMethod_; }; + void SetReconMethod(TString reconMethod_) { reconMethod=reconMethod_; }; + // set ONNX file name + void SetModelName(TString onnxFile) { nnFile = onnxFile; }; // choose which output sets to include std::map includeOutputSet; // maximum number of errors to print @@ -153,7 +155,7 @@ class Analysis Double_t crossingAngle; // mrad Double_t totalCrossSection; TString reconMethod; - + TString nnFile; // event loop objects Long64_t ENT; Double_t eleP,maxEleP; diff --git a/src/AnalysisEpic.cxx b/src/AnalysisEpic.cxx index 00e25cbe..84b10564 100644 --- a/src/AnalysisEpic.cxx +++ b/src/AnalysisEpic.cxx @@ -13,7 +13,7 @@ void AnalysisEpic::Execute() { // setup Prepare(); - + // read EventEvaluator tree auto chain = std::make_unique("events"); for(Int_t idx=0; idxCalculateDIS(reconMethod))) continue; // reconstructed - if(!(kinTrue->CalculateDIS(reconMethod))) continue; // generated (truth) - + if( reconMethod == "ML"){ + if(!(kinTrue->CalculateDIS("ele"))) continue; // generated (truth) + } + else{ + if(!(kinTrue->CalculateDIS(reconMethod))) continue; + } // Get the weight for this event's Q2 auto Q2weightFactor = GetEventQ2Weight(kinTrue->Q2, inLookup[chain->GetTreeNumber()]); diff --git a/src/Kinematics.cxx b/src/Kinematics.cxx index dfb2bbae..80e0d2cd 100644 --- a/src/Kinematics.cxx +++ b/src/Kinematics.cxx @@ -15,12 +15,11 @@ Kinematics::Kinematics( ) { srand(time(NULL)); - // importing from local python script for ML predictions - // requires tensorflow, energyflow packages installed -#ifdef SIDIS_MLPRED - efnpackage = py::module_::import("testEFlowimport"); - pfnimport = efnpackage.attr("eflowPredict"); -#endif + + // dimension of tensors for ML predictions + dims = {1,35,6}; + dimsglobal = {1,12}; + // set ion mass IonMass = ProtonMass(); @@ -89,7 +88,6 @@ Kinematics::Kinematics( countPIDsmeared=countPIDtrue=0; }; - // --------------------------------- // calculates 4-momenta components of q and W (`vecQ` and `vecW`) as well as // derived invariants `W` and `nu` @@ -201,11 +199,31 @@ void Kinematics::GetQWNu_electronic(){ Nu = vecIonBeam.Dot(vecQ) / IonMass; }; + void Kinematics::GetQWNu_ML(){ - hfsinfo.clear(); + #ifdef INCLUDE_ONNX + + Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING,"test"); + Ort::SessionOptions session_options; + session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::AllocatorWithDefaultOptions allocator; + Ort::Session ORTsession(env, modelname, session_options); + + std::vector input_node_names, output_node_names; + std::vector> input_shape; + + input_node_names.push_back("input"); + input_node_names.push_back("num_global_features"); + + output_node_names.push_back("activation_7"); + + auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); + std::vector ort_inputs; + + input_tensor_values_hfs.clear(); float pidadj = 0; + if(nHFS >= 2){ - std::vector partHold; for(int i = 0; i < nHFS; i++){ double pidsgn=(hfspid[i]/abs(hfspid[i])); if(abs(hfspid[i])==211) pidadj = 0.4*pidsgn; @@ -213,20 +231,34 @@ void Kinematics::GetQWNu_ML(){ if(abs(hfspid[i])==321) pidadj = 0.6*pidsgn; if(abs(hfspid[i])==2212) pidadj = 0.8*pidsgn; if(abs(hfspid[i])==11) pidadj = 1.0*pidsgn; - partHold.push_back(hfseta[i]); - partHold.push_back(hfsphi[i]); - partHold.push_back(hfspx[i]); - partHold.push_back(hfspy[i]); - partHold.push_back(hfspz[i]); - partHold.push_back(hfsE[i]); - partHold.push_back(pidadj); - hfsinfo.push_back(partHold); - partHold.clear(); + input_tensor_values_hfs.push_back(hfseta[i]); + input_tensor_values_hfs.push_back(hfsphi[i]); + input_tensor_values_hfs.push_back(hfspx[i]); + input_tensor_values_hfs.push_back(hfspy[i]); + input_tensor_values_hfs.push_back(hfspz[i]); + input_tensor_values_hfs.push_back(hfsE[i]); + //input_tensor_values_hfs.push_back(pidadj); + } + // zero padding (fixed expected shape of input) + for(int i = nHFS; i < nPad; i++){ + input_tensor_values_hfs.push_back(0); + input_tensor_values_hfs.push_back(0); + input_tensor_values_hfs.push_back(0); + input_tensor_values_hfs.push_back(0); + input_tensor_values_hfs.push_back(0); + input_tensor_values_hfs.push_back(0); } + ort_inputs.push_back(Ort::Value::CreateTensor(memoryInfo, + input_tensor_values_hfs.data(), + input_tensor_values_hfs.size(), + dims.data(), + dims.size() + )); + double Q2ele, Q2DA, Q2JB; double xele, xDA, xJB; TLorentzVector vecQEle; - globalinfo.clear(); + input_tensor_values_global.clear(); this->CalculateDISbyElectron(); vecQEle.SetPxPyPzE(vecQ.Px(), vecQ.Py(), vecQ.Pz(), vecQ.E()); Q2ele = Q2; @@ -234,54 +266,64 @@ void Kinematics::GetQWNu_ML(){ this->CalculateDISbyDA(); Q2DA = Q2; xDA = x; - this->CalculateDISbyJB(); + this->CalculateDISbySigma(); Q2JB = Q2; xJB = x; - if( Q2DA > 0 && Q2DA < 1e4){ - globalinfo.push_back(log10(Q2DA)); + if( Q2DA > 0 && Q2DA < 1e6){ + input_tensor_values_global.push_back(log10(Q2DA)); } else{ - globalinfo.push_back(log10((float) (rand()) / (float) (RAND_MAX/10000.0))); + input_tensor_values_global.push_back(log10((float) (rand()) / (float) (RAND_MAX/10000.0))); } - if( Q2ele > 0 && Q2ele < 1e4){ - globalinfo.push_back(log10(Q2ele)); + if( Q2ele > 0 && Q2ele < 1e6){ + input_tensor_values_global.push_back(log10(Q2ele)); } else{ - globalinfo.push_back(log10((float) (rand()) / (float) (RAND_MAX/10000.0))); + input_tensor_values_global.push_back(log10((float) (rand()) / (float) (RAND_MAX/10000.0))); } - if( Q2JB > 0 && Q2JB < 1e4){ - globalinfo.push_back(log10(Q2JB)); + if( Q2JB > 0 && Q2JB < 1e6){ + input_tensor_values_global.push_back(log10(Q2JB)); } else{ - globalinfo.push_back(log10((float) (rand()) / (float) (RAND_MAX/10000.0))); + input_tensor_values_global.push_back(log10((float) (rand()) / (float) (RAND_MAX/10000.0))); } if(xDA>0 && xDA < 1){ - globalinfo.push_back(-1*log10(xDA)); + input_tensor_values_global.push_back(-1*log10(xDA)); } else{ - globalinfo.push_back(-1*log10( (float) (rand()) / (float) (RAND_MAX/1.0) )); + input_tensor_values_global.push_back(-1*log10( (float) (rand()) / (float) (RAND_MAX/1.0) )); } if(xele>0 && xele < 1){ - globalinfo.push_back(-1*log10(xele)); + input_tensor_values_global.push_back(-1*log10(xele)); } else{ - globalinfo.push_back(-1*log10( (float) (rand()) / (float) (RAND_MAX/1.0) )); + input_tensor_values_global.push_back(-1*log10( (float) (rand()) / (float) (RAND_MAX/1.0) )); } if(xJB>0 && xJB < 1){ - globalinfo.push_back(-1*log10(xJB)); + input_tensor_values_global.push_back(-1*log10(xJB)); } else{ - globalinfo.push_back( -1*log10((float) (rand()) / (float) (RAND_MAX/1.0) ) ); - } - globalinfo.push_back(vecQEle.Px()); - globalinfo.push_back(vecQEle.Py()); - globalinfo.push_back(vecQEle.Pz()); - globalinfo.push_back(vecQEle.E()); -#ifdef SIDIS_MLPRED - py::object nnoutput = pfnimport(hfsinfo, globalinfo); - std::vector nnvecq = nnoutput.cast>(); - vecQ.SetPxPyPzE(nnvecq[0],nnvecq[1],nnvecq[2],nnvecq[3]); -#endif + input_tensor_values_global.push_back( -1*log10((float) (rand()) / (float) (RAND_MAX/1.0) ) ); + } + input_tensor_values_global.push_back(vecElectron.Eta()); + input_tensor_values_global.push_back(vecElectron.Phi()); + input_tensor_values_global.push_back(vecElectron.Px()); + input_tensor_values_global.push_back(vecElectron.Py()); + input_tensor_values_global.push_back(vecElectron.Pz()); + input_tensor_values_global.push_back(vecElectron.E()); + ort_inputs.push_back(Ort::Value::CreateTensor(memoryInfo, + input_tensor_values_global.data(), + input_tensor_values_global.size(), + dimsglobal.data(),dimsglobal.size() + )); + std::vector ort_outputs = ORTsession.Run(Ort::RunOptions{nullptr}, + input_node_names.data(), + ort_inputs.data(), + ort_inputs.size(), + output_node_names.data(), + 1); + float* nnvecq = ort_outputs[0].GetTensorMutableData(); + vecQ.SetPxPyPzE(nnvecq[0],nnvecq[1],nnvecq[2],nnvecq[3]); } else{ this->CalculateDISbyElectron(); @@ -289,7 +331,7 @@ void Kinematics::GetQWNu_ML(){ vecW = vecEleBeam + vecIonBeam - vecElectron; W = vecW.M(); Nu = vecIonBeam.Dot(vecQ) / IonMass; - + #endif } // ------------------------------------------------------ @@ -536,6 +578,8 @@ void Kinematics::AddToHFS(TLorentzVector p4_) { hfspy[nHFS] = p4.Py(); hfspz[nHFS] = p4.Pz(); hfsE[nHFS] = p4.E(); + hfseta[nHFS] = p4.Eta(); + hfsphi[nHFS] = p4.Phi(); new(ar[nHFS]) TLorentzVector(p4); nHFS++; diff --git a/src/Kinematics.h b/src/Kinematics.h index 4b0ce08e..727588f4 100644 --- a/src/Kinematics.h +++ b/src/Kinematics.h @@ -29,13 +29,10 @@ #ifndef EXCLUDE_DELPHES #include "classes/DelphesClasses.h" #endif -// pybind (for ML models using python packages) -#ifdef SIDIS_MLPRED -#include -#include -#include -#include -namespace py = pybind11; + +// onnx (for ML models prediction in c++) +#ifdef INCLUDE_ONNX +#include #endif using std::map; @@ -123,7 +120,7 @@ class Kinematics TLorentzVector vecElectron, vecW, vecQ; TLorentzVector vecHadron; - // HFS tree objects + // HFS tree objects Int_t nHFS; Int_t nPi; Double_t hfspx[100]; @@ -245,7 +242,9 @@ class Kinematics enum mainFrame_enum {fLab, fHeadOn}; Int_t qComponentsMethod; enum qComponentsMethod_enum {qQuadratic, qHadronic, qElectronic}; - + + // onnx model name + const char* modelname; protected: // reconstruction methods @@ -294,14 +293,17 @@ class Kinematics Double_t rotAboutX, rotAboutY; // other TLorentzVector vecSpin, IvecSpin; -#ifdef SIDIS_MLPRED - py::object keras, tensorflow; - py::object efnpackage; - py::function pfnimport; - py::object model; - py::object modelload; - std::string modelname = "pfn_testEpic_000-2_vecQele_nHFS2_500_bs10k_bestValLoss"; -#endif + // ONNX + int nPad = 35; + + std::vector> input_node_dims; + std::vector input_tensor_size; + std::vector dims; + std::vector dimsglobal; + std::vector input_tensor_values_hfs; + std::vector input_tensor_values_global; + + ClassDef(Kinematics,1); };