diff --git a/CombineHarvester/CombinePdfs/Rules.mk b/CombineHarvester/CombinePdfs/Rules.mk index 28d73f9c..32c970ea 100644 --- a/CombineHarvester/CombinePdfs/Rules.mk +++ b/CombineHarvester/CombinePdfs/Rules.mk @@ -1,4 +1,4 @@ SUBDIRS := LIB_DEPS := CombineTools -LIB_EXTRA := -lHiggsAnalysisCombinedLimit +LIB_EXTRA := -L$(CMSSW_BASE)/lib/$(SCRAM_ARCH) -lHiggsAnalysisCombinedLimit DICTIONARY := diff --git a/CombineHarvester/CombinePdfs/src/MorphFunctions.cc b/CombineHarvester/CombinePdfs/src/MorphFunctions.cc index d48fd83b..01ed28b5 100644 --- a/CombineHarvester/CombinePdfs/src/MorphFunctions.cc +++ b/CombineHarvester/CombinePdfs/src/MorphFunctions.cc @@ -38,7 +38,7 @@ void BuildRooMorphing(RooWorkspace& ws, CombineHarvester& cb, CombineHarvester cb_bp = std::move(cb.cp().bin({bin}).process({process})); - vector masses_all = Set2Vec(cb_bp.GenerateSetFromProcs( + vector masses_all = Set2Vec(cb_bp.SetFromProcs( std::mem_fn(&ch::Process::mass))); std::sort(masses_all.begin(), masses_all.end(), diff --git a/CombineHarvester/CombinePdfs/test/ParametricMSSM.cpp b/CombineHarvester/CombinePdfs/test/ParametricMSSM.cpp index aff1bb9e..636ba03d 100644 --- a/CombineHarvester/CombinePdfs/test/ParametricMSSM.cpp +++ b/CombineHarvester/CombinePdfs/test/ParametricMSSM.cpp @@ -107,8 +107,7 @@ int main() { std::cout << " done\n"; // cb.era({"8TeV"}).bin_id({8}); - set lm_bins = - cb.GenerateSetFromObs(mem_fn(&ch::Observation::bin)); + set lm_bins = cb.SetFromObs(mem_fn(&ch::Observation::bin)); if (create_asimov) { for (auto const& b : lm_bins) { @@ -140,11 +139,7 @@ int main() { in->set_bin(in->bin() + "_hm"); }); - set hm_bins = - cb_hm.GenerateSetFromObs(mem_fn(&ch::Observation::bin)); - - - + set hm_bins = cb_hm.SetFromObs(mem_fn(&ch::Observation::bin)); cb.cp().bin_id({8}).VariableRebin( {0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, diff --git a/CombineHarvester/CombineTools/Rules.mk b/CombineHarvester/CombineTools/Rules.mk index 0f4a6f0a..522cbbec 100644 --- a/CombineHarvester/CombineTools/Rules.mk +++ b/CombineHarvester/CombineTools/Rules.mk @@ -1,3 +1,14 @@ SUBDIRS := LIB_DEPS := LIB_EXTRA := -lboost_python -L$(shell scramv1 tool tag python LIBDIR) -l$(shell scramv1 tool tag python LIB) -lPyROOT + +$(d)/interface/GitVersion.h: $(TOP)/../.git/logs/HEAD + @echo -e "Updating $@" + @echo -e "namespace ch { inline std::string GitVersion() { return \""$(shell git describe --dirty)"\"; } }\n" > $@ + +clean_$(d)/interface/GitVersion.h : + rm -f $(subst clean_,,$@) + +clean_$(d) : clean_$(d)/interface/GitVersion.h + +dir_$(d) : | $(d)/interface/GitVersion.h \ No newline at end of file diff --git a/CombineHarvester/CombineTools/interface/.gitignore b/CombineHarvester/CombineTools/interface/.gitignore new file mode 100644 index 00000000..cf118a1b --- /dev/null +++ b/CombineHarvester/CombineTools/interface/.gitignore @@ -0,0 +1 @@ +GitVersion.h diff --git a/CombineHarvester/CombineTools/interface/CardWriter.h b/CombineHarvester/CombineTools/interface/CardWriter.h new file mode 100644 index 00000000..6c1220bb --- /dev/null +++ b/CombineHarvester/CombineTools/interface/CardWriter.h @@ -0,0 +1,80 @@ +#ifndef CombineTools_CardWriter_h +#define CombineTools_CardWriter_h +#include +#include +#include +#include +#include "CombineTools/interface/CombineHarvester.h" +#include "CombineTools/interface/Object.h" + +namespace ch { + +/** + * Automates the writing of datacards into directory structures + * + * A CardWriter is constructed with two strings which give patterns for where + * the datacard text and ROOT files should be located and how they should be + * named. These patterns can contain placeholders for the object metadata, + * e.g.: + * + * ch::CardWriter writer( + * "$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt", + * "$TAG/common/$ANALYSIS_$CHANNEL.input_$ERA.root"); + * + * All of the standard metadata placeholders are supported. Then, assuming a + * CombineHarvester instance `cb` has already been configured, the cards are + * written with: + * + * writer.WriteCards("LIMITS/cmb", cb); + * + * The CardWriter will determine the sets of root files and datacards to + * create given the metadata of the objects in `cb`. The first argument of + * `WriteCards` is substituted into the `$TAG` placeholder. In this way + * multiple calls to \ref WriteCards can produce output in different folders. + * + * By default CardWriter will create any directories that are needed, but this + * behaviour can be switched off with the `CreateDirectories` method. Also + * note that any existing files with the same name will be over-written. + * + * \note The $MASS variable is treated differently than the others when + * constructing the list of output files. It is anticipated that a mass value + * of `*` should not give rise to actual files containing the `*` character, + * but rather implies an Object that is valid for any mass-point. It is + * commonly used to denote background processes that do not depend on the + * signal mass hypothesis. Therefore this class ignores objects with a mass + * value of `*` when determining which files to create, and when creating the + * files treats this object as matching any mass value. It is possible to + * alter or remove this behaviour by supplying a new list of wildcard values + * with the \ref SetWildcardMasses method. + */ +class CardWriter { + public: + /// Must be constructed with text and ROOT file patterns + CardWriter(std::string const& text_pattern, std::string const& root_pattern); + /// Write datacards according to patterns, substituting `$TAG` for `tag` + void WriteCards(std::string const& tag, ch::CombineHarvester& cmb) const; + /// Set >= 1 for verbose output, otherwise silent + CardWriter& SetVerbosity(unsigned v); + /// Control whether directories can be created if missing + CardWriter& CreateDirectories(bool flag); + /// Redefine the mass values that should be treated as wildcards + CardWriter& SetWildcardMasses(std::vector const& masses); + + private: + typedef std::map> PatternMap; + std::string text_pattern_; + std::string root_pattern_; + mutable std::string tag_; + std::vector wildcard_masses_; + unsigned v_; + bool create_dirs_; + + std::string Compile(std::string pattern, ch::Object const* obj, + bool skip_mass = false) const; + PatternMap BuildMap(std::string const& pattern, + ch::CombineHarvester& cmb) const; + void MakeDirs(PatternMap const& map) const; +}; +} + +#endif diff --git a/CombineHarvester/CombineTools/interface/CombineHarvester.h b/CombineHarvester/CombineTools/interface/CombineHarvester.h index c80e171b..e580f08f 100644 --- a/CombineHarvester/CombineTools/interface/CombineHarvester.h +++ b/CombineHarvester/CombineTools/interface/CombineHarvester.h @@ -164,18 +164,6 @@ class CombineHarvester { */ /**@{*/ // Set generation - template - std::set GenerateSetFromProcs( - std::function func); - - template - std::set GenerateSetFromObs( - std::function func); - - template - std::set GenerateSetFromSysts( - std::function func); - std::set bin_set(); std::set bin_id_set(); std::set process_set(); @@ -185,18 +173,55 @@ class CombineHarvester { std::set mass_set(); std::set syst_name_set(); std::set syst_type_set(); - /**@}*/ - // An alternative way to do the set generation - // template - // T unwind(T const& x) { return x; } + /** + * Fill an std::set with the return values from an arbitrary function + * + * This method will loop through all ch::Observation, ch::Process and + * ch::Systematic entries and call the user-supplied function `func`. The + * return value is then inserted into the set. + * + * @tparam T A function (or other callable) that must have a single + * `ch::Object const*` argument. + * @tparam R The return type of the function, which is deduced by using + * `std::result_of`, then `std::decay`. The latter is needed to handle + * functions that return by reference, i.e. turning a type R& into a type R. + */ + template ::type>::type> + std::set SetFromAll(T func); + + /** + * Fill an std::set using only the Observation entries + * + * \sa SetFromAll + */ + template ::type>::type> + std::set SetFromObs(T func); + + /** + * Fill an std::set using only the Process entries + * + * \sa SetFromAll + */ + template ::type>::type> + std::set SetFromProcs(T func); - // template - // auto SetFromProcs(T func) -> std::set { - // std::set ret; - // for (auto const& item : procs_) ret.insert(func(item.get())); - // return ret; - // }; + /** + * Fill an std::set using only the Systematic entries + * + * \sa SetFromAll + */ + template ::type>::type> + std::set SetFromSysts(T func); + /**@}*/ /** * \name Modification @@ -446,29 +471,35 @@ void FillHistMappings(std::vector & mappings); // --------------------------------------------------------------- // Template method implementation // --------------------------------------------------------------- -template -std::set CombineHarvester::GenerateSetFromProcs( - std::function func) { - std::set ret; +template +std::set CombineHarvester::SetFromAll(T func) { + std::set ret; + for (auto const& item : obs_) ret.insert(func(item.get())); for (auto const& item : procs_) ret.insert(func(item.get())); + for (auto const& item : systs_) ret.insert(func(item.get())); return ret; -} +}; -template -std::set CombineHarvester::GenerateSetFromObs( - std::function func) { - std::set ret; +template +std::set CombineHarvester::SetFromObs(T func) { + std::set ret; for (auto const& item : obs_) ret.insert(func(item.get())); return ret; -} +}; + +template +std::set CombineHarvester::SetFromProcs(T func) { + std::set ret; + for (auto const& item : procs_) ret.insert(func(item.get())); + return ret; +}; -template -std::set CombineHarvester::GenerateSetFromSysts( - std::function func) { - std::set ret; +template +std::set CombineHarvester::SetFromSysts(T func) { + std::set ret; for (auto const& item : systs_) ret.insert(func(item.get())); return ret; -} +}; template void CombineHarvester::ForEachObj(Function func) { diff --git a/CombineHarvester/CombineTools/interface/Logging.h b/CombineHarvester/CombineTools/interface/Logging.h index 6af679be..c3cb6aaa 100644 --- a/CombineHarvester/CombineTools/interface/Logging.h +++ b/CombineHarvester/CombineTools/interface/Logging.h @@ -2,6 +2,7 @@ #define CombineTools_Logging_h #include #include +#include namespace ch { @@ -9,6 +10,8 @@ namespace ch { #define LOGLINE(x, y) LogLine(x, __func__, y) +#define FNLOG(x) x << "[" << __func__ << "]" +#define FNLOGC(x, y) if (y) x << "[" << __func__ << "] " /** * \brief Writes a logging message to a given ostream * \details Should be used with the macro LOGLINE which will call this function @@ -33,7 +36,6 @@ void LogLine(std::ostream& stream, std::string const& func, */ std::string FnError(std::string const& message, std::string const& file, unsigned line, std::string const& fn); -} /** * \brief Extracts the fully-qualified function name from a complete function @@ -49,4 +51,60 @@ std::string FnError(std::string const& message, std::string const& file, */ std::string GetQualififedName(std::string const& str); +/** + * Conveniently initialise a ch::FnTimer instance + * + * This macro should be placed at the start of a function, e.g.: + * + * void MyFunction() { + * LAUNCH_FUNCTION_TIMER(__timer__, __token__) + * } + * + * The arguments are the names of two objects (a ch::FnTimer and a + * ch::FnTimer::Token) that will be created by this macro. Note that the + * ch::FnTimer will be assigned the current function name automatically. + */ +#define LAUNCH_FUNCTION_TIMER(x, y) \ + static FnTimer x(ch::GetQualififedName(__PRETTY_FUNCTION__)); \ + auto y = x.Inc(); + +/** + * Determine the total amount of time spent in a function + * + * An FnTimer instance should typically be declared as a static variable at + * the beginning of a function, follwed by a call to the Inc() method, which + * will increment the counter. The Inc() method also returns an FnTimer::Token + * object that records the time at which it is constructed and then destroyed, + * the latter occurring automatically at the end of the function. At the end + * of the program the FnTimer destructor will write a message to the screen + * summarising the number of calls and the time information. + * + * \note A simple way of using this class is via the LAUNCH_FUNCTION_TIMER(x,y) + * macro + */ +class FnTimer { + public: + class Token { + public: + explicit Token(FnTimer *src); + ~Token(); + private: + FnTimer *src_; + }; + + explicit FnTimer(std::string name); + ~FnTimer(); + Token Inc(); + void StartTimer(); + void StopTimer(); + + private: + std::string name_; + unsigned calls_; + std::chrono::time_point start_; + std::chrono::time_point end_; + double elapsed_; +}; +} + #endif diff --git a/CombineHarvester/CombineTools/interface/TFileIO.h b/CombineHarvester/CombineTools/interface/TFileIO.h index f1e4d2d3..e6633e91 100644 --- a/CombineHarvester/CombineTools/interface/TFileIO.h +++ b/CombineHarvester/CombineTools/interface/TFileIO.h @@ -29,6 +29,9 @@ T OpenFromTFile(TFile* file, std::string const& path); // ------------------------------------------------------------------ template void ch::WriteToTFile(T const* ptr, TFile* file, std::string const& path) { + #ifdef TIME_FUNCTIONS + LAUNCH_FUNCTION_TIMER(__timer__, __token__) + #endif file->cd(); std::vector as_vec; boost::split(as_vec, path, boost::is_any_of("/")); diff --git a/CombineHarvester/CombineTools/interface/Utilities.h b/CombineHarvester/CombineTools/interface/Utilities.h index d538b54c..d5a6ebb0 100644 --- a/CombineHarvester/CombineTools/interface/Utilities.h +++ b/CombineHarvester/CombineTools/interface/Utilities.h @@ -29,17 +29,11 @@ std::vector ExtractSampledFitParameters(RooFitResult const& res); // --------------------------------------------------------------------------- // Property matching & editing // --------------------------------------------------------------------------- -void SetStandardBinNames(CombineHarvester & cb); +void SetStandardBinNames( + CombineHarvester& cb, + std::string const& pattern = "$ANALYSIS_$CHANNEL_$BINID_$ERA"); -template -void SetStandardBinName(T *input) { - std::string newname; - newname = input->analysis() + "_" - + input->channel() + "_" - + boost::lexical_cast(input->bin_id()) + "_" - + input->era(); - input->set_bin(newname); -} +void SetStandardBinName(ch::Object* obj, std::string pattern); template bool MatchingProcess(T const& first, U const& second) { @@ -69,28 +63,7 @@ void SetProperties(T * first, U const* second) { first->set_mass(second->mass()); } - -template -void SetFromBinName(T *input, std::string parse_rules) { - boost::replace_all(parse_rules, "$ANALYSIS", "(?\\w+)"); - boost::replace_all(parse_rules, "$ERA", "(?\\w+)"); - boost::replace_all(parse_rules, "$CHANNEL", "(?\\w+)"); - boost::replace_all(parse_rules, "$BINID", "(?\\w+)"); - boost::replace_all(parse_rules, "$MASS", "(?\\w+)"); - boost::regex rgx(parse_rules); - boost::smatch matches; - boost::regex_search(input->bin(), matches, rgx); - if (matches.str("ANALYSIS").length()) - input->set_analysis(matches.str("ANALYSIS")); - if (matches.str("ERA").length()) - input->set_era(matches.str("ERA")); - if (matches.str("CHANNEL").length()) - input->set_channel(matches.str("CHANNEL")); - if (matches.str("BINID").length()) - input->set_bin_id(boost::lexical_cast(matches.str("BINID"))); - if (matches.str("MASS").length()) - input->set_mass(matches.str("MASS")); -} +void SetFromBinName(ch::Object *input, std::string parse_rules); // --------------------------------------------------------------------------- // Rate scaling @@ -143,9 +116,52 @@ std::vector> GenerateCombinations( std::vector ParseFileLines(std::string const& file_name); +/** + * Generate a vector of mass values using ranges and intervals specified in a + * string + * + * The input string should be of the format: + * + * m1-m2:r1,m3,m4,m5-m6:r2,... + * + * where mi and ri must both be positive. A term like mi-mj:r (where mi must + * be < mj) genrates masses starting at mi and increasing by an interval r up + * to mj. + * + * This function returns a vector of ordered mass values converted to strings. + * + * \note Use the function ch::ValsFromRange if you need to include negative + * numbers - this uses a different syntax for the ranges so doesn't suffer + * from the amiguity of the `-` sign + * + * @param input The input string to decode + * @param fmt The format specifier for converting floating-point mass values + * to strings + */ std::vector MassesFromRange(std::string const& input, std::string const& fmt = "%.0f"); +/** + * Generate a vector of values using ranges and intervals specified in a + * string + * + * The input string should be of the format: + * + * m1:m2|r1,m3,m4,m5:m6|r2,... + * + * where mi and ri can be positive or negative. A term like mi-mj:r (where mi + * must be < mj) genrates values starting at mi and increasing by an interval + * r up to mj. + * + * This function returns a vector of ordered values converted to strings. + * + * @param input The input string to decode + * @param fmt The format specifier for converting floating-point values to + * strings + */ +std::vector ValsFromRange(std::string const& input, + std::string const& fmt = "%.0f"); + bool HasNegativeBins(TH1 const* h); void ZeroNegativeBins(TH1 *h); diff --git a/CombineHarvester/CombineTools/lib/combineharvester.py b/CombineHarvester/CombineTools/lib/combineharvester.py index a49e6e08..fc097792 100644 --- a/CombineHarvester/CombineTools/lib/combineharvester.py +++ b/CombineHarvester/CombineTools/lib/combineharvester.py @@ -11,7 +11,31 @@ def AddObservations(self, mass=[''], analysis=[''], era=[''], channel=[''], bin= def AddProcesses(self, mass=[''], analysis=[''], era=[''], channel=[''], procs=[], bin=[(0, '')], signal=False): return self.__AddProcesses__(mass, analysis, era, channel, procs, bin, signal) +def SetFromAll(self, func): + res = set() + self.ForEachObj(lambda x : res.add(func(x))) + return res + +def SetFromObs(self, func): + res = set() + self.ForEachObs(lambda x : res.add(func(x))) + return res + +def SetFromProcs(self, func): + res = set() + self.ForEachProc(lambda x : res.add(func(x))) + return res + +def SetFromSysts(self, func): + res = set() + self.ForEachSyst(lambda x : res.add(func(x))) + return res + # now we turn it into a member function CombineHarvester.ParseDatacard = ParseDatacard CombineHarvester.AddObservations = AddObservations CombineHarvester.AddProcesses = AddProcesses +CombineHarvester.SetFromAll = SetFromAll +CombineHarvester.SetFromObs = SetFromObs +CombineHarvester.SetFromProcs = SetFromProcs +CombineHarvester.SetFromSysts = SetFromSysts diff --git a/CombineHarvester/CombineTools/src/CardWriter.cc b/CombineHarvester/CombineTools/src/CardWriter.cc new file mode 100644 index 00000000..76314707 --- /dev/null +++ b/CombineHarvester/CombineTools/src/CardWriter.cc @@ -0,0 +1,157 @@ +#include "CombineTools/interface/CardWriter.h" +#include +#include +#include +#include +#include "boost/format.hpp" +#include "CombineTools/interface/Logging.h" +#include "CombineTools/interface/Algorithm.h" + +namespace ch { + +CardWriter::CardWriter(std::string const& text_pattern, + std::string const& root_pattern) + : text_pattern_(text_pattern), + root_pattern_(root_pattern), + wildcard_masses_({"*"}), + v_(0), + create_dirs_(true) {} + +CardWriter& CardWriter::SetVerbosity(unsigned v) { + v_ = v; + return *this; +} + +CardWriter& CardWriter::CreateDirectories(bool flag) { + create_dirs_ = flag; + return *this; +} + + +CardWriter& CardWriter::SetWildcardMasses( + std::vector const& masses) { + wildcard_masses_ = masses; + return *this; +} + +auto CardWriter::BuildMap(std::string const& pattern, + ch::CombineHarvester& cmb) const -> PatternMap { + PatternMap f_map; + // We first filter Objects having a mass value in the wildcard list + cmb.cp().mass(wildcard_masses_, false) + .ForEachObj([&](ch::Object const* obj) { + // Build the fully-compiled key + std::string key = Compile(pattern, obj); + if (f_map.count(key)) return; + std::set mappings; + // The fully-compiled pattern always goes in + mappings.insert(key); + // Compile again, but this time skipping the $MASS substitution + std::string maps_pattern = Compile(pattern, obj, true); + // Create a set of patterns by substituting each mass wildcard + for (auto m : wildcard_masses_) { + std::string tmp = maps_pattern; + boost::replace_all(tmp, "$MASS", m); + mappings.insert(tmp); + } + f_map[key] = mappings; + }); + return f_map; +} + +void CardWriter::MakeDirs(PatternMap const& map) const { + std::set mk_dirs; + for (auto const& f : map) { + mk_dirs.insert(boost::filesystem::path(f.first).parent_path()); + } + for (auto & dir : mk_dirs) { + FNLOGC(std::cout, v_ > 0) << "Creating dir " << dir.string() << "\n"; + + boost::filesystem::create_directories(dir); + } +} + +void CardWriter::WriteCards(std::string const& tag, + ch::CombineHarvester& cmb) const { + #ifdef TIME_FUNCTIONS + LAUNCH_FUNCTION_TIMER(__timer__, __token__) + #endif + + // Update the current "$TAG" value + tag_ = tag; + + // Build the set of files we need to create (the map keys), and associate a + // set of compiled Object patterns that are associated to each file (the map + // values). One entry will typically look like: + // + // ["htt_mt_125.root"] = {"htt_mt_125.root", "htt_mt_*.root"} + // + // i.e. in addition to getting a pattern identical to the filename, we also + // get a pattern where each $MASS placeholder has been replaced by a wildcard + // expression. + auto f_map = BuildMap(root_pattern_, cmb); + + if (create_dirs_) MakeDirs(f_map); + + // It turns out that the Compile method takes a small but appreciable amount + // of CPU time. To evaluate it every time it's needed could be O(10^7) calls, + // equivalent to tens of seconds for a complex model. To avoid this we just + // calculate once for each ch::Object and store the result in a map, which we + // use as a look-up later. + std::map root_map; + cmb.ForEachObj([&](ch::Object const* obj) { + root_map[obj] = Compile(root_pattern_, obj); + }); + std::map text_map; + cmb.ForEachObj([&](ch::Object const* obj) { + text_map[obj] = Compile(text_pattern_, obj); + }); + + for (auto const& f : f_map) { + // Create each ROOT file (overwrite pre-existing) + FNLOGC(std::cout, v_ > 0) << "Creating file " << f.first << "\n"; + TFile file(f.first.c_str(), "RECREATE"); + + // Filter CH instance to leave only the objects that will be written into + // this file + CombineHarvester f_cmb = cmb.cp().FilterAll([&](ch::Object const* obj) { + return !ch::contains(f.second, root_map.at(obj)); + }); + + // Call BuildMap again - this time to figure out which text datacards to + // create + auto d_map = BuildMap(text_pattern_, f_cmb); + + // Create dirs if we're allowed to + if (create_dirs_) MakeDirs(d_map); + + // Loop through each datacard + for (auto const& d : d_map) { + // Filter CH instance to leave only the objects that will be written into + // this text datacard + CombineHarvester d_cmb = f_cmb.cp().FilterAll([&](ch::Object const* obj) { + return !ch::contains(d.second, text_map.at(obj)); + }); + FNLOGC(std::cout, v_ > 0) << "Creating datacard " << d.first << "\n"; + d_cmb.WriteDatacard(d.first, file); + } + }; +} + +std::string CardWriter::Compile(std::string pattern, ch::Object const* obj, + bool skip_mass) const { + #ifdef TIME_FUNCTIONS + LAUNCH_FUNCTION_TIMER(__timer__, __token__) + #endif + boost::replace_all(pattern, "$TAG", tag_); + boost::replace_all(pattern, "$BINID", + boost::lexical_cast(obj->bin_id())); + boost::replace_all(pattern, "$BIN", obj->bin()); + boost::replace_all(pattern, "$PROCESS", obj->process()); + if (!skip_mass) boost::replace_all(pattern, "$MASS", obj->mass()); + boost::replace_all(pattern, "$ERA", obj->era()); + boost::replace_all(pattern, "$CHANNEL", obj->channel()); + boost::replace_all(pattern, "$ANALYSIS", obj->analysis()); + return pattern; +} +} diff --git a/CombineHarvester/CombineTools/src/CombineHarvester_Datacards.cc b/CombineHarvester/CombineTools/src/CombineHarvester_Datacards.cc index 03c3de8e..faf0b397 100644 --- a/CombineHarvester/CombineTools/src/CombineHarvester_Datacards.cc +++ b/CombineHarvester/CombineTools/src/CombineHarvester_Datacards.cc @@ -1,4 +1,5 @@ #include "CombineTools/interface/CombineHarvester.h" +#include #include #include #include @@ -8,8 +9,6 @@ #include #include "boost/lexical_cast.hpp" #include "boost/algorithm/string.hpp" -#include "boost/range/algorithm_ext/erase.hpp" -#include "boost/range/algorithm/find.hpp" #include "boost/format.hpp" #include "boost/regex.hpp" #include "boost/filesystem.hpp" @@ -24,6 +23,7 @@ #include "CombineTools/interface/Utilities.h" #include "CombineTools/interface/TFileIO.h" #include "CombineTools/interface/Algorithm.h" +#include "CombineTools/interface/GitVersion.h" namespace ch { @@ -347,8 +347,8 @@ void CombineHarvester::FillHistMappings(std::vector & mappings) { CombineHarvester ch_signals = std::move(this->cp().bin({bin}).signals().histograms()); - auto sig_proc_set = ch_signals.GenerateSetFromProcs( - std::mem_fn(&ch::Process::process)); + auto sig_proc_set = + ch_signals.SetFromProcs(std::mem_fn(&ch::Process::process)); for (auto sig_proc : sig_proc_set) { mappings.push_back({sig_proc, bin, nullptr, bin + "/" + sig_proc + "$MASS", @@ -438,17 +438,24 @@ void CombineHarvester::FillHistMappings(std::vector & mappings) { void CombineHarvester::WriteDatacard(std::string const& name, TFile& root_file) { + if (!root_file.IsOpen()) { + throw std::runtime_error(FNERROR( + std::string("Output ROOT file is not open: ") + root_file.GetName())); + } std::ofstream txt_file; txt_file.open(name); + if (!txt_file.is_open()) { + throw std::runtime_error(FNERROR("Unable to create file: " + name)); + } + + txt_file << "# Datacard produced by CombineHarvester with git status: " + << ch::GitVersion() << "\n"; std::string dashes(80, '-'); - auto bin_set = - this->GenerateSetFromObs(std::mem_fn(&ch::Observation::bin)); - auto proc_set = - this->GenerateSetFromProcs(std::mem_fn(&ch::Process::process)); - auto sys_set = - this->GenerateSetFromSysts(std::mem_fn(&ch::Systematic::name)); + auto bin_set = this->SetFromObs(std::mem_fn(&ch::Observation::bin)); + auto proc_set = this->SetFromProcs(std::mem_fn(&ch::Process::process)); + auto sys_set = this->SetFromSysts(std::mem_fn(&ch::Systematic::name)); txt_file << "imax " << bin_set.size() << " number of bins\n"; txt_file << "jmax " << proc_set.size() - 1 @@ -460,8 +467,7 @@ void CombineHarvester::WriteDatacard(std::string const& name, std::vector mappings; FillHistMappings(mappings); - auto bins = - this->GenerateSetFromObs(std::mem_fn(&ch::Observation::bin)); + auto bins = this->SetFromObs(std::mem_fn(&ch::Observation::bin)); auto proc_sys_map = this->GenerateProcSystMap(); @@ -539,8 +545,17 @@ void CombineHarvester::WriteDatacard(std::string const& name, } txt_file << "\n"; txt_file << "observation "; + // On the precision of the observation yields: .1f is not sufficient for + // combine to be happy if we have some asimov dataset with non-integer values. + // We could just always give .4f but this doesn't look nice for the majority + // of cards that have real data. Instead we'll check... + std::string obs_fmt_int = "%-15.1f "; + std::string obs_fmt_flt = "%-15.4f "; for (auto const& obs : obs_) { - txt_file << boost::format("%-15.1f ") % obs->rate(); + bool is_float = + std::fabs(obs->rate() - std::round(obs->rate())) > 1E-4; + txt_file << boost::format(is_float ? obs_fmt_flt : obs_fmt_int) + % obs->rate(); } txt_file << "\n"; txt_file << dashes << "\n"; diff --git a/CombineHarvester/CombineTools/src/CombineHarvester_Filters.cc b/CombineHarvester/CombineTools/src/CombineHarvester_Filters.cc index c4bf547c..1739d1cb 100644 --- a/CombineHarvester/CombineTools/src/CombineHarvester_Filters.cc +++ b/CombineHarvester/CombineTools/src/CombineHarvester_Filters.cc @@ -130,11 +130,11 @@ CombineHarvester & CombineHarvester::data() { std::set CombineHarvester::bin_set() { std::set result = - this->GenerateSetFromObs(std::mem_fn(&ch::Observation::bin)); + this->SetFromObs(std::mem_fn(&ch::Observation::bin)); std::set result2 = - this->GenerateSetFromProcs(std::mem_fn(&ch::Process::bin)); + this->SetFromProcs(std::mem_fn(&ch::Process::bin)); std::set result3 = - this->GenerateSetFromSysts(std::mem_fn(&ch::Systematic::bin)); + this->SetFromSysts(std::mem_fn(&ch::Systematic::bin)); result.insert(result2.begin(), result2.end()); result.insert(result3.begin(), result3.end()); return result; @@ -142,31 +142,31 @@ std::set CombineHarvester::bin_set() { std::set CombineHarvester::bin_id_set() { std::set result = - this->GenerateSetFromObs(std::mem_fn(&ch::Observation::bin_id)); + this->SetFromObs(std::mem_fn(&ch::Observation::bin_id)); std::set result2 = - this->GenerateSetFromProcs(std::mem_fn(&ch::Process::bin_id)); + this->SetFromProcs(std::mem_fn(&ch::Process::bin_id)); std::set result3 = - this->GenerateSetFromSysts(std::mem_fn(&ch::Systematic::bin_id)); + this->SetFromSysts(std::mem_fn(&ch::Systematic::bin_id)); result.insert(result2.begin(), result2.end()); result.insert(result3.begin(), result3.end()); return result; } std::set CombineHarvester::process_set() { - std::set result = this->GenerateSetFromProcs( + std::set result = this->SetFromProcs( std::mem_fn(&ch::Process::process)); - std::set result2 = this->GenerateSetFromSysts( + std::set result2 = this->SetFromSysts( std::mem_fn(&ch::Systematic::process)); result.insert(result2.begin(), result2.end()); return result; } std::set CombineHarvester::analysis_set() { - std::set result = this->GenerateSetFromObs( + std::set result = this->SetFromObs( std::mem_fn(&ch::Observation::analysis)); - std::set result2 = this->GenerateSetFromProcs( + std::set result2 = this->SetFromProcs( std::mem_fn(&ch::Process::analysis)); - std::set result3 = this->GenerateSetFromSysts( + std::set result3 = this->SetFromSysts( std::mem_fn(&ch::Systematic::analysis)); result.insert(result2.begin(), result2.end()); result.insert(result3.begin(), result3.end()); @@ -175,22 +175,22 @@ std::set CombineHarvester::analysis_set() { std::set CombineHarvester::era_set() { std::set result = - this->GenerateSetFromObs(std::mem_fn(&ch::Observation::era)); + this->SetFromObs(std::mem_fn(&ch::Observation::era)); std::set result2 = - this->GenerateSetFromProcs(std::mem_fn(&ch::Process::era)); + this->SetFromProcs(std::mem_fn(&ch::Process::era)); std::set result3 = - this->GenerateSetFromSysts(std::mem_fn(&ch::Systematic::era)); + this->SetFromSysts(std::mem_fn(&ch::Systematic::era)); result.insert(result2.begin(), result2.end()); result.insert(result3.begin(), result3.end()); return result; } std::set CombineHarvester::channel_set() { - std::set result = this->GenerateSetFromObs( + std::set result = this->SetFromObs( std::mem_fn(&ch::Observation::channel)); - std::set result2 = this->GenerateSetFromProcs( + std::set result2 = this->SetFromProcs( std::mem_fn(&ch::Process::channel)); - std::set result3 = this->GenerateSetFromSysts( + std::set result3 = this->SetFromSysts( std::mem_fn(&ch::Systematic::channel)); result.insert(result2.begin(), result2.end()); result.insert(result3.begin(), result3.end()); @@ -198,11 +198,11 @@ std::set CombineHarvester::channel_set() { } std::set CombineHarvester::mass_set() { - std::set result = this->GenerateSetFromObs( + std::set result = this->SetFromObs( std::mem_fn(&ch::Observation::mass)); - std::set result2 = this->GenerateSetFromProcs( + std::set result2 = this->SetFromProcs( std::mem_fn(&ch::Process::mass)); - std::set result3 = this->GenerateSetFromSysts( + std::set result3 = this->SetFromSysts( std::mem_fn(&ch::Systematic::mass)); result.insert(result2.begin(), result2.end()); result.insert(result3.begin(), result3.end()); @@ -210,13 +210,13 @@ std::set CombineHarvester::mass_set() { } std::set CombineHarvester::syst_name_set() { - std::set result = this->GenerateSetFromSysts( + std::set result = this->SetFromSysts( std::mem_fn(&ch::Systematic::name)); return result; } std::set CombineHarvester::syst_type_set() { - std::set result = this->GenerateSetFromSysts( + std::set result = this->SetFromSysts( std::mem_fn(&ch::Systematic::type)); return result; } diff --git a/CombineHarvester/CombineTools/src/CombineHarvester_Python.cc b/CombineHarvester/CombineTools/src/CombineHarvester_Python.cc index 9a6af35e..967a6d37 100644 --- a/CombineHarvester/CombineTools/src/CombineHarvester_Python.cc +++ b/CombineHarvester/CombineTools/src/CombineHarvester_Python.cc @@ -1,6 +1,7 @@ #include "CombineTools/interface/CombineHarvester_Python.h" #include "CombineTools/interface/CombineHarvester.h" #include "CombineTools/interface/Observation.h" +#include "CombineTools/interface/CardWriter.h" #include "boost/python.hpp" #include "TFile.h" #include "TH1F.h" @@ -11,6 +12,7 @@ using ch::Object; using ch::Observation; using ch::Process; using ch::Systematic; +using ch::CardWriter; void FilterAllPy(ch::CombineHarvester & cb, boost::python::object func) { auto lambda = [func](ch::Object *obj) -> bool { @@ -282,4 +284,13 @@ BOOST_PYTHON_MODULE(libCHCombineTools) .def("asymm", &Systematic::asymm) ; + py::class_("CardWriter", py::init()) + .def("WriteCards", &CardWriter::WriteCards) + .def("SetVerbosity", &CardWriter::SetVerbosity, + py::return_internal_reference<>()) + .def("CreateDirectories", &CardWriter::CreateDirectories, + py::return_internal_reference<>()) + .def("SetWildcardMasses", &CardWriter::SetWildcardMasses, + py::return_internal_reference<>()) + ; } \ No newline at end of file diff --git a/CombineHarvester/CombineTools/src/HttSystematics_MSSMLegacy.cc b/CombineHarvester/CombineTools/src/HttSystematics_MSSMLegacy.cc index c3c28a68..0e5d9525 100644 --- a/CombineHarvester/CombineTools/src/HttSystematics_MSSMLegacy.cc +++ b/CombineHarvester/CombineTools/src/HttSystematics_MSSMLegacy.cc @@ -18,7 +18,7 @@ using ch::JoinStr; void AddMSSMSystematics(CombineHarvester & cb) { CombineHarvester src = cb.cp(); - auto signal = Set2Vec(src.cp().signals().GenerateSetFromProcs( + auto signal = Set2Vec(src.cp().signals().SetFromProcs( std::mem_fn(&Process::process))); src.cp().signals() diff --git a/CombineHarvester/CombineTools/src/Logging.cc b/CombineHarvester/CombineTools/src/Logging.cc index df445019..3860d873 100644 --- a/CombineHarvester/CombineTools/src/Logging.cc +++ b/CombineHarvester/CombineTools/src/Logging.cc @@ -44,4 +44,23 @@ std::string FnError(std::string const& message, std::string const& file, res += "\n" + banner; return res; } + +// Implementation of FnTimer ("Function Timer") class +FnTimer::FnTimer(std::string name) : name_(name), calls_(0), elapsed_(0.) {} +FnTimer::~FnTimer() { + printf( + "[Timer] %-40s Calls: %-20u Total [s]: %-20.5g Per-call [s]: %-20.5g\n", + name_.c_str(), calls_, elapsed_, elapsed_ / double(calls_)); +} +FnTimer::Token FnTimer::Inc() { + ++calls_; + return Token(this); +} +void FnTimer::StartTimer() { start_ = std::chrono::system_clock::now(); } +void FnTimer::StopTimer() { + end_ = std::chrono::system_clock::now(); + elapsed_ += std::chrono::duration(end_ - start_).count(); +} +FnTimer::Token::Token(FnTimer* src) : src_(src) { src_->StartTimer(); } +FnTimer::Token::~Token() { src_->StopTimer(); } } diff --git a/CombineHarvester/CombineTools/src/Utilities.cc b/CombineHarvester/CombineTools/src/Utilities.cc index 1b90ccd4..e3cd1c8a 100644 --- a/CombineHarvester/CombineTools/src/Utilities.cc +++ b/CombineHarvester/CombineTools/src/Utilities.cc @@ -47,12 +47,46 @@ std::vector ExtractSampledFitParameters( // --------------------------------------------------------------------------- // Property matching & editing // --------------------------------------------------------------------------- -void SetStandardBinNames(CombineHarvester & cb) { - cb.ForEachObs(ch::SetStandardBinName); - cb.ForEachProc(ch::SetStandardBinName); - cb.ForEachSyst(ch::SetStandardBinName); +void SetStandardBinNames(CombineHarvester& cb, std::string const& pattern) { + cb.ForEachObj([&](ch::Object* obj) { + ch::SetStandardBinName(obj, pattern); + }); } +void SetStandardBinName(ch::Object* obj, std::string pattern) { + boost::replace_all(pattern, "$BINID", + boost::lexical_cast(obj->bin_id())); + boost::replace_all(pattern, "$BIN", obj->bin()); + boost::replace_all(pattern, "$PROCESS", obj->process()); + boost::replace_all(pattern, "$MASS", obj->mass()); + boost::replace_all(pattern, "$ERA", obj->era()); + boost::replace_all(pattern, "$CHANNEL", obj->channel()); + boost::replace_all(pattern, "$ANALYSIS", obj->analysis()); + obj->set_bin(pattern); +} + +void SetFromBinName(ch::Object *input, std::string parse_rules) { + boost::replace_all(parse_rules, "$ANALYSIS", "(?\\w+)"); + boost::replace_all(parse_rules, "$ERA", "(?\\w+)"); + boost::replace_all(parse_rules, "$CHANNEL", "(?\\w+)"); + boost::replace_all(parse_rules, "$BINID", "(?\\w+)"); + boost::replace_all(parse_rules, "$MASS", "(?\\w+)"); + boost::regex rgx(parse_rules); + boost::smatch matches; + boost::regex_search(input->bin(), matches, rgx); + if (matches.str("ANALYSIS").length()) + input->set_analysis(matches.str("ANALYSIS")); + if (matches.str("ERA").length()) + input->set_era(matches.str("ERA")); + if (matches.str("CHANNEL").length()) + input->set_channel(matches.str("CHANNEL")); + if (matches.str("BINID").length()) + input->set_bin_id(boost::lexical_cast(matches.str("BINID"))); + if (matches.str("MASS").length()) + input->set_mass(matches.str("MASS")); +} + + // --------------------------------------------------------------------------- // Rate scaling // --------------------------------------------------------------------------- @@ -218,6 +252,38 @@ std::vector MassesFromRange(std::string const& input, return result; } +std::vector ValsFromRange(std::string const& input, + std::string const& fmt) { + std::set mass_set; + std::vector tokens; + boost::split(tokens, input, boost::is_any_of(",")); + for (auto const& t : tokens) { + std::vector sub_tokens; + boost::split(sub_tokens, t, boost::is_any_of(":|")); + if (sub_tokens.size() == 1) { + double mass_val = boost::lexical_cast(sub_tokens[0]); + mass_set.insert(mass_val); + } else if (sub_tokens.size() == 3) { + double lo = boost::lexical_cast(sub_tokens[0]); + double hi = boost::lexical_cast(sub_tokens[1]); + double step = boost::lexical_cast(sub_tokens[2]); + if (hi <= lo) + throw std::runtime_error( + "[ValsFromRange] High mass is smaller than low mass!"); + double start = lo; + while (start < hi + 1E-4) { + mass_set.insert(start); + start += step; + } + } + } + std::vector result; + for (auto const& m : mass_set) { + result.push_back((boost::format(fmt) % m).str()); + } + return result; +} + boost::filesystem::path make_relative(boost::filesystem::path p_from, boost::filesystem::path p_to) { p_from = boost::filesystem::absolute(p_from); diff --git a/CombineHarvester/CombineTools/test/ImpactsTable.cpp b/CombineHarvester/CombineTools/test/ImpactsTable.cpp index e4518f68..b9f7a087 100644 --- a/CombineHarvester/CombineTools/test/ImpactsTable.cpp +++ b/CombineHarvester/CombineTools/test/ImpactsTable.cpp @@ -4,6 +4,7 @@ #include "boost/format.hpp" #include "boost/algorithm/string/replace.hpp" #include "boost/program_options.hpp" +#include "boost/regex.hpp" #include "TTree.h" #include "TFile.h" /* @@ -41,6 +42,7 @@ int main(int argc, char* argv[]) { unsigned max = 99999; bool do_latex = false; bool do_text = false; + std::vector filters; po::variables_map vm; po::notify(vm); @@ -49,11 +51,15 @@ int main(int argc, char* argv[]) { ("input,i", po::value(&input_file)->required()) ("max,m", po::value(&max)->default_value(max)) ("latex,l", po::value(&do_latex)->implicit_value(true)) - ("text,t", po::value(&do_text)->implicit_value(true)); + ("text,t", po::value(&do_text)->implicit_value(true)) + ("filter", po::value>(&filters)->multitoken()); po::store(po::command_line_parser(argc, argv) .options(config).allow_unregistered().run(), vm); po::notify(vm); + std::vector rgx; + for (auto const& ele : filters) rgx.emplace_back(ele); + TFile f(input_file.c_str()); TTree *t = (TTree*)f.Get("impact"); @@ -80,9 +86,31 @@ int main(int argc, char* argv[]) { for (unsigned i = 0; i < t->GetEntries(); ++i) { t->GetEntry(i); reader.parameter = *param_str; + std::cout << reader.parameter << "\n"; + bool matches = false; + for (unsigned j = 0; j < rgx.size(); ++j) { + if (boost::regex_match((*param_str).Data(), rgx[j])) { + matches = true; + break; + } + } + if (rgx.size() && !matches) continue; val_vec.push_back(reader); } + double tot_obs = 0; + double tot_post = 0; + double tot_pre = 0; + for (unsigned i = 0; i < max && i < val_vec.size(); ++i) { + Vals &val = val_vec[i]; + tot_obs += (val.impact * val.impact); + tot_post += (val.impact_post * val.impact_post); + tot_pre += (val.impact_pre * val.impact_pre); + } + tot_obs = std::sqrt(tot_obs); + tot_post = std::sqrt(tot_post); + tot_pre = std::sqrt(tot_pre); + string fmt = "& %-5.2f & %-5.2f"; if (do_text) { @@ -151,5 +179,8 @@ int main(int argc, char* argv[]) { cout << R"(\hline)" << "\n"; cout << R"(\end{tabular})" << "\n"; } + std::cout << "Total impact obs: " << tot_obs << "\n"; + std::cout << "Total impact post: " << tot_post << "\n"; + std::cout << "Total impact pre: " << tot_pre << "\n"; return 0; } diff --git a/CombineHarvester/CombineTools/test/MSSMYieldTable.cpp b/CombineHarvester/CombineTools/test/MSSMYieldTable.cpp index 6c7f83f2..6b54c974 100644 --- a/CombineHarvester/CombineTools/test/MSSMYieldTable.cpp +++ b/CombineHarvester/CombineTools/test/MSSMYieldTable.cpp @@ -164,12 +164,8 @@ int main(int argc, char* argv[]) { cmb.ParseDatacard(d, "", "", "", 0, signal_mass); } - cmb.ForEachSyst(boost::bind(ch::SetFromBinName, _1, + cmb.ForEachObj(boost::bind(ch::SetFromBinName, _1, "$ANALYSIS_$CHANNEL_$BINID_$ERA")); - cmb.ForEachObs(boost::bind(ch::SetFromBinName, _1, - "$ANALYSIS_$CHANNEL_$BINID_$ERA")); - cmb.ForEachProc(boost::bind(ch::SetFromBinName, _1, - "$ANALYSIS_$CHANNEL_$BINID_$ERA")); cmb.cp().signals().ForEachProc([&](ch::Process *p) { p->set_rate(p->rate() * d_tanb); diff --git a/CombineHarvester/CombineTools/test/MSSMtauptYieldTable.cpp b/CombineHarvester/CombineTools/test/MSSMtauptYieldTable.cpp index 6aba047e..ad69015e 100644 --- a/CombineHarvester/CombineTools/test/MSSMtauptYieldTable.cpp +++ b/CombineHarvester/CombineTools/test/MSSMtauptYieldTable.cpp @@ -179,12 +179,8 @@ int main(int argc, char* argv[]) { cmb.ParseDatacard(d, "", "", "", 0, signal_mass); } - cmb.ForEachSyst(boost::bind(ch::SetFromBinName, _1, + cmb.ForEachObj(boost::bind(ch::SetFromBinName, _1, "$ANALYSIS_$CHANNEL_$BINID_$ERA")); - cmb.ForEachObs(boost::bind(ch::SetFromBinName, _1, - "$ANALYSIS_$CHANNEL_$BINID_$ERA")); - cmb.ForEachProc(boost::bind(ch::SetFromBinName, _1, - "$ANALYSIS_$CHANNEL_$BINID_$ERA")); cmb.cp().signals().ForEachProc([&](ch::Process *p) { p->set_rate(p->rate() * d_tanb); diff --git a/CombineHarvester/CombineTools/test/NuisanceSummary.cpp b/CombineHarvester/CombineTools/test/NuisanceSummary.cpp index 9bd02995..48bfd357 100644 --- a/CombineHarvester/CombineTools/test/NuisanceSummary.cpp +++ b/CombineHarvester/CombineTools/test/NuisanceSummary.cpp @@ -40,9 +40,7 @@ int main(int argc, char* argv[]) { cmb.ParseDatacard(d, parse_rule); } - cmb.ForEachSyst(ch::SetStandardBinName); - cmb.ForEachObs(ch::SetStandardBinName); - cmb.ForEachProc(ch::SetStandardBinName); + ch::SetStandardBinNames(cmb); // cmb.PrintAll(); if (fitresult_file.length()) { @@ -54,8 +52,7 @@ int main(int argc, char* argv[]) { delete fitresult; } - auto systematics = cmb.GenerateSetFromSysts( - std::mem_fn(&ch::Systematic::name)); + auto systematics = cmb.SetFromSysts(std::mem_fn(&ch::Systematic::name)); std::set ungrouped; @@ -131,18 +128,16 @@ int main(int argc, char* argv[]) { std::set cleaned_rates; ch::CombineHarvester all_syst = cmb.cp() .syst_name(p.second); - auto chns = all_syst - .GenerateSetFromSysts(std::mem_fn(&ch::Systematic::channel)); + auto chns = all_syst.SetFromSysts(std::mem_fn(&ch::Systematic::channel)); for (auto const& c : chns) std::cout << " " << c << " "; std::cout << "\n"; std::set all_procs; - auto bins = all_syst - .GenerateSetFromSysts(std::mem_fn(&ch::Systematic::bin)); + auto bins = all_syst.SetFromSysts(std::mem_fn(&ch::Systematic::bin)); for (auto const& bin : bins) { ch::CombineHarvester syst_in_bin = all_syst.cp() .bin({bin}); - auto procs = syst_in_bin - .GenerateSetFromSysts(std::mem_fn(&ch::Systematic::process)); + auto procs = + syst_in_bin.SetFromSysts(std::mem_fn(&ch::Systematic::process)); for (auto const& proc : procs) { all_procs.insert(proc); ch::CombineHarvester syst_in_proc = syst_in_bin.cp() diff --git a/CombineHarvester/CombineTools/test/SBWeighted.cpp b/CombineHarvester/CombineTools/test/SBWeighted.cpp index a544f352..69c1d816 100644 --- a/CombineHarvester/CombineTools/test/SBWeighted.cpp +++ b/CombineHarvester/CombineTools/test/SBWeighted.cpp @@ -65,9 +65,7 @@ int main(int argc, char* argv[]){ for (auto const& d : datacards) { cmb.ParseDatacard(d, parse_rule); } - cmb.ForEachSyst(ch::SetStandardBinName); - cmb.ForEachObs(ch::SetStandardBinName); - cmb.ForEachProc(ch::SetStandardBinName); + ch::SetStandardBinNames(cmb); RooFitResult* fitresult = nullptr; if (fitresult_file.length() && postfit) { diff --git a/CombineHarvester/CombineTools/test/SMLegacyExample.cpp b/CombineHarvester/CombineTools/test/SMLegacyExample.cpp index 2d77f9a1..f7bc10eb 100644 --- a/CombineHarvester/CombineTools/test/SMLegacyExample.cpp +++ b/CombineHarvester/CombineTools/test/SMLegacyExample.cpp @@ -9,6 +9,7 @@ #include "CombineTools/interface/CombineHarvester.h" #include "CombineTools/interface/Utilities.h" #include "CombineTools/interface/HttSystematics.h" +#include "CombineTools/interface/CardWriter.h" using namespace std; @@ -93,7 +94,7 @@ int main() { {0, "tauTau_1jet_high_mediumhiggs"}, {1, "tauTau_1jet_high_highhiggs"}, {2, "tauTau_vbf"}}; - vector masses = ch::MassesFromRange("110-145:5"); + vector masses = ch::ValsFromRange("110:145|5"); cout << ">> Creating processes and observations...\n"; for (string era : {"7TeV", "8TeV"}) { @@ -273,11 +274,11 @@ int main() { // cout << " - " << x << "\n"; // } - string folder = "output/sm_cards"; + string folder = "output/sm_cards/LIMITS"; boost::filesystem::create_directories(folder); - boost::filesystem::create_directories(folder+"/common"); + boost::filesystem::create_directories(folder + "/common"); for (auto m : masses) { - boost::filesystem::create_directories(folder+"/"+m); + boost::filesystem::create_directories(folder + "/" + m); } for (string chn : chns) { @@ -292,10 +293,26 @@ int main() { folder + "/" + m + "/" + b + ".txt", output); } } - cb.cp().channel({chn}).mass({"125", "*"}).WriteDatacard( - folder+"/htt_" + chn + "_125.txt", output); output.Close(); } + /* + Alternatively use the ch::CardWriter class to automate the datacard writing. + This makes it simple to re-produce the LIMITS directory format employed during + the Run I analyses. + Uncomment the code below to test: + */ + + /* + // Here we define a CardWriter with a template for how the text datacard + // and the root files should be named. + ch::CardWriter writer("$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt", + "$TAG/common/$ANALYSIS_$CHANNEL.input_$ERA.root"); + writer.SetVerbosity(1); + writer.WriteCards("output/sm_cards/LIMITS/cmb", cb); + for (auto chn : cb.channel_set()) { + writer.WriteCards("output/sm_cards/LIMITS/" + chn, cb.cp().channel({chn})); + } + */ cout << "\n>> Done!\n"; } diff --git a/CombineHarvester/CombineTools/test/SOBPlot.cpp b/CombineHarvester/CombineTools/test/SOBPlot.cpp index 091771f8..a932d650 100644 --- a/CombineHarvester/CombineTools/test/SOBPlot.cpp +++ b/CombineHarvester/CombineTools/test/SOBPlot.cpp @@ -147,9 +147,7 @@ int main(int argc, char* argv[]){ for (auto const& d : datacards) { cmb.ParseDatacard(d, parse_rule); } - cmb.ForEachSyst(ch::SetStandardBinName); - cmb.ForEachObs(ch::SetStandardBinName); - cmb.ForEachProc(ch::SetStandardBinName); + ch::SetStandardBinNames(cmb); RooFitResult *fitresult = nullptr; if (fitresult_file.length() && postfit) { @@ -158,7 +156,7 @@ int main(int argc, char* argv[]){ cmb.UpdateParameters(fitparams); } - auto bins = cmb.GenerateSetFromObs(std::mem_fn(&ch::Observation::bin)); + auto bins = cmb.SetFromObs(std::mem_fn(&ch::Observation::bin)); map weights; for (auto const& bin : bins) { TH1F sig = cmb.cp().bin({bin}).signals().GetShape(); diff --git a/CombineHarvester/CombineTools/test/YieldTable.cpp b/CombineHarvester/CombineTools/test/YieldTable.cpp index 598fbda7..5acc124e 100644 --- a/CombineHarvester/CombineTools/test/YieldTable.cpp +++ b/CombineHarvester/CombineTools/test/YieldTable.cpp @@ -69,9 +69,8 @@ int main(int argc, char* argv[]){ for (auto const& d : datacards) { cmb.ParseDatacard(d, parse_rule); } - cmb.ForEachSyst(ch::SetStandardBinName); - cmb.ForEachObs(ch::SetStandardBinName); - cmb.ForEachProc(ch::SetStandardBinName); + ch::SetStandardBinNames(cmb); + RooFitResult *fitresult = nullptr; if (fitresult_file.length() && postfit) { diff --git a/CombineHarvester/docs/PythonInterface.md b/CombineHarvester/docs/PythonInterface.md index 42780f0e..b2a43378 100644 --- a/CombineHarvester/docs/PythonInterface.md +++ b/CombineHarvester/docs/PythonInterface.md @@ -1,13 +1,16 @@ Python Interface {#python-interface} ==================================== +[TOC] + The python interface is embedded within the shared library (`CombineTools/lib/libCHCombineTools.so`) that is produced when the code is compiled. Add this directory to your `PYTHONPATH` environment variable so that it can be imported from anywhere. This can also be achieved by doing: eval `make env` Then in a python script you can do, e.g. `import combineharvester as ch`. The sections below summarises the methods that are currently supported. -## Constructors and copying +Constructors and copying {#py-constr-copy} +========================================== C++: ch::CombineHarvester cb; @@ -20,7 +23,8 @@ Python: cb_shallow_copy = cb.cp() cb_deep_copy = cb.deep() -## Logging and Printing +Logging and Printing {#py-log-print} +==================================== C++: cb.PrintAll(); @@ -39,9 +43,11 @@ Python: cb.PrintParams() cb.SetVerbosity(1) -## Datacards +Datacards {#py-datacards} +========================= -### Parsing specifying metadata +Parsing specifying metadata {#py-datacards-meta} +------------------------------------------------- C++: cb.ParseDatacard("datacard.txt", "htt", "8TeV", "mt", 6, "125"); @@ -54,7 +60,8 @@ Metadata parameters also have default values and can be named explicitly: cb.ParseDatacard("datacard.txt", analysis = "htt", mass = "125") -### Parsing with pattern substitution +Parsing with pattern substitution {#py-datacards-pat-sub} +--------------------------------------------------------- C++: cb.ParseDatacard("htt_mt_8_8TeV.txt", "$ANALYSIS_$CHANNEL_$BINID_$ERA.txt"); @@ -63,7 +70,8 @@ Python (note the different method name): cb.QuickParseDatacard("htt_mt_8_8TeV.txt", "$ANALYSIS_$CHANNEL_$BINID_$ERA.txt") -### Writing +Writing {#py-datacards-writing} +------------------------------- C++: cb.WriteDatacard("card.txt", "file.root"); // or @@ -75,7 +83,8 @@ Python (second method not yet available): cb.WriteDatacard("card.txt", "file.root") -## Filtering +Filtering {#py-filtering} +========================= All of the basic filter methods can be called and chained in a similar way in both interfaces. For example: C++: @@ -98,9 +107,10 @@ Python: cb.FilterAll(lambda obj : obj.mass() in ['110', '145']) -## Set producers +Set producers {#py-sets} +======================== -Only the basic set producer methods are available at the moment. The generic methods (GenerateSetFromObs, GenerateSetFromProcs, GenerateSetFromSysts) will be adapted shortly. +All basic set producer methods are available. C++: @@ -115,7 +125,22 @@ Python: for p in cb.process_set(): ... -## Modifications +**NEW** The generic methods are now available too, and accept a generic function object. + +C++: + + set bins = cb.SetFromAll(std::mem_fn(&ch::Object::bin)); + set some_set = cb.SetFromProcs([](ch::Process const* p) { + return p->process() + "_" + p->bin(); + }); + +Python: + + bins = cb.SetFromAll(ch.Object.bin) + some_set = cb.SetFromProcs(lambda x : x.process() + '_' + x.bin()) + +Modifications {#py-modifications} +================================= The `GetParameter`, and `UpdateParameters` methods that use `ch::Parameter` objects are not available (and may be deprecated soon anyway). The `UpdateParameters` method taking a `RooFitResult` is available however. @@ -147,7 +172,8 @@ Python: if p.process() == 'ggH_hww125': p.set_signal(True) cb.ForEachProc(SwitchToSignal) -## Rate and shape evaluation +Rate and shape evaluation {#py-eval} +==================================== All methods are supported with a similar interface. @@ -172,7 +198,8 @@ Python: f = cb.GetShape() g = cb.GetShapeWithUncertainty(res, 500) -## Datacard creation +Datacard creation {#py-creation} +================================ Creating observation and process entries is supported. @@ -201,3 +228,18 @@ Python: The AddSyst ExtractPdfs, ExtractData and AddWorkspace methods are not currently supported. The creation of Systematic entries via the [AddSyst](\ref ch::CombineHarvester::AddSyst) method is not trivial to convert to python due to the heavy use of C++ templates. A python equivalent will be made available in a future release. +Class: CardWriter {#py-card-writer} +================================ +The ch::CardWriter class has an identical interface in python. + +C++: + + ch::CardWriter writer("$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt", + "$TAG/common/$ANALYSIS_$CHANNEL.input_$ERA.root"); + writer.WriteCards("LIMITS/cmb", cb); + +Python: + + writer = ch.CardWriter('$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt', + '$TAG/common/$ANALYSIS_$CHANNEL.input_$ERA.root') + writer.WriteCards('LIMITS/cmb', cb) diff --git a/CombineHarvester/mk/makefile b/CombineHarvester/mk/makefile index a56ce638..880cd51d 100644 --- a/CombineHarvester/mk/makefile +++ b/CombineHarvester/mk/makefile @@ -69,6 +69,7 @@ CXX=g++ LD=g++ CXXFLAGS= -Wall -Wextra -O2 -std=c++0x LDFLAGS= -shared -Wall -Wextra +CXXFLAGS += $(EXTRAFLAGS) # Extra gcc flags that will generate A LOT of warnings # -pedantic -Weffc++