diff --git a/validphys2/src/validphys/results.py b/validphys2/src/validphys/results.py
index 9444b7fc1d..0bb9685b44 100644
--- a/validphys2/src/validphys/results.py
+++ b/validphys2/src/validphys/results.py
@@ -75,6 +75,7 @@ def __init__(self, dataset, covmat, sqrtcovmat):
         stats = Stats(self._central_value)
         self._covmat = covmat
         self._sqrtcovmat = sqrtcovmat
+        self._dataset = dataset
         super().__init__(stats)
 
     @property
@@ -98,15 +99,19 @@ def sqrtcovmat(self):
         """Lower part of the Cholesky decomposition"""
         return self._sqrtcovmat
 
+    @property
+    def name(self):
+        return self._dataset.name
 
 class ThPredictionsResult(StatsResult):
     """Class holding theory prediction, inherits from StatsResult
     When created with `from_convolution`, it keeps track of the PDF for which it was computed
     """
 
-    def __init__(self, dataobj, stats_class, label=None, pdf=None, theoryid=None):
+    def __init__(self, dataobj, stats_class, datasetnames=None, label=None, pdf=None, theoryid=None):
         self.stats_class = stats_class
         self.label = label
+        self._datasetnames = datasetnames
         statsobj = stats_class(dataobj.T)
         self._pdf = pdf
         self._theoryid = theoryid
@@ -149,8 +154,12 @@ def from_convolution(cls, pdf, dataset, central_only=False):
             label = cls.make_label(pdf, dataset)
         thid = dataset.thspec.id
 
+        datasetnames = [i.name for i in datasets]
+        return cls(th_predictions, pdf.stats_class, datasetnames, label, pdf=pdf, theoryid=thid)
-        return cls(th_predictions, pdf.stats_class, label, pdf=pdf, theoryid=thid)
 
+    @property
+    def datasetnames(self):
+        return self._datasetnames
 
 
 class ThUncertaintiesResult(StatsResult):
diff --git a/validphys2/src/validphys/theorycovariance/construction.py b/validphys2/src/validphys/theorycovariance/construction.py
index 147568daea..78e02cf99f 100644
--- a/validphys2/src/validphys/theorycovariance/construction.py
+++ b/validphys2/src/validphys/theorycovariance/construction.py
@@ -63,9 +63,7 @@ def theory_covmat_singleprocess(theory_covmat_singleprocess_no_table, fivetheori
 
 results_central_bytheoryids = collect(results_central, ("theoryids",))
 
-each_dataset_results_central_bytheory = collect(
-    "results_central_bytheoryids", ("group_dataset_inputs_by_process", "data")
-)
+each_dataset_results_central_bytheory = collect("results_central_bytheoryids", ("data",))
 
 
 @check_using_theory_covmat
@@ -100,50 +98,6 @@ def theory_covmat_dataset(
     return thcovmat
 
 
-@check_correct_theory_combination
-def theory_covmat_datasets(each_dataset_results_central_bytheory, fivetheories):
-    """Produces an array of theory covariance matrices. Each matrix corresponds
-    to a different dataset, which must be specified in the runcard."""
-    dataset_covmats = []
-    for dataset in each_dataset_results_central_bytheory:
-        theory_centrals = [x[1].central_value for x in dataset]
-        s = make_scale_var_covmat(theory_centrals)
-        dataset_covmats.append(s)
-    return dataset_covmats
-
-
-@check_correct_theory_combination
-def total_covmat_datasets(each_dataset_results_central_bytheory, fivetheories):
-    """Produces an array of total covariance matrices; the sum of experimental
-    and scale-varied theory covariance matrices. Each matrix corresponds
-    to a different dataset, which must be specified in the runcard.
-    These are needed for calculation of chi2 per dataset."""
-    dataset_covmats = []
-    for dataset in each_dataset_results_central_bytheory:
-        theory_centrals = [x[1].central_value for x in dataset]
-        s = make_scale_var_covmat(theory_centrals)
-        sigma = dataset[0][0].covmat
-        cov = s + sigma
-        dataset_covmats.append(cov)
-    return dataset_covmats
-
-
-@check_correct_theory_combination
-def total_covmat_diagtheory_datasets(each_dataset_results_central_bytheory, fivetheories):
-    """Same as total_covmat_theory_datasets but for diagonal theory only"""
-    dataset_covmats = []
-    for dataset in each_dataset_results_central_bytheory:
-        theory_centrals = [x[1].central_value for x in dataset]
-        s = make_scale_var_covmat(theory_centrals)
-        # Initialise array of zeros and set precision to same as FK tables
-        s_diag = np.zeros((len(s), len(s)), dtype=np.float32)
-        np.fill_diagonal(s_diag, np.diag(s))
-        sigma = dataset[0][0].covmat
-        cov = s_diag + sigma
-        dataset_covmats.append(cov)
-    return dataset_covmats
-
-
 @table
 def theory_block_diag_covmat(theory_covmat_datasets, procs_index):
     """Takes the theory covariance matrices for individual datasets and
@@ -182,27 +136,29 @@ def total_covmat_procs(procs_results_theory, fivetheories):
     return proc_result_covmats
 
 
-def dataset_names(data_input):
-    """Returns a list of the names of the datasets, in the same order as
-    they are inputted in the runcard"""
-    return [el.name for el in data_input]
-
+ProcessInfo = namedtuple("ProcessInfo", ("preds", "namelist", "sizes"))
 
-ProcessInfo = namedtuple("ProcessInfo", ("theory", "namelist", "sizes"))
 
+def combine_by_type(each_dataset_results_central_bytheory):
+    """Groups the datasets by process and returns an instance of the ProcessInfo class.
+
-def combine_by_type(each_dataset_results_central_bytheory, dataset_names):
-    """Groups the datasets according to processes and returns three objects:
-    theories_by_process: the relevant theories grouped by process type
-    ordered_names: dictionary with keys of process type and values being the
-                   corresponding list of names of datasets, in the order they
-                   are appended to theories_by_process
-    dataset_size: dictionary with keys of dataset name and values being the
-                  number of points in that dataset"""
+    Parameters
+    ----------
+    each_dataset_results_central_bytheory: list[list[(DataResult, ThPredictionsResult)]]
+        Tuples of DataResult and ThPredictionsResult (where only the second is used for the
+        construction of the theory covariance matrix), wrapped in a list such that there is a
+        tuple per theoryid, wrapped in another list per dataset.
+
+    Returns
+    -------
+    process_info : :py:class:`validphys.theorycovariance.construction.ProcessInfo`
+        Class with the info needed to construct the theory covmat.
+ """ dataset_size = defaultdict(list) theories_by_process = defaultdict(list) ordered_names = defaultdict(list) - for dataset, name in zip(each_dataset_results_central_bytheory, dataset_names): + for dataset in each_dataset_results_central_bytheory: + name = dataset[0][0].name theory_centrals = [x[1].central_value for x in dataset] dataset_size[name] = len(theory_centrals[0]) proc_type = process_lookup(name) @@ -211,44 +167,11 @@ def combine_by_type(each_dataset_results_central_bytheory, dataset_names): for key, item in theories_by_process.items(): theories_by_process[key] = np.concatenate(item, axis=1) process_info = ProcessInfo( - theory=theories_by_process, namelist=ordered_names, sizes=dataset_size + preds=theories_by_process, namelist=ordered_names, sizes=dataset_size ) return process_info -def process_starting_points(combine_by_type): - """Returns a dictionary of indices in the covariance matrix corresponding - to the starting point of each process.""" - process_info = combine_by_type - running_index = 0 - start_proc = defaultdict(list) - for name in process_info.theory: - size = len(process_info.theory[name][0]) - start_proc[name] = running_index - running_index += size - return start_proc - - -def covmap(combine_by_type, dataset_names): - """Creates a map between the covmat indices from matrices ordered by - process to matrices ordered by experiment as listed in the runcard""" - mapping = defaultdict(list) - start_exp = defaultdict(list) - process_info = combine_by_type - running_index = 0 - for dataset in dataset_names: - size = process_info.sizes[dataset] - start_exp[dataset] = running_index - running_index += size - start = 0 - names_by_proc_list = [item for sublist in process_info.namelist.values() for item in sublist] - for dataset in names_by_proc_list: - for i in range(process_info.sizes[dataset]): - mapping[start + i] = start_exp[dataset] + i - start += process_info.sizes[dataset] - return mapping - - def covmat_3fpt(name1, name2, deltas1, deltas2): """Returns theory covariance sub-matrix for 3pt factorisation scale variation *only*, given two dataset names and collections @@ -490,14 +413,7 @@ def compute_covs_pt_prescrip( @check_correct_theory_combination -def covs_pt_prescrip( - combine_by_type, - process_starting_points, - theoryids, - point_prescription, - fivetheories, - seventheories, -): +def covs_pt_prescrip(combine_by_type, theoryids, point_prescription, fivetheories, seventheories): """Produces the sub-matrices of the theory covariance matrix according to a point prescription which matches the number of input theories. 
    If 5 theories are provided, a scheme 'bar' or 'nobar' must be
@@ -505,18 +421,31 @@
     correspond to applying the scale variation prescription to each pair of
     processes in turn, using a different procedure for the case where the
     processes are the same relative to when they are different."""
-    l = len(theoryids)
-    start_proc = process_starting_points
+    process_info = combine_by_type
+    running_index = 0
+    start_proc = defaultdict(list)
+    for name in process_info.preds:
+        size = len(process_info.preds[name][0])
+        start_proc[name] = running_index
+        running_index += size
+
     covmats = defaultdict(list)
-    for name1 in process_info.theory:
-        for name2 in process_info.theory:
-            central1, *others1 = process_info.theory[name1]
+    for name1 in process_info.preds:
+        for name2 in process_info.preds:
+            central1, *others1 = process_info.preds[name1]
             deltas1 = list((other - central1 for other in others1))
-            central2, *others2 = process_info.theory[name2]
+            central2, *others2 = process_info.preds[name2]
             deltas2 = list((other - central2 for other in others2))
             s = compute_covs_pt_prescrip(
-                point_prescription, l, name1, deltas1, name2, deltas2, fivetheories, seventheories
+                point_prescription,
+                len(theoryids),
+                name1,
+                deltas1,
+                name2,
+                deltas2,
+                fivetheories,
+                seventheories,
             )
             start_locs = (start_proc[name1], start_proc[name2])
             covmats[start_locs] = s
 
@@ -524,24 +453,30 @@
 
 
 @table
-def theory_covmat_custom(covs_pt_prescrip, covmap, procs_index):
+def theory_covmat_custom(covs_pt_prescrip, procs_index, combine_by_type):
     """Takes the individual sub-covmats between each two processes and
-    assembles them into a full covmat. Then reshuffles the order from
-    ordering by process to ordering by experiment as listed in the runcard"""
-    matlength = int(
-        sum([len(covmat) for covmat in covs_pt_prescrip.values()])
-        / int(np.sqrt(len(covs_pt_prescrip)))
-    )
-    # Initialise arrays of zeros and set precision to same as FK tables
-    mat = np.zeros((matlength, matlength), dtype=np.float32)
-    cov_by_exp = np.zeros((matlength, matlength), dtype=np.float32)
-    for locs in covs_pt_prescrip:
-        cov = covs_pt_prescrip[locs]
-        mat[locs[0] : (len(cov) + locs[0]), locs[1] : (len(cov.T) + locs[1])] = cov
-    for i in range(matlength):
-        for j in range(matlength):
-            cov_by_exp[covmap[i]][covmap[j]] = mat[i][j]
-    df = pd.DataFrame(cov_by_exp, index=procs_index, columns=procs_index)
+    assembles them into a full covmat, whose index is ordered by process
+    as the predictions are grouped in combine_by_type."""
+    process_info = combine_by_type
+
+    # Construct a covmat_index based on the order of datasets as they appear in combine_by_type
+    # NOTE: maybe the ordering of covmat_index is always the same as that of procs_index?
+    # Regardless, we don't want to open ourselves up to the risk of the ordering of procs_index
+    # changing and breaking this function
+    indexlist = []
+    for procname in process_info.preds:
+        for datasetname in process_info.namelist[procname]:
+            slicer = procs_index.get_locs((procname, datasetname))
+            indexlist += procs_index[slicer].to_list()
+    covmat_index = pd.MultiIndex.from_tuples(indexlist, names=procs_index.names)
+
+    # Put the covariance matrices between pairs of processes into a single covariance matrix
+    total_datapoints = sum(combine_by_type.sizes.values())
+    mat = np.zeros((total_datapoints, total_datapoints), dtype=np.float32)
+    for locs, cov in covs_pt_prescrip.items():
+        xsize, ysize = cov.shape
+        mat[locs[0] : locs[0] + xsize, locs[1] : locs[1] + ysize] = cov
+    df = pd.DataFrame(mat, index=covmat_index, columns=covmat_index)
     return df
 
 
@@ -763,7 +698,8 @@ def theory_normcovmat_custom(theory_covmat_custom, procs_data_values):
     """Calculates the theory covariance matrix for scale variations normalised
     to data, with variations according to the relevant prescription."""
     df = theory_covmat_custom
-    mat = df / np.outer(procs_data_values, procs_data_values)
+    vals = procs_data_values.reindex(df.index)
+    mat = df / np.outer(vals, vals)
     return mat
 
 
diff --git a/validphys2/src/validphys/theorycovariance/tests.py b/validphys2/src/validphys/theorycovariance/tests.py
index b5b302ddd1..2e3d4d12ba 100644
--- a/validphys2/src/validphys/theorycovariance/tests.py
+++ b/validphys2/src/validphys/theorycovariance/tests.py
@@ -21,15 +21,10 @@
 from validphys.checks import check_two_dataspecs
 from validphys.theorycovariance.construction import (
     combine_by_type,
-    covmap,
-    covs_pt_prescrip,
-    process_starting_points,
     theory_corrmat_singleprocess,
-    theory_covmat_custom,
 )
 from validphys.theorycovariance.output import _get_key, matrix_plot_labels
 from validphys.theorycovariance.theorycovarianceutils import (
-    check_correct_theory_combination_dataspecs,
     check_correct_theory_combination_theoryconfig,
     process_lookup,
 )
@@ -147,36 +142,6 @@ def combine_by_type_dataspecs(all_matched_results, matched_dataspecs_dataset_nam
 dataspecs_theoryids = collect("theoryid", ["theoryconfig", "original", "dataspecs"])
 
 
-def process_starting_points_dataspecs(combine_by_type_dataspecs):
-    """Like process_starting_points but for matched dataspecs."""
-    return process_starting_points(combine_by_type_dataspecs)
-
-
-@check_correct_theory_combination_dataspecs
-def covs_pt_prescrip_dataspecs(
-    combine_by_type_dataspecs,
-    process_starting_points_dataspecs,
-    dataspecs_theoryids,
-    point_prescription,
-    fivetheories,
-    seventheories,
-):
-    """Like covs_pt_prescrip but for matched dataspecs."""
-    return covs_pt_prescrip(
-        combine_by_type_dataspecs,
-        process_starting_points_dataspecs,
-        dataspecs_theoryids,
-        point_prescription,
-        fivetheories,
-        seventheories,
-    )
-
-
-def covmap_dataspecs(combine_by_type_dataspecs, matched_dataspecs_dataset_name):
-    """Like covmap but for matched dataspecs."""
-    return covmap(combine_by_type_dataspecs, matched_dataspecs_dataset_name)
-
-
 matched_dataspecs_process = collect("process", ["dataspecs"])
 matched_dataspecs_dataset_name = collect("dataset_name", ["dataspecs"])
 matched_cuts_datasets = collect("dataset", ["dataspecs"])
@@ -204,16 +169,6 @@ def matched_experiments_index(matched_dataspecs_dataset_name, all_matched_data_l
     return index
 
 
-@table
-def theory_covmat_custom_dataspecs(
-    covs_pt_prescrip_dataspecs, covmap_dataspecs, matched_experiments_index
-):
-    """Like theory_covmat_custom but for matched dataspecs."""
matched dataspecs.""" - return theory_covmat_custom( - covs_pt_prescrip_dataspecs, covmap_dataspecs, matched_experiments_index - ) - - thx_corrmat = collect( "theory_corrmat_custom_dataspecs", ["combined_shift_and_theory_dataspecs", "theoryconfig"] )