
Commit

Merge pull request #1517 from NNPDF/utilize_error_members_instead_of_rawdata

use error_members instead of rawdata
scarlehoff committed Mar 1, 2022
2 parents 9613456 + 420ccc1 commit d3b2639
Showing 19 changed files with 98 additions and 76 deletions.
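
The change running through this diff replaces direct access to rawdata (values for every PDF member, including the central one) with the error_members accessors, so that statistics are taken only over the error members. Note that error_members appears both as an attribute on the result objects (th.error_members) and as a method on Stats containers (grid_values.error_members()). A minimal toy sketch of the distinction, not part of the commit, assuming a Monte-Carlo-style container where member 0 is the central member:

import numpy as np

class ToyMCStats:
    """Toy stand-in for a Monte Carlo Stats container: member 0 is central."""

    def __init__(self, data):
        self.data = np.atleast_2d(data)        # shape (members, points), like rawdata

    def central_value(self):
        return self.data[0]                    # the central member

    def error_members(self):
        return self.data[1:]                   # the members used for uncertainties

    def std_error(self):
        return self.error_members().std(axis=0)

stats = ToyMCStats(np.random.default_rng(0).normal(size=(101, 5)))
print(stats.data.shape)             # (101, 5): all members, central included
print(stats.error_members().shape)  # (100, 5): what the updated code operates on
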
4 changes: 2 additions & 2 deletions n3fit/src/n3fit/tests/test_vpinterface.py
@@ -68,7 +68,7 @@ def test_vpinterface():
res_2 = xplotting_grid(fit_2, 1.6, xgrid)
distances = distance_grids([fit_1, fit_2], [res_1, res_2], 0)
assert len(distances) == 2
- assert distances[0].grid_values.data.shape == (8, 40)
- assert distances[1].grid_values.data.shape == (8, 40)
+ assert distances[0].grid_values.data.shape == (1, 8, 40)
+ assert distances[1].grid_values.data.shape == (1, 8, 40)
np.testing.assert_allclose(distances[0].grid_values.data, 0.0)
assert not np.allclose(distances[1].grid_values.data, 0.0)
2 changes: 1 addition & 1 deletion validphys2/src/validphys/arclength.py
@@ -49,7 +49,7 @@ def arc_lengths(
# Finite diff. step-size, x-grid
eps = (b - a) / npoints
ixgrid = xgrid(a, b, "linear", npoints)
- # PDFs evaluated on grid
+ # PDFs evaluated on grid, use the entire thing, the Stats class will choose later
xfgrid = xplotting_grid(pdf, Q, ixgrid, basis, flavours).grid_values.data * ixgrid[1]
fdiff = np.diff(xfgrid) / eps # Compute forward differences
res += integrate.simps(1 + np.square(fdiff), ixgrid[1][1:])
4 changes: 2 additions & 2 deletions validphys2/src/validphys/calcutils.py
@@ -67,8 +67,8 @@ def calc_chi2(sqrtcov, diffs):
return np.einsum('i...,i...->...', vec,vec)

def all_chi2(results):
"""Return the chi² for all elements in the result. Note that the
interpretation of the result will depend on the PDF error type"""
"""Return the chi² for all elements in the result, regardless of the Stats class
Note that the interpretation of the result will depend on the PDF error type"""
data_result, th_result = results
diffs = th_result.rawdata - data_result.central_value[:,np.newaxis]
return calc_chi2(sqrtcov=data_result.sqrtcovmat, diffs=diffs)
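
The chi² contraction above is applied to one column of differences per member, so all_chi2 returns one chi² per member whatever the error type. A self-contained sketch of the same pattern (toy covariance, not the validphys implementation):

import numpy as np
from scipy.linalg import solve_triangular

def toy_calc_chi2(sqrtcov, diffs):
    """chi² from the Cholesky factor: solve L v = diffs so that v.v = diffs^T C^-1 diffs."""
    vec = solve_triangular(sqrtcov, diffs, lower=True)
    return np.einsum('i...,i...->...', vec, vec)

rng = np.random.default_rng(1)
ndata, nmembers = 4, 10
a = rng.normal(size=(ndata, ndata))
cov = a @ a.T + ndata * np.eye(ndata)        # well-conditioned toy covariance
sqrtcov = np.linalg.cholesky(cov)
diffs = rng.normal(size=(ndata, nmembers))   # theory minus data, one column per member
print(toy_calc_chi2(sqrtcov, diffs).shape)   # (10,): one chi² per member
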
6 changes: 3 additions & 3 deletions validphys2/src/validphys/closuretest/closure_results.py
@@ -122,7 +122,7 @@ def bootstrap_bias_experiment(
"""
dt_ct, th_ct = dataset_inputs_results
((_, th_ul),) = underlying_dataset_inputs_results
- th_ct_boot_cv = bootstrap_values(th_ct.rawdata, bootstrap_samples)
+ th_ct_boot_cv = bootstrap_values(th_ct.error_members, bootstrap_samples)
boot_diffs = th_ct_boot_cv - th_ul.central_value[:, np.newaxis]
boot_bias = calc_chi2(dt_ct.sqrtcovmat, boot_diffs) / len(dt_ct)
return boot_bias
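
The bootstrap now resamples only the error members. A toy version of the resampling step, with assumed semantics rather than validphys's bootstrap_values:

import numpy as np

def toy_bootstrap_values(error_members, nresamples):
    """Resample the member axis with replacement and average each resample."""
    rng = np.random.default_rng(2)
    ndata, nrep = error_members.shape
    idx = rng.integers(0, nrep, size=(nresamples, nrep))
    return error_members[:, idx].mean(axis=-1)    # shape (ndata, nresamples)

th_error_members = np.random.default_rng(3).normal(size=(20, 100))   # (data, members)
boot_cv = toy_bootstrap_values(th_error_members, nresamples=500)
print(boot_cv.shape)   # (20, 500): one bootstrap central value per resample
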
@@ -191,7 +191,7 @@ def variance_dataset(results, fit, use_fitcommondata):
"""
dt, th = results
- diff = th.central_value[:, np.newaxis] - th.rawdata
+ diff = th.central_value[:, np.newaxis] - th.error_members
var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean()
return VarianceData(var_unnorm, len(th))

@@ -210,7 +210,7 @@ def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500)
normalised to the number of data in the experiment.
"""
dt_ct, th_ct = dataset_inputs_results
- diff = th_ct.central_value[:, np.newaxis] - th_ct.rawdata
+ diff = th_ct.central_value[:, np.newaxis] - th_ct.error_members
var_unnorm_boot = bootstrap_values(
diff,
bootstrap_samples,
8 changes: 4 additions & 4 deletions validphys2/src/validphys/closuretest/multiclosure.py
@@ -130,7 +130,7 @@ def fits_dataset_bias_variance(
"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
# The dimensions here are (fit, data point, replica)
- reps = np.asarray([th.rawdata[:, :_internal_max_reps] for th in closures_th])
+ reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
centrals = reps.mean(axis=2)
# place bins on first axis
@@ -226,7 +226,7 @@ def dataset_xi(internal_multiclosure_dataset_loader):
"""
closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader
- replicas = np.asarray([th.rawdata for th in closures_th])
+ replicas = np.asarray([th.error_members for th in closures_th])
centrals = np.mean(replicas, axis=-1)
underlying = law_th.central_value

@@ -341,7 +341,7 @@ def _bootstrap_multiclosure_fits(
# construct proxy fits theory predictions
for fit_th in fit_boot_th:
rep_boot_index = rng.choice(n_rep_max, size=n_rep, replace=use_repeats)
- boot_ths.append(BootstrappedTheoryResult(fit_th.rawdata[:, rep_boot_index]))
+ boot_ths.append(BootstrappedTheoryResult(fit_th.error_members[:, rep_boot_index]))
return (boot_ths, *input_tuple)


@@ -741,7 +741,7 @@ def dataset_fits_bias_replicas_variance_samples(
"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
# The dimensions here are (fit, data point, replica)
- reps = np.asarray([th.rawdata[:, :_internal_max_reps] for th in closures_th])
+ reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
centrals = reps.mean(axis=2)
# place bins on first axis
2 changes: 1 addition & 1 deletion validphys2/src/validphys/closuretest/multiclosure_pdf.py
@@ -90,7 +90,7 @@ def xi_grid_values(xi_pdfgrids):
glu_sin_grid, nonsin_grid = xi_pdfgrids
# grid values have shape: replica, flavour, x
# concatenate along flavour
- return np.concatenate((glu_sin_grid.grid_values.data, nonsin_grid.grid_values.data), axis=1)
+ return np.concatenate((glu_sin_grid.grid_values.error_members(), nonsin_grid.grid_values.error_members()), axis=1)


def underlying_xi_grid_values(
@@ -126,8 +126,8 @@ def experiments_closure_pseudodata_estimators_table(
fits_erep_delta_chi2 = []
for i_fit, fit_th in enumerate(closures_th):
# some of these could be done outside of loop, but easier to do here.
- th_replicas = fit_th.rawdata
- th_central = np.mean(th_replicas, axis=-1)
+ th_replicas = fit_th.error_members
+ th_central = fit_th.central_value
dt_replicas = fits_reps_pseudo[i_fit].xs(exp_name, axis=0, level=0).to_numpy()
dt_central = fits_cv.xs(exp_name, axis=0, level=0).iloc[:, i_fit].to_numpy()

20 changes: 9 additions & 11 deletions validphys2/src/validphys/correlations.py
@@ -27,8 +27,8 @@ def _basic_obs_pdf_correlation(pdf_stats, obs_stats):
(nbins x nf x nx), compatible with grid_values, upon
changing replicas->bins.
"""
- x = pdf_stats.data - pdf_stats.central_value()
- y = obs_stats.data - obs_stats.central_value()
+ x = pdf_stats.error_members() - pdf_stats.central_value()
+ y = obs_stats.error_members() - obs_stats.central_value()

#We want to compute:
#sum(x*y)/(norm(x)*norm(y))
@@ -53,26 +53,24 @@ def _basic_obs_obs_correlation(obs1, obs2):
The result is (nbins1 , nbins2), a matrix containing the correlation
coefficients between the two sets.
"""
- x = (obs1.data - obs1.central_value()).T
- y = (obs2.data - obs2.central_value())
+ x = (obs1.error_members() - obs1.central_value()).T
+ y = obs2.error_members() - obs2.central_value()

return x@y/np.outer(la.norm(x,axis=1),la.norm(y,axis=0))

- #TODO: Implement for other error types. Do not use the rawdata.
@check_pdf_is_montecarlo
def obs_pdf_correlations(pdf, results, xplotting_grid):
"""Return the correlations between each point in a dataset and the PDF
values on a grid of (x,f) points in a format similar to `xplotting_grid`."""
- _ , th = results
+ _, th = results
corrs = pdf.stats_class(_basic_obs_pdf_correlation(xplotting_grid.grid_values, th.stats))
return xplotting_grid.copy_grid(grid_values=corrs)


- corrpair_results = collect('results', ['corrpair'])
- corrpair_datasets = collect('dataset', ['corrpair'])
+ corrpair_results = collect("results", ["corrpair"])
+ corrpair_datasets = collect("dataset", ["corrpair"])


@check_pdf_is_montecarlo
def obs_obs_correlations(pdf, corrpair_results):
"""Return the theoretical correlation matrix between a pair of observables."""
- (_,th1), (_,th2) = corrpair_results
+ (_, th1), (_, th2) = corrpair_results
return _basic_obs_obs_correlation(th1.stats, th2.stats)
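
Both helpers centre the error members on the central value and normalise, giving Pearson-like correlation coefficients across members. A self-contained numpy sketch of the observable-observable case (toy arrays, with the mean standing in for the central value):

import numpy as np

rng = np.random.default_rng(4)
reps1 = rng.normal(size=(100, 3))    # (members, bins) for observable 1
reps2 = rng.normal(size=(100, 2))    # (members, bins) for observable 2

x = (reps1 - reps1.mean(axis=0)).T   # (bins1, members)
y = reps2 - reps2.mean(axis=0)       # (members, bins2)
corr = x @ y / np.outer(np.linalg.norm(x, axis=1), np.linalg.norm(y, axis=0))
print(corr.shape)                    # (3, 2), entries between -1 and 1
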
2 changes: 1 addition & 1 deletion validphys2/src/validphys/covmats.py
@@ -531,7 +531,7 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered):
True
"""
th = ThPredictionsResult.from_convolution(pdf, dataset)
- pdf_cov = np.cov(th.rawdata, rowvar=True)
+ pdf_cov = np.cov(th.error_members, rowvar=True)
return pdf_cov + covmat_t0_considered
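
The PDF contribution to the covariance matrix is now the covariance of the theory predictions over the error members only. A toy-array sketch (not validphys objects):

import numpy as np

rng = np.random.default_rng(5)
ndata, nrep = 6, 100
error_members = rng.normal(size=(ndata, nrep))        # predictions, one column per member
exp_covmat = np.diag(rng.uniform(0.5, 1.0, ndata))    # stand-in for covmat_t0_considered

pdf_cov = np.cov(error_members, rowvar=True)          # (ndata, ndata)
total_cov = pdf_cov + exp_covmat
print(total_cov.shape)                                # (6, 6)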


4 changes: 2 additions & 2 deletions validphys2/src/validphys/dataplots.py
@@ -68,7 +68,7 @@ def _chi2_distribution_plots(chi2_data, stats, pdf, plot_type):
ax.set_xlabel(r"Replica $\chi^2$")

if plot_type == "hist":
- ax.hist(alldata.data, label=label, zorder=100)
+ ax.hist(alldata.error_members(), label=label, zorder=100)
elif plot_type == "kde":
# We need the squeeze here to change shape from (x, 1) to (x,)
ax = plotutils.kde_plot(alldata.data.squeeze(), label=label)
@@ -871,7 +871,7 @@ def plot_smpdf(pdf, dataset, obs_pdf_correlations, mark_threshold:float=0.9):

basis = obs_pdf_correlations.basis

- fullgrid = obs_pdf_correlations.grid_values.data
+ fullgrid = obs_pdf_correlations.grid_values.error_members()

fls = obs_pdf_correlations.flavours
x = obs_pdf_correlations.xgrid
19 changes: 7 additions & 12 deletions validphys2/src/validphys/deltachi2.py
@@ -140,15 +140,12 @@ def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid):
"""
positive_eigenvalue_mask = delta_chi2_hessian >= 0

- # include replica 0 in both new grids
+ # The masks do not include replica 0, add it in both grids
pos_mask = np.append(True, positive_eigenvalue_mask)
neg_mask = np.append(True, ~positive_eigenvalue_mask)

- pos_grid = xplotting_grid.grid_values.data[pos_mask]
- neg_grid = xplotting_grid.grid_values.data[neg_mask]
-
- pos_xplotting_grid = xplotting_grid.copy_grid(grid_values=pos_grid)
- neg_xplotting_grid = xplotting_grid.copy_grid(grid_values=neg_grid)
+ pos_xplotting_grid = xplotting_grid.mask_replicas(pos_mask)
+ neg_xplotting_grid = xplotting_grid.mask_replicas(neg_mask)

return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid]

@@ -210,12 +207,10 @@ def draw(self, pdf, grid, flstate):
labels = flstate.labels
handles = flstate.handles

- # pick all replicas grid_values for the flavour flindex. stats_class is a method
- # of the PDF class, which returns the stats calculator (object) for the pdf error type.
- # Basically stats is an object which says what is the type class of the replicas:
- # MCStats, HessianStats, SymmHessianStats. In this way validphys use the right methods
- # to compute statistical values.
- stats = pdf.stats_class(grid.grid_values.data[:, flindex, :])
+ # Create a copy of the `Stats` instance of the grid
+ # with only the flavours we are interested in
+ gv = grid.grid_values.data
+ stats = grid(grid_values=gv[:, flindex, :]).grid_values

# Ignore spurious normalization warnings
with warnings.catch_warnings():
2 changes: 1 addition & 1 deletion validphys2/src/validphys/paramfits/plots.py
@@ -778,7 +778,7 @@ def plot_total_as_distribution_dataspecs(
dataspecs_parabolic_as_determination_for_total,
dataspecs_speclabel):
#Remember that *_for_total is a len 1 list, so take the first element.
- kde_plot(dist[0].data, ax=ax, label=label)
+ kde_plot(dist[0].error_members(), ax=ax, label=label)
ax.set_xlabel(r"$\alpha_S$")
ax.legend()
return fig
31 changes: 24 additions & 7 deletions validphys2/src/validphys/pdfgrids.py
@@ -55,6 +55,23 @@ class XPlottingGrid:
grid_values: Stats
scale: str

+ def select_flavour(self, flindex):
+ """Return a new grid for one single flavour"""
+ if isinstance(flindex, str):
+ flstr = flindex
+ flindex = self.flavours.index(flindex)
+ else:
+ flstr = self.flavours[flindex]
+ new_grid = self.grid_values.data[:, flindex]
+ gv = self.grid_values.__class__(new_grid)
+ return dataclasses.replace(self, grid_values=gv, flavours=[flstr])
+
+ def mask_replicas(self, mask):
+ """Return a copy of XPlottingGrid with the mask applied to the replicas"""
+ new_grid = self.grid_values.data[mask]
+ gv = self.grid_values.__class__(new_grid)
+ return dataclasses.replace(self, grid_values=gv)
+
def copy_grid(self, grid_values=None):
"""Create a copy of the grid with potentially a different set of values"""
if not isinstance(grid_values, Stats):
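
In terms of the underlying array, the two new helpers reduce the (members, flavours, x) grid along different axes. A toy numpy sketch of the equivalent operations (illustrative shapes only, not validphys code):

import numpy as np

members, flavours, xpoints = 101, 8, 40
data = np.random.default_rng(6).normal(size=(members, flavours, xpoints))

# select_flavour(i): keep one flavour, dropping the flavour axis
one_flavour = data[:, 3]                      # shape (101, 40)

# mask_replicas(mask): keep a subset of members via a boolean mask on axis 0,
# e.g. re-adding member 0 to a mask over error members as in deltachi2.py above
mask = np.append(True, np.random.default_rng(7).random(members - 1) > 0.5)
subset = data[mask]                           # shape (n_kept, 8, 40)
print(one_flavour.shape, subset.shape)
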
@@ -92,9 +109,9 @@ def xplotting_grid(pdf:PDF, Q:(float,int), xgrid=None, basis:(str, Basis)='flavo
raise TypeError(f"Invalid xgrid {xgrid!r}")
gv = basis.grid_values(pdf, flavours, xgrid, Q)
#Eliminate Q axis
- gv = pdf.stats_class(gv.reshape(gv.shape[:-1]))
+ stats_gv = pdf.stats_class(gv.reshape(gv.shape[:-1]))

- res = XPlottingGrid(Q, basis, flavours, xgrid, gv, scale)
+ res = XPlottingGrid(Q, basis, flavours, xgrid, stats_gv, scale)
return res

xplotting_grids = collect(xplotting_grid, ('pdfs',))
@@ -263,7 +280,7 @@ def distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(None))=None

if pdf == pdfs[normalize_to]:
# Zero the PDF we are normalizing against
- pdf_zero = pdf.stats_class(np.zeros_like(gr2_stats.data[0]))
+ pdf_zero = pdf.stats_class(np.zeros_like(gr2_stats.data[0:1]))
newgrid = grid.copy_grid(grid_values=pdf_zero)
newgrids.append(newgrid)
continue
@@ -273,8 +290,8 @@ def distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(None))=None
sg1 = g_stats.std_error()
N1 = pdf.get_members()

- # the distance definition
- distance = pdf.stats_class(np.sqrt((cv1-cv2)**2/(sg1**2/N1+sg2**2/N2)))
+ # Wrap the distance into a Stats (1, flavours, points)
+ distance = Stats([np.sqrt((cv1-cv2)**2/(sg1**2/N1+sg2**2/N2))])

newgrid = grid.copy_grid(grid_values=distance)
newgrids.append(newgrid)
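
The quantity wrapped above is the difference of the central values in units of the combined standard error of the mean. A toy restatement (mean and standard deviation stand in for the Stats accessors, not validphys code):

import numpy as np

rng = np.random.default_rng(8)
reps1 = rng.normal(0.0, 1.0, size=(100, 40))   # (members, x points) for pdf 1
reps2 = rng.normal(0.1, 1.0, size=(120, 40))   # (members, x points) for pdf 2

cv1, cv2 = reps1.mean(axis=0), reps2.mean(axis=0)
sg1, sg2 = reps1.std(axis=0), reps2.std(axis=0)
n1, n2 = len(reps1), len(reps2)

distance = np.sqrt((cv1 - cv2) ** 2 / (sg1 ** 2 / n1 + sg2 ** 2 / n2))
print(distance.shape)                          # (40,)
# Wrapping the result in Stats([...]) adds the leading length-1 member axis,
# which is why the test at the top of this diff now expects shapes of (1, 8, 40).
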
@@ -316,8 +333,8 @@ def variance_distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(No
N1 = pdf.get_members()
s1 = (mo1-(N1-3)/(N1-1)*sg1**4)/N1

- # the distance definition
- variance_distance = pdf.stats_class(np.sqrt((sg1**2-sg2**2)**2/(s1+s2)))
+ # Wrap the distance into a Stats (1, flavours, points)
+ variance_distance = Stats([np.sqrt((sg1**2-sg2**2)**2/(s1+s2))])

newgrid = grid.copy_grid(grid_values=variance_distance)
newgrids.append(newgrid)
17 changes: 10 additions & 7 deletions validphys2/src/validphys/pdfplots.py
@@ -186,10 +186,11 @@ def draw(self, pdf, grid, flstate):
ax = flstate.ax
next_prop = next(ax._get_lines.prop_cycler)
color = next_prop['color']
- gv = grid.grid_values.data[:,flstate.flindex,:]
+ flavour_grid = grid.select_flavour(flstate.flindex)
+ stats = flavour_grid.grid_values
+ gv = stats.data
ax.plot(grid.xgrid, gv.T, alpha=0.2, linewidth=0.5,
color=color, zorder=1)
- stats = pdf.stats_class(gv)
ax.plot(grid.xgrid, stats.central_value(), color=color,
linewidth=2,
label=pdf.label)
@@ -223,8 +224,7 @@ def get_ylabel(self, parton_name):
def draw(self, pdf, grid, flstate):
ax = flstate.ax
flindex = flstate.flindex
- gv = grid.grid_values.data[:,flindex,:]
- stats = pdf.stats_class(gv)
+ stats = grid.select_flavour(flindex).grid_values

res = stats.std_error()

@@ -330,7 +330,10 @@ def draw(self, pdf, grid, flstate):
next_prop = next(pcycler)
color = next_prop['color']

- gv = grid.grid_values.data[flindex,:]
+ # The grid for the distance is (1, flavours, points)
+ # take only the flavour we are interested in
+ gv = grid.select_flavour(flindex).grid_values.data.squeeze()
+
ax.plot(grid.xgrid, gv, color=color, label='$%s$' % flstate.parton_name)

return gv
@@ -406,11 +409,11 @@ def setup_flavour(self, flstate):

def draw(self, pdf, grid, flstate):
ax = flstate.ax
- flindex = flstate.flindex
hatchit = flstate.hatchit
labels = flstate.labels
handles = flstate.handles
- stats = pdf.stats_class(grid.grid_values.data[:,flindex,:])
+ # Take only the flavours we are interested in
+ stats = grid.select_flavour(flstate.flindex).grid_values
pcycler = ax._get_lines.prop_cycler
#This is ugly but can't think of anything better

4 changes: 2 additions & 2 deletions validphys2/src/validphys/replica_selector.py
@@ -169,7 +169,7 @@ def not_outlier_mask(
"""Return a boolean mask with the replicas that are never outside of the given
percentile in the constrained region"""
lim = unconstrained_region_index
- gv = xplotting_grid.grid_values.data[:, :, lim:]
+ gv = xplotting_grid.grid_values.error_members()[:, :, lim:]
delta = nsigma * np.std(gv, axis=0)
mean = np.mean(gv, axis=0)
return ((gv >= mean - delta) & (gv <= mean+delta)).all(axis=2).all(axis=1)
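
A toy version of the mask logic above (illustrative arrays only): a replica is kept only if it stays within nsigma standard deviations of the mean at every flavour and x point of the constrained region:

import numpy as np

rng = np.random.default_rng(9)
gv = rng.normal(size=(100, 8, 30))    # (replicas, flavours, x points in the region)
nsigma = 4

delta = nsigma * np.std(gv, axis=0)
mean = np.mean(gv, axis=0)
keep = ((gv >= mean - delta) & (gv <= mean + delta)).all(axis=2).all(axis=1)
print(keep.shape, keep.sum())         # (100,) and the number of replicas kept
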
@@ -309,7 +309,7 @@ def draw(self, pdf, grid, flstate):
#PDFs, and instead would do something different
next_prop = next(ax._get_lines.prop_cycler)
color = next_prop['color']
- gv = grid.grid_values.data[filt, flstate.flindex, :]
+ gv = grid.grid_values.error_members()[filt, flstate.flindex, :]
ax.plot(
grid.xgrid,
gv.T,