use error_members instead of rawdata #1517

Merged: 5 commits, Mar 1, 2022
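The change is mechanical throughout: direct access to the raw replica table (rawdata / .data) is replaced by the Stats accessors, so central values and error members are handled uniformly regardless of the PDF error type. A minimal sketch of the accessors the diff relies on, assuming a Monte Carlo-like layout where member 0 is the central member and the statistics are taken over the remaining error members (the real validphys classes differ in detail):

    import numpy as np

    class MCStatsSketch:
        """Hypothetical stand-in for a Monte Carlo Stats: data is (members, ...)."""

        def __init__(self, data):
            self.data = np.asarray(data)        # member 0 followed by the error members

        def error_members(self):
            return self.data[1:]                # drop the central member

        def central_value(self):
            return self.error_members().mean(axis=0)

        def std_error(self):
            return self.error_members().std(axis=0)

    stats = MCStatsSketch(np.random.default_rng(0).normal(size=(101, 40)))
    print(stats.error_members().shape, stats.central_value().shape)  # (100, 40) (40,)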
n3fit/src/n3fit/tests/test_vpinterface.py (2 additions, 2 deletions)
@@ -68,7 +68,7 @@ def test_vpinterface():
res_2 = xplotting_grid(fit_2, 1.6, xgrid)
distances = distance_grids([fit_1, fit_2], [res_1, res_2], 0)
assert len(distances) == 2
- assert distances[0].grid_values.data.shape == (8, 40)
- assert distances[1].grid_values.data.shape == (8, 40)
+ assert distances[0].grid_values.data.shape == (1, 8, 40)
+ assert distances[1].grid_values.data.shape == (1, 8, 40)
np.testing.assert_allclose(distances[0].grid_values.data, 0.0)
assert not np.allclose(distances[1].grid_values.data, 0.0)
validphys2/src/validphys/arclength.py (1 addition, 1 deletion)
@@ -49,7 +49,7 @@ def arc_lengths(
# Finite diff. step-size, x-grid
eps = (b - a) / npoints
ixgrid = xgrid(a, b, "linear", npoints)
- # PDFs evaluated on grid
+ # PDFs evaluated on grid, use the entire thing, the Stats class will choose later
xfgrid = xplotting_grid(pdf, Q, ixgrid, basis, flavours).grid_values.data * ixgrid[1]
fdiff = np.diff(xfgrid) / eps # Compute forward differences
res += integrate.simps(1 + np.square(fdiff), ixgrid[1][1:])
validphys2/src/validphys/calcutils.py (2 additions, 2 deletions)
@@ -67,8 +67,8 @@ def calc_chi2(sqrtcov, diffs):
return np.einsum('i...,i...->...', vec,vec)

def all_chi2(results):
"""Return the chi² for all elements in the result. Note that the
interpretation of the result will depend on the PDF error type"""
"""Return the chi² for all elements in the result, regardless of the Stats class
Note that the interpretation of the result will depend on the PDF error type"""
data_result, th_result = results
diffs = th_result.rawdata - data_result.central_value[:,np.newaxis]
return calc_chi2(sqrtcov=data_result.sqrtcovmat, diffs=diffs)
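Here diffs has shape (ndata, nmembers), so calc_chi2 broadcasts over members and returns one chi² per member. A self-contained sketch of that broadcasting, under the assumption that calc_chi2 solves against the lower-triangular Cholesky factor before contracting (the einsum above suggests exactly that):

    import numpy as np
    from scipy.linalg import solve_triangular

    def chi2_per_member(sqrtcov, diffs):
        # sqrtcov: lower-triangular Cholesky factor of the covariance (assumed)
        # diffs: (ndata, nmembers) differences between theory members and data
        vec = solve_triangular(sqrtcov, diffs, lower=True)
        return np.einsum('i...,i...->...', vec, vec)   # one chi2 per member

    rng = np.random.default_rng(0)
    cov = np.diag(rng.uniform(0.5, 2.0, size=5))
    diffs = rng.normal(size=(5, 100))
    print(chi2_per_member(np.linalg.cholesky(cov), diffs).shape)  # (100,)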
validphys2/src/validphys/closuretest/closure_results.py (3 additions, 3 deletions)
@@ -122,7 +122,7 @@ def bootstrap_bias_experiment(
"""
dt_ct, th_ct = dataset_inputs_results
((_, th_ul),) = underlying_dataset_inputs_results
- th_ct_boot_cv = bootstrap_values(th_ct.rawdata, bootstrap_samples)
+ th_ct_boot_cv = bootstrap_values(th_ct.error_members, bootstrap_samples)
boot_diffs = th_ct_boot_cv - th_ul.central_value[:, np.newaxis]
boot_bias = calc_chi2(dt_ct.sqrtcovmat, boot_diffs) / len(dt_ct)
return boot_bias
@@ -191,7 +191,7 @@ def variance_dataset(results, fit, use_fitcommondata):

"""
dt, th = results
- diff = th.central_value[:, np.newaxis] - th.rawdata
+ diff = th.central_value[:, np.newaxis] - th.error_members
var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean()
return VarianceData(var_unnorm, len(th))

@@ -210,7 +210,7 @@ def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500)
normalised to the number of data in the experiment.
"""
dt_ct, th_ct = dataset_inputs_results
- diff = th_ct.central_value[:, np.newaxis] - th_ct.rawdata
+ diff = th_ct.central_value[:, np.newaxis] - th_ct.error_members
var_unnorm_boot = bootstrap_values(
diff,
bootstrap_samples,
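In both estimators above, bootstrap_values is fed a (data, member) array and resamples along the member axis. A rough sketch of the assumed behaviour (resample members with replacement and average, once per bootstrap sample; the real helper may differ):

    import numpy as np

    def bootstrap_values_sketch(values, nsamples, seed=0):
        # values: (..., nmembers); each sample averages a resampled set of members
        rng = np.random.default_rng(seed)
        nrep = values.shape[-1]
        samples = [
            values[..., rng.integers(0, nrep, size=nrep)].mean(axis=-1)
            for _ in range(nsamples)
        ]
        return np.stack(samples, axis=-1)

    members = np.random.default_rng(1).normal(size=(20, 100))   # (ndata, nmembers)
    print(bootstrap_values_sketch(members, 500).shape)           # (20, 500)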
validphys2/src/validphys/closuretest/multiclosure.py (4 additions, 4 deletions)
@@ -130,7 +130,7 @@ def fits_dataset_bias_variance(
"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
# The dimensions here are (fit, data point, replica)
- reps = np.asarray([th.rawdata[:, :_internal_max_reps] for th in closures_th])
+ reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
centrals = reps.mean(axis=2)
# place bins on first axis
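The array built here is indexed (fit, data point, replica), and the various estimators reduce it along different axes. A toy illustration of those reductions with made-up numbers (not the actual bias/variance code):

    import numpy as np

    reps = np.random.default_rng(2).normal(size=(3, 10, 25))  # (fit, data point, replica)

    centrals = reps.mean(axis=2)       # central prediction of each fit: (fit, data point)
    within_fit = reps.std(axis=2)      # replica spread inside each fit
    across_fits = centrals.std(axis=0) # spread of the central predictions across fits

    print(centrals.T.shape)            # data points on the first axis: (10, 3)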
@@ -226,7 +226,7 @@ def dataset_xi(internal_multiclosure_dataset_loader):

"""
closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader
- replicas = np.asarray([th.rawdata for th in closures_th])
+ replicas = np.asarray([th.error_members for th in closures_th])
centrals = np.mean(replicas, axis=-1)
underlying = law_th.central_value

@@ -341,7 +341,7 @@ def _bootstrap_multiclosure_fits(
# construct proxy fits theory predictions
for fit_th in fit_boot_th:
rep_boot_index = rng.choice(n_rep_max, size=n_rep, replace=use_repeats)
- boot_ths.append(BootstrappedTheoryResult(fit_th.rawdata[:, rep_boot_index]))
+ boot_ths.append(BootstrappedTheoryResult(fit_th.error_members[:, rep_boot_index]))
return (boot_ths, *input_tuple)
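The rng.choice over member indices is what builds each proxy fit: the data stay fixed while the error members are resampled with replacement. A compact illustration of that single step (the BootstrappedTheoryResult wrapper is omitted):

    import numpy as np

    rng = np.random.default_rng(3)
    members = rng.normal(size=(8, 100))                        # (ndata, n_rep_max)
    rep_boot_index = rng.choice(100, size=100, replace=True)   # resampled member indices
    proxy_members = members[:, rep_boot_index]                 # same shape, resampled columns
    print(proxy_members.shape)                                 # (8, 100)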


@@ -741,7 +741,7 @@ def dataset_fits_bias_replicas_variance_samples(
"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
# The dimensions here are (fit, data point, replica)
- reps = np.asarray([th.rawdata[:, :_internal_max_reps] for th in closures_th])
+ reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
centrals = reps.mean(axis=2)
# place bins on first axis
validphys2/src/validphys/closuretest/multiclosure_pdf.py (1 addition, 1 deletion)
@@ -90,7 +90,7 @@ def xi_grid_values(xi_pdfgrids):
glu_sin_grid, nonsin_grid = xi_pdfgrids
# grid values have shape: replica, flavour, x
# concatenate along flavour
- return np.concatenate((glu_sin_grid.grid_values.data, nonsin_grid.grid_values.data), axis=1)
+ return np.concatenate((glu_sin_grid.grid_values.error_members(), nonsin_grid.grid_values.error_members()), axis=1)


def underlying_xi_grid_values(
@@ -126,8 +126,8 @@ def experiments_closure_pseudodata_estimators_table(
fits_erep_delta_chi2 = []
for i_fit, fit_th in enumerate(closures_th):
# some of these could be done outside of loop, but easier to do here.
- th_replicas = fit_th.rawdata
- th_central = np.mean(th_replicas, axis=-1)
+ th_replicas = fit_th.error_members
+ th_central = fit_th.central_value
dt_replicas = fits_reps_pseudo[i_fit].xs(exp_name, axis=0, level=0).to_numpy()
dt_central = fits_cv.xs(exp_name, axis=0, level=0).iloc[:, i_fit].to_numpy()

validphys2/src/validphys/correlations.py (9 additions, 11 deletions)
@@ -27,8 +27,8 @@ def _basic_obs_pdf_correlation(pdf_stats, obs_stats):
(nbins x nf x nx), compatible with grid_values, upon
changing replicas->bins.
"""
- x = pdf_stats.data - pdf_stats.central_value()
- y = obs_stats.data - obs_stats.central_value()
+ x = pdf_stats.error_members() - pdf_stats.central_value()
+ y = obs_stats.error_members() - obs_stats.central_value()

#We want to compute:
#sum(x*y)/(norm(x)*norm(y))
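Both helpers in this file compute a Pearson-style correlation between fluctuations over the members. A small sketch of the normalised cross-correlation over the member axis, with made-up arrays rather than the library's exact broadcasting:

    import numpy as np
    import numpy.linalg as la

    rng = np.random.default_rng(4)
    nrep, nbins, nx = 100, 6, 40
    x = rng.normal(size=(nrep, nx))     # PDF deviations from the central value
    y = rng.normal(size=(nrep, nbins))  # observable deviations from the central value

    # corr[b, i] = sum_r x[r, i] * y[r, b] / (|x[:, i]| * |y[:, b]|)
    corr = np.einsum("ri,rb->bi", x, y) / np.outer(la.norm(y, axis=0), la.norm(x, axis=0))
    print(corr.shape, corr.min() >= -1 and corr.max() <= 1)   # (6, 40) True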
@@ -53,26 +53,24 @@ def _basic_obs_obs_correlation(obs1, obs2):
The result is (nbins1, nbins2), a matrix containing the correlation
coefficients between the two sets.
"""
- x = (obs1.data - obs1.central_value()).T
- y = (obs2.data - obs2.central_value())
+ x = (obs1.error_members() - obs1.central_value()).T
+ y = obs2.error_members() - obs2.central_value()

return x@y/np.outer(la.norm(x,axis=1),la.norm(y,axis=0))

- #TODO: Implement for other error types. Do not use the rawdata.
@check_pdf_is_montecarlo
def obs_pdf_correlations(pdf, results, xplotting_grid):
"""Return the correlations between each point in a dataset and the PDF
values on a grid of (x,f) points in a format similar to `xplotting_grid`."""
- _ , th = results
+ _, th = results
corrs = pdf.stats_class(_basic_obs_pdf_correlation(xplotting_grid.grid_values, th.stats))
return xplotting_grid.copy_grid(grid_values=corrs)


- corrpair_results = collect('results', ['corrpair'])
- corrpair_datasets = collect('dataset', ['corrpair'])
+ corrpair_results = collect("results", ["corrpair"])
+ corrpair_datasets = collect("dataset", ["corrpair"])


@check_pdf_is_montecarlo
def obs_obs_correlations(pdf, corrpair_results):
"""Return the theoretical correlation matrix between a pair of observables."""
- (_,th1), (_,th2) = corrpair_results
+ (_, th1), (_, th2) = corrpair_results
return _basic_obs_obs_correlation(th1.stats, th2.stats)
validphys2/src/validphys/covmats.py (1 addition, 1 deletion)
@@ -531,7 +531,7 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered):
True
"""
th = ThPredictionsResult.from_convolution(pdf, dataset)
- pdf_cov = np.cov(th.rawdata, rowvar=True)
+ pdf_cov = np.cov(th.error_members, rowvar=True)
return pdf_cov + covmat_t0_considered
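With rowvar=True each row (data point) is a variable and each column (error member) an observation, so the PDF contribution comes out as an (ndata, ndata) covariance matrix that can be added to the experimental one. A quick shape check with dummy predictions in place of the real ThPredictionsResult:

    import numpy as np

    error_members = np.random.default_rng(5).normal(size=(12, 100))  # (ndata, nmembers)
    pdf_cov = np.cov(error_members, rowvar=True)
    print(pdf_cov.shape)   # (12, 12)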


validphys2/src/validphys/dataplots.py (2 additions, 2 deletions)
@@ -68,7 +68,7 @@ def _chi2_distribution_plots(chi2_data, stats, pdf, plot_type):
ax.set_xlabel(r"Replica $\chi^2$")

if plot_type == "hist":
- ax.hist(alldata.data, label=label, zorder=100)
+ ax.hist(alldata.error_members(), label=label, zorder=100)
elif plot_type == "kde":
# We need the squeeze here to change shape from (x, 1) to (x,)
ax = plotutils.kde_plot(alldata.data.squeeze(), label=label)
@@ -871,7 +871,7 @@ def plot_smpdf(pdf, dataset, obs_pdf_correlations, mark_threshold:float=0.9):

basis = obs_pdf_correlations.basis

- fullgrid = obs_pdf_correlations.grid_values.data
+ fullgrid = obs_pdf_correlations.grid_values.error_members()

fls = obs_pdf_correlations.flavours
x = obs_pdf_correlations.xgrid
validphys2/src/validphys/deltachi2.py (7 additions, 12 deletions)
@@ -140,15 +140,12 @@ def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid):
"""
positive_eigenvalue_mask = delta_chi2_hessian >= 0

- # include replica 0 in both new grids
+ # The masks do not include replica 0, add it in both grids
pos_mask = np.append(True, positive_eigenvalue_mask)
neg_mask = np.append(True, ~positive_eigenvalue_mask)

- pos_grid = xplotting_grid.grid_values.data[pos_mask]
- neg_grid = xplotting_grid.grid_values.data[neg_mask]
-
- pos_xplotting_grid = xplotting_grid.copy_grid(grid_values=pos_grid)
- neg_xplotting_grid = xplotting_grid.copy_grid(grid_values=neg_grid)
+ pos_xplotting_grid = xplotting_grid.mask_replicas(pos_mask)
+ neg_xplotting_grid = xplotting_grid.mask_replicas(neg_mask)

return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid]

@@ -210,12 +207,10 @@ def draw(self, pdf, grid, flstate):
labels = flstate.labels
handles = flstate.handles

- # pick all replicas grid_values for the flavour flindex. stats_class is a method
- # of the PDF class, which returns the stats calculator (object) for the pdf error type.
- # Basically stats is an object which says what is the type class of the replicas:
- # MCStats, HessianStats, SymmHessianStats. In this way validphys use the right methods
- # to compute statistical values.
- stats = pdf.stats_class(grid.grid_values.data[:, flindex, :])
+ # Create a copy of the `Stats` instance of the grid
+ # with only the flavours we are interested in
+ gv = grid.grid_values.data
+ stats = grid.copy_grid(grid_values=gv[:, flindex, :]).grid_values

# Ignore spurious normalization warnings
with warnings.catch_warnings():
validphys2/src/validphys/paramfits/plots.py (1 addition, 1 deletion)
@@ -778,7 +778,7 @@ def plot_total_as_distribution_dataspecs(
dataspecs_parabolic_as_determination_for_total,
dataspecs_speclabel):
#Remember that *_for_total is a len 1 list, so take the first element.
- kde_plot(dist[0].data, ax=ax, label=label)
+ kde_plot(dist[0].error_members(), ax=ax, label=label)
ax.set_xlabel(r"$\alpha_S$")
ax.legend()
return fig
validphys2/src/validphys/pdfgrids.py (24 additions, 7 deletions)
@@ -55,6 +55,23 @@ class XPlottingGrid:
grid_values: Stats
scale: str

+ def select_flavour(self, flindex):
+ """Return a new grid for one single flavour"""
+ if isinstance(flindex, str):
+ flstr = flindex
+ flindex = self.flavours.index(flindex)
+ else:
+ flstr = self.flavours[flindex]
+ new_grid = self.grid_values.data[:, flindex]
+ gv = self.grid_values.__class__(new_grid)
+ return dataclasses.replace(self, grid_values=gv, flavours=[flstr])
+
+ def mask_replicas(self, mask):
+ """Return a copy of XPlottingGrid with the mask applied to the replicas"""
+ new_grid = self.grid_values.data[mask]
+ gv = self.grid_values.__class__(new_grid)
+ return dataclasses.replace(self, grid_values=gv)

def copy_grid(self, grid_values=None):
"""Create a copy of the grid with potentially a different set of values"""
if not isinstance(grid_values, Stats):
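The two helpers added above return new XPlottingGrid instances rather than bare arrays, which is what lets the plotting code further down drop its pdf.stats_class(...) reconstruction. The indexing they wrap is simple; an illustration with dummy shapes and an illustrative flavour list:

    import numpy as np

    data = np.zeros((101, 8, 40))    # grid_values.data: (members, flavours, x)
    flavours = ["bar u", "bar d", "gluon", "d", "u", "s", "c", "singlet"]  # illustrative

    # select_flavour("gluon"): keep a single flavour, dropping that axis
    gluon = data[:, flavours.index("gluon")]          # (101, 40)

    # mask_replicas(mask): keep a boolean subset of the members
    mask = np.arange(101) % 2 == 0
    subset = data[mask]                               # (51, 8, 40)

    print(gluon.shape, subset.shape)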
@@ -92,9 +109,9 @@ def xplotting_grid(pdf:PDF, Q:(float,int), xgrid=None, basis:(str, Basis)='flavo
raise TypeError(f"Invalid xgrid {xgrid!r}")
gv = basis.grid_values(pdf, flavours, xgrid, Q)
#Eliminate Q axis
- gv = pdf.stats_class(gv.reshape(gv.shape[:-1]))
+ stats_gv = pdf.stats_class(gv.reshape(gv.shape[:-1]))

- res = XPlottingGrid(Q, basis, flavours, xgrid, gv, scale)
+ res = XPlottingGrid(Q, basis, flavours, xgrid, stats_gv, scale)
return res

xplotting_grids = collect(xplotting_grid, ('pdfs',))
@@ -263,7 +280,7 @@ def distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(None))=None

if pdf == pdfs[normalize_to]:
# Zero the PDF we are normalizing against
- pdf_zero = pdf.stats_class(np.zeros_like(gr2_stats.data[0]))
+ pdf_zero = pdf.stats_class(np.zeros_like(gr2_stats.data[0:1]))
newgrid = grid.copy_grid(grid_values=pdf_zero)
newgrids.append(newgrid)
continue
@@ -273,8 +290,8 @@ def distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(None))=None
sg1 = g_stats.std_error()
N1 = pdf.get_members()

- # the distance definition
- distance = pdf.stats_class(np.sqrt((cv1-cv2)**2/(sg1**2/N1+sg2**2/N2)))
+ # Wrap the distance into a Stats (1, flavours, points)
+ distance = Stats([np.sqrt((cv1-cv2)**2/(sg1**2/N1+sg2**2/N2))])

newgrid = grid.copy_grid(grid_values=distance)
newgrids.append(newgrid)
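The array being wrapped is the standard distance between two PDF sets, sqrt((cv1 - cv2)^2 / (sg1^2/N1 + sg2^2/N2)), evaluated per flavour and per x point. Putting it inside Stats([...]) adds a leading axis of length 1, which is why the test above now expects shape (1, 8, 40). A numeric sketch with dummy central values and errors:

    import numpy as np

    rng = np.random.default_rng(6)
    cv1, cv2 = rng.normal(size=(2, 8, 40))             # central values (flavours, x)
    sg1, sg2 = rng.uniform(0.5, 1.0, size=(2, 8, 40))  # standard deviations
    N1 = N2 = 100                                      # number of members

    distance = np.array([np.sqrt((cv1 - cv2)**2 / (sg1**2 / N1 + sg2**2 / N2))])
    print(distance.shape)   # (1, 8, 40)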
@@ -316,8 +333,8 @@ def variance_distance_grids(pdfs, xplotting_grids, normalize_to:(int,str,type(No
N1 = pdf.get_members()
s1 = (mo1-(N1-3)/(N1-1)*sg1**4)/N1

- # the distance definition
- variance_distance = pdf.stats_class(np.sqrt((sg1**2-sg2**2)**2/(s1+s2)))
+ # Wrap the distance into a Stats (1, flavours, points)
+ variance_distance = Stats([np.sqrt((sg1**2-sg2**2)**2/(s1+s2))])

newgrid = grid.copy_grid(grid_values=variance_distance)
newgrids.append(newgrid)
validphys2/src/validphys/pdfplots.py (10 additions, 7 deletions)
@@ -186,10 +186,11 @@ def draw(self, pdf, grid, flstate):
ax = flstate.ax
next_prop = next(ax._get_lines.prop_cycler)
color = next_prop['color']
- gv = grid.grid_values.data[:,flstate.flindex,:]
+ flavour_grid = grid.select_flavour(flstate.flindex)
+ stats = flavour_grid.grid_values
+ gv = stats.data
ax.plot(grid.xgrid, gv.T, alpha=0.2, linewidth=0.5,
color=color, zorder=1)
- stats = pdf.stats_class(gv)
ax.plot(grid.xgrid, stats.central_value(), color=color,
linewidth=2,
label=pdf.label)
@@ -223,8 +224,7 @@ def get_ylabel(self, parton_name):
def draw(self, pdf, grid, flstate):
ax = flstate.ax
flindex = flstate.flindex
- gv = grid.grid_values.data[:,flindex,:]
- stats = pdf.stats_class(gv)
+ stats = grid.select_flavour(flindex).grid_values

res = stats.std_error()

@@ -330,7 +330,10 @@ def draw(self, pdf, grid, flstate):
next_prop = next(pcycler)
color = next_prop['color']

- gv = grid.grid_values.data[flindex,:]
+ # The grid for the distance is (1, flavours, points)
+ # take only the flavour we are interested in
+ gv = grid.select_flavour(flindex).grid_values.data.squeeze()
+
ax.plot(grid.xgrid, gv, color=color, label='$%s$' % flstate.parton_name)

return gv
@@ -406,11 +409,11 @@ def setup_flavour(self, flstate):

def draw(self, pdf, grid, flstate):
ax = flstate.ax
- flindex = flstate.flindex
hatchit = flstate.hatchit
labels = flstate.labels
handles = flstate.handles
- stats = pdf.stats_class(grid.grid_values.data[:,flindex,:])
+ # Take only the flavours we are interested in
+ stats = grid.select_flavour(flstate.flindex).grid_values
pcycler = ax._get_lines.prop_cycler
#This is ugly but can't think of anything better

validphys2/src/validphys/replica_selector.py (2 additions, 2 deletions)
@@ -169,7 +169,7 @@ def not_outlier_mask(
"""Return a boolean mask with the replicas that are never outside of the given
percentile in the constrained region"""
lim = unconstrained_region_index
- gv = xplotting_grid.grid_values.data[:, :, lim:]
+ gv = xplotting_grid.grid_values.error_members()[:, :, lim:]
delta = nsigma * np.std(gv, axis=0)
mean = np.mean(gv, axis=0)
return ((gv >= mean - delta) & (gv <= mean+delta)).all(axis=2).all(axis=1)
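A replica passes only if it stays within nsigma standard deviations of the member mean for every flavour and every x in the constrained region. A toy check of that logic on random members with one planted outlier:

    import numpy as np

    rng = np.random.default_rng(7)
    gv = rng.normal(size=(100, 8, 50))   # error members only, as in the new code
    gv[3] += 10.0                        # make member 3 an obvious outlier

    nsigma = 4
    delta = nsigma * np.std(gv, axis=0)
    mean = np.mean(gv, axis=0)
    mask = ((gv >= mean - delta) & (gv <= mean + delta)).all(axis=2).all(axis=1)
    print(mask[3], mask.sum())           # False, roughly 99 members kept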
@@ -309,7 +309,7 @@ def draw(self, pdf, grid, flstate):
#PDFs, and instead would do something different
next_prop = next(ax._get_lines.prop_cycler)
color = next_prop['color']
- gv = grid.grid_values.data[filt, flstate.flindex, :]
+ gv = grid.grid_values.error_members()[filt, flstate.flindex, :]
ax.plot(
grid.xgrid,
gv.T,