Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Utilize stats everywhere where it can be used #1515

Merged
merged 7 commits into from
Mar 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions n3fit/src/n3fit/tests/test_vpinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_N3PDF(members, layers):
assert n3pdf.compute_arclength().shape == (5,)
# Try to get a plotting grid
res = xplotting_grid(n3pdf, 1.6, xx)
assert res.grid_values.shape == (members, 8, xsize)
assert res.grid_values.data.shape == (members, 8, xsize)


def test_vpinterface():
Expand All @@ -68,7 +68,7 @@ def test_vpinterface():
res_2 = xplotting_grid(fit_2, 1.6, xgrid)
distances = distance_grids([fit_1, fit_2], [res_1, res_2], 0)
assert len(distances) == 2
assert distances[0].grid_values.shape == (8, 40)
assert distances[1].grid_values.shape == (8, 40)
np.testing.assert_allclose(distances[0].grid_values, 0.0)
assert not np.allclose(distances[1].grid_values, 0.0)
assert distances[0].grid_values.data.shape == (8, 40)
assert distances[1].grid_values.data.shape == (8, 40)
np.testing.assert_allclose(distances[0].grid_values.data, 0.0)
assert not np.allclose(distances[1].grid_values.data, 0.0)
2 changes: 1 addition & 1 deletion n3fit/src/n3fit/vpinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
>>> pdf_model = pdfNN_layer_generator(nodes=[8], activations=['linear'], seed=0, flav_info=fake_fl)
>>> n3pdf = N3PDF(pdf_model)
>>> res = xplotting_grid(n3pdf, 1.6, fake_x)
>>> res.grid_values.shape
>>> res.grid_values.error_members().shape
(1, 8, 3)


Expand Down
4 changes: 2 additions & 2 deletions validphys2/src/validphys/arclength.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def arc_lengths(
eps = (b - a) / npoints
ixgrid = xgrid(a, b, "linear", npoints)
# PDFs evaluated on grid
xfgrid = xplotting_grid(pdf, Q, ixgrid, basis, flavours).grid_values * ixgrid[1]
xfgrid = xplotting_grid(pdf, Q, ixgrid, basis, flavours).grid_values.data * ixgrid[1]
fdiff = np.diff(xfgrid) / eps # Compute forward differences
res += integrate.simps(1 + np.square(fdiff), ixgrid[1][1:])
stats = pdf.stats_class(res)
Expand Down Expand Up @@ -127,6 +127,6 @@ def integrability_number(
checked = check_basis(basis, flavours)
basis, flavours = checked["basis"], checked["flavours"]
ixgrid = xgrid(1e-9, 1e-6, "log", 3)
xfgrid = xplotting_grid(pdf, Q, ixgrid, basis, flavours).grid_values
xfgrid = xplotting_grid(pdf, Q, ixgrid, basis, flavours).grid_values.data
res = np.sum(np.abs(xfgrid), axis=2)
return res.squeeze()
4 changes: 2 additions & 2 deletions validphys2/src/validphys/calcutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def all_chi2(results):
"""Return the chi² for all elements in the result. Note that the
interpretation of the result will depend on the PDF error type"""
data_result, th_result = results
diffs = th_result._rawdata - data_result.central_value[:,np.newaxis]
diffs = th_result.rawdata - data_result.central_value[:,np.newaxis]
return calc_chi2(sqrtcov=data_result.sqrtcovmat, diffs=diffs)

def central_chi2(results):
Expand All @@ -85,7 +85,7 @@ def all_chi2_theory(results, totcov):
"""Like all_chi2 but here the chi² are calculated using a covariance matrix
that is the sum of the experimental covmat and the theory covmat."""
data_result, th_result = results
diffs = th_result._rawdata - data_result.central_value[:,np.newaxis]
diffs = th_result.rawdata - data_result.central_value[:,np.newaxis]
total_covmat = np.array(totcov)
return calc_chi2(sqrtcov=la.cholesky(total_covmat, lower=True), diffs=diffs)

Expand Down
6 changes: 3 additions & 3 deletions validphys2/src/validphys/closuretest/closure_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def bootstrap_bias_experiment(
"""
dt_ct, th_ct = dataset_inputs_results
((_, th_ul),) = underlying_dataset_inputs_results
th_ct_boot_cv = bootstrap_values(th_ct._rawdata, bootstrap_samples)
th_ct_boot_cv = bootstrap_values(th_ct.rawdata, bootstrap_samples)
boot_diffs = th_ct_boot_cv - th_ul.central_value[:, np.newaxis]
boot_bias = calc_chi2(dt_ct.sqrtcovmat, boot_diffs) / len(dt_ct)
return boot_bias
Expand Down Expand Up @@ -191,7 +191,7 @@ def variance_dataset(results, fit, use_fitcommondata):

"""
dt, th = results
diff = th.central_value[:, np.newaxis] - th._rawdata
diff = th.central_value[:, np.newaxis] - th.rawdata
var_unnorm = calc_chi2(dt.sqrtcovmat, diff).mean()
return VarianceData(var_unnorm, len(th))

Expand All @@ -210,7 +210,7 @@ def bootstrap_variance_experiment(dataset_inputs_results, bootstrap_samples=500)
normalised to the number of data in the experiment.
"""
dt_ct, th_ct = dataset_inputs_results
diff = th_ct.central_value[:, np.newaxis] - th_ct._rawdata
diff = th_ct.central_value[:, np.newaxis] - th_ct.rawdata
var_unnorm_boot = bootstrap_values(
diff,
bootstrap_samples,
Expand Down
10 changes: 5 additions & 5 deletions validphys2/src/validphys/closuretest/multiclosure.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def fits_dataset_bias_variance(
"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
# The dimensions here are (fit, data point, replica)
reps = np.asarray([th._rawdata[:, :_internal_max_reps] for th in closures_th])
reps = np.asarray([th.rawdata[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
centrals = reps.mean(axis=2)
# place bins on first axis
Expand Down Expand Up @@ -226,7 +226,7 @@ def dataset_xi(internal_multiclosure_dataset_loader):

"""
closures_th, law_th, covmat, _ = internal_multiclosure_dataset_loader
replicas = np.asarray([th._rawdata for th in closures_th])
replicas = np.asarray([th.rawdata for th in closures_th])
centrals = np.mean(replicas, axis=-1)
underlying = law_th.central_value

Expand Down Expand Up @@ -297,7 +297,7 @@ class BootstrappedTheoryResult:
"""

def __init__(self, data):
self._rawdata = data
self.rawdata = data
self.central_value = data.mean(axis=1)


Expand Down Expand Up @@ -341,7 +341,7 @@ def _bootstrap_multiclosure_fits(
# construct proxy fits theory predictions
for fit_th in fit_boot_th:
rep_boot_index = rng.choice(n_rep_max, size=n_rep, replace=use_repeats)
boot_ths.append(BootstrappedTheoryResult(fit_th._rawdata[:, rep_boot_index]))
boot_ths.append(BootstrappedTheoryResult(fit_th.rawdata[:, rep_boot_index]))
return (boot_ths, *input_tuple)


Expand Down Expand Up @@ -741,7 +741,7 @@ def dataset_fits_bias_replicas_variance_samples(
"""
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
# The dimensions here are (fit, data point, replica)
reps = np.asarray([th._rawdata[:, :_internal_max_reps] for th in closures_th])
reps = np.asarray([th.rawdata[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
centrals = reps.mean(axis=2)
# place bins on first axis
Expand Down
2 changes: 1 addition & 1 deletion validphys2/src/validphys/closuretest/multiclosure_pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def xi_grid_values(xi_pdfgrids):
glu_sin_grid, nonsin_grid = xi_pdfgrids
# grid values have shape: replica, flavour, x
# concatenate along flavour
return np.concatenate((glu_sin_grid.grid_values, nonsin_grid.grid_values), axis=1)
return np.concatenate((glu_sin_grid.grid_values.data, nonsin_grid.grid_values.data), axis=1)


def underlying_xi_grid_values(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def experiments_closure_pseudodata_estimators_table(
fits_erep_delta_chi2 = []
for i_fit, fit_th in enumerate(closures_th):
# some of these could be done outside of loop, but easier to do here.
th_replicas = fit_th._rawdata
th_replicas = fit_th.rawdata
th_central = np.mean(th_replicas, axis=-1)
dt_replicas = fits_reps_pseudo[i_fit].xs(exp_name, axis=0, level=0).to_numpy()
dt_central = fits_cv.xs(exp_name, axis=0, level=0).iloc[:, i_fit].to_numpy()
Expand Down
36 changes: 17 additions & 19 deletions validphys2/src/validphys/correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,21 @@

#This would be a good candidate to be optimized to calculate everything in one
#pass over x,
def _basic_obs_pdf_correlation(pdfarr, obsarr):
def _basic_obs_pdf_correlation(pdf_stats, obs_stats):
"""Calculate the correlation between pdfs and observables.
The expected format is:
The expected format is Stats instances of:

obsarr: (nbins x nreplicas), as returned from thresult.
pdfarr: (nreplicas x nf x nx), as returned from xplotting_grid.grid_values
obs_stats: (nbins x nreplicas), as returned from thresult.
pdf_stats: (nreplicas x nf x nx), as returned from xplotting_grid.grid_values

The returned array contains the PDF correlation between
the value of the observable and the PDF at the corresponding point in (fl,x)
space. The dimensions are:
(nbins x nf x nx), compatible with grid_values, upon
changing replicas->bins.
"""

#Remove mean
#TODO: This should be done at the Result level
x = pdfarr - np.mean(pdfarr, axis=0)
y = obsarr.T - np.mean(obsarr, axis=1)
x = pdf_stats.data - pdf_stats.central_value()
y = obs_stats.data - obs_stats.central_value()

#We want to compute:
#sum(x*y)/(norm(x)*norm(y))
Expand All @@ -46,28 +43,29 @@ def _basic_obs_pdf_correlation(pdfarr, obsarr):

return num/den

def _basic_obs_obs_correlation(obsarr1, obsarr2):
def _basic_obs_obs_correlation(obs1, obs2):
"""Calculate the correlation between two observables. The expected format is
obsarr1: (nbins1, nreplicas)
obsarr2: (nbins2, nreplicas)
Stats instances of:

obs1: (nreplicas, nbins1)
obs2: (nreplicas, nbins2)

The result is (nbins1 , nbins2), a matrix containing the correlation
coefficients between the two sets.
"""
#TODO: Do this at Result level taking into account error type
x = (obsarr1.T - np.mean(obsarr1, axis=1)).T
y = (obsarr2.T - np.mean(obsarr2, axis=1))
x = (obs1.data - obs1.central_value()).T
y = (obs2.data - obs2.central_value())
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved

return x@y/np.outer(la.norm(x,axis=1),la.norm(y,axis=0))

#TODO: Implement for other error types. Do not use the _rawdata.
#TODO: Implement for other error types. Do not use the rawdata.
@check_pdf_is_montecarlo
def obs_pdf_correlations(pdf, results, xplotting_grid):
"""Return the correlations between each point in a dataset and the PDF
values on a grid of (x,f) points in a format similar to `xplotting_grid`."""
_ , th = results
corrs = _basic_obs_pdf_correlation(xplotting_grid.grid_values, th._rawdata)
return xplotting_grid._replace(grid_values=corrs)
corrs = pdf.stats_class(_basic_obs_pdf_correlation(xplotting_grid.grid_values, th.stats))
return xplotting_grid.copy_grid(grid_values=corrs)


corrpair_results = collect('results', ['corrpair'])
Expand All @@ -77,4 +75,4 @@ def obs_pdf_correlations(pdf, results, xplotting_grid):
def obs_obs_correlations(pdf, corrpair_results):
"""Return the theoretical correlation matrix between a pair of observables."""
(_,th1), (_,th2) = corrpair_results
return _basic_obs_obs_correlation(th1._rawdata, th2._rawdata)
return _basic_obs_obs_correlation(th1.stats, th2.stats)
2 changes: 1 addition & 1 deletion validphys2/src/validphys/covmats.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ def pdferr_plus_covmat(dataset, pdf, covmat_t0_considered):
True
"""
th = ThPredictionsResult.from_convolution(pdf, dataset)
pdf_cov = np.cov(th._rawdata, rowvar=True)
pdf_cov = np.cov(th.rawdata, rowvar=True)
return pdf_cov + covmat_t0_considered


Expand Down
2 changes: 1 addition & 1 deletion validphys2/src/validphys/dataplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ def plot_smpdf(pdf, dataset, obs_pdf_correlations, mark_threshold:float=0.9):

basis = obs_pdf_correlations.basis

fullgrid = obs_pdf_correlations.grid_values
fullgrid = obs_pdf_correlations.grid_values.data

fls = obs_pdf_correlations.flavours
x = obs_pdf_correlations.xgrid
Expand Down
15 changes: 5 additions & 10 deletions validphys2/src/validphys/deltachi2.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,11 +134,6 @@ def plot_delta_chi2_hessian_distribution(delta_chi2_hessian, pdf, total_chi2_dat
return fig


XPlottingGrid = namedtuple(
"XPlottingGrid", ("Q", "basis", "flavours", "xgrid", "grid_values", "scale")
)


def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid):
"""
Generates xplotting_grids corresponding to positive and negative delta chi2s.
Expand All @@ -149,11 +144,11 @@ def pos_neg_xplotting_grids(delta_chi2_hessian, xplotting_grid):
pos_mask = np.append(True, positive_eigenvalue_mask)
neg_mask = np.append(True, ~positive_eigenvalue_mask)

pos_grid = xplotting_grid.grid_values[pos_mask]
neg_grid = xplotting_grid.grid_values[neg_mask]
pos_grid = xplotting_grid.grid_values.data[pos_mask]
neg_grid = xplotting_grid.grid_values.data[neg_mask]

pos_xplotting_grid = xplotting_grid._replace(grid_values=pos_grid)
neg_xplotting_grid = xplotting_grid._replace(grid_values=neg_grid)
pos_xplotting_grid = xplotting_grid.copy_grid(grid_values=pos_grid)
neg_xplotting_grid = xplotting_grid.copy_grid(grid_values=neg_grid)

return [xplotting_grid, pos_xplotting_grid, neg_xplotting_grid]

Expand Down Expand Up @@ -220,7 +215,7 @@ def draw(self, pdf, grid, flstate):
# Basically stats is an object which says what is the type class of the replicas:
# MCStats, HessianStats, SymmHessianStats. In this way validphys use the right methods
# to compute statistical values.
stats = pdf.stats_class(grid.grid_values[:, flindex, :])
stats = pdf.stats_class(grid.grid_values.data[:, flindex, :])

# Ignore spurious normalization warnings
with warnings.catch_warnings():
Expand Down
19 changes: 8 additions & 11 deletions validphys2/src/validphys/eff_exponents.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,14 @@ def alpha_eff(pdf: PDF, *,

pdfGrid = pdfgrids.xplotting_grid(
pdf, Q, xgrid=xGrid, basis=basis, flavours=flavours)
pdfGrid_values = pdfGrid.grid_values
pdfGrid_values = pdfGrid.grid_values.data
# NOTE: without this I get "setting an array element with a sequence"
xGrid = pdfGrid.xgrid
with warnings.catch_warnings():
warnings.simplefilter('ignore', RuntimeWarning)
alphaGrid_values = -np.log(abs(pdfGrid_values/xGrid))/np.log(xGrid)
alphaGrid_values[alphaGrid_values == - np.inf] = np.nan # when PDF_i =0
alphaGrid = pdfGrid._replace(grid_values=alphaGrid_values)
alphaGrid = pdfGrid.copy_grid(grid_values=pdf.stats_class(alphaGrid_values))
return alphaGrid

@check_positive('Q')
Expand Down Expand Up @@ -110,14 +110,14 @@ def beta_eff(pdf, *,

pdfGrid = pdfgrids.xplotting_grid(
pdf, Q, xgrid=xGrid, basis=basis, flavours=flavours)
pdfGrid_values = pdfGrid.grid_values
pdfGrid_values = pdfGrid.grid_values.data
# NOTE: without this I get "setting an array element with a sequence"
xGrid = pdfGrid.xgrid
with warnings.catch_warnings():
warnings.simplefilter('ignore', RuntimeWarning)
betaGrid_values = np.log(abs(pdfGrid_values/xGrid))/np.log(1-xGrid)
betaGrid_values[betaGrid_values == -np.inf] = np.nan # when PDF_i =0
betaGrid = pdfGrid._replace(grid_values=betaGrid_values)
betaGrid = pdfGrid.copy_grid(grid_values=pdf.stats_class(betaGrid_values))

return betaGrid # .grid_values

Expand Down Expand Up @@ -348,17 +348,14 @@ def next_effective_exponents_table(

eff_exp_data = []

alphagrid = alpha_effs.grid_values
betagrid = beta_effs.grid_values

alphastats = pdf.stats_class(alphagrid)
betastats = pdf.stats_class(betagrid)
alphastats = alpha_effs.grid_values
betastats = beta_effs.grid_values

with warnings.catch_warnings():
warnings.simplefilter('ignore', RuntimeWarning)

alpha_cv = np.nanmean(alphagrid, axis=0)
beta_cv = np.nanmean(betagrid, axis=0)
alpha_cv = np.nanmean(alphastats.error_members(), axis=0)
beta_cv = np.nanmean(betastats.error_members(), axis=0)
# tuple of low and high values respectively
alpha68 = alphastats.errorbar68()
beta68 = betastats.errorbar68()
Expand Down
7 changes: 5 additions & 2 deletions validphys2/src/validphys/mc2hessian.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from validphys.lhio import hessian_from_lincomb
from validphys.pdfgrids import xplotting_grid

from validphys.checks import check_pdf_is_montecarlo

log = logging.getLogger(__name__)


Expand Down Expand Up @@ -58,6 +60,7 @@ def mc2hessian_xgrid(
)


@check_pdf_is_montecarlo
def mc2hessian(
pdf, Q, Neig: int, mc2hessian_xgrid, output_path, gridname, installgrid: bool = False
):
Expand Down Expand Up @@ -109,8 +112,8 @@ def _create_mc2hessian(pdf, Q, xgrid, Neig, output_path, name=None):
def _get_X(pdf, Q, xgrid, reshape=False):
pdf_grid = xplotting_grid(pdf, Q, xgrid=xgrid)
pdf_grid_values = pdf_grid.grid_values
replicas = pdf_grid_values
mean = pdf_grid_values.mean(axis=0)
replicas = pdf_grid_values.error_members()
mean = pdf_grid_values.central_value()
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved
Xt = replicas - mean
if reshape:
Xt = Xt.reshape(Xt.shape[0], Xt.shape[1] * Xt.shape[2])
Expand Down
Loading