Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Let the user know there could've been some problems with the indexing #1083

Merged
merged 10 commits into from
Mar 5, 2021
177 changes: 84 additions & 93 deletions validphys2/src/validphys/uploadutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

Tools to upload resources to remote servers.
"""
import time
import subprocess
import logging
import os
Expand Down Expand Up @@ -172,65 +173,48 @@ def upload_context(self, output_and_file):
class ReportFileUploader(FileUploader, ReportUploader):
pass

class FitUploader(FileUploader):
"""An uploader for fits. Fits will be automatically compressed
before uploading."""
target_dir = _profile_key('fits_target_dir')
root_url = _profile_key('fits_root_url')

class ArchiveUploader(FileUploader):
""" Uploader for objects comprising many files such as fits or PDFs """
target_dir = None
root_url = None
_loader_name = None # vp loader for this kind of archive
_resource_type = "Archive" # name used during logging

def get_relative_path(self, output_path=None):
return ''

def check_fit_exists(self, fit_name):
"""Check whether the fit already exists on the server."""
# Get list of the available fits on the server
def _check_existence(self, resource_name):
""" Check whether the given resource exists on the server
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved
Returns true if the resource exists with the same name on the server
or false otherwise.
Note that the type of resource being checked is defined by the ``_loader_name`` attribute
"""
l = RemoteLoader()
fits = l.downloadable_fits
resource_list = getattr(l, self._loader_name)

if fit_name in fits:
log.error("A fit with the same name already exists on "
"the server. To overwrite this fit use the "
"--force flag, as in `vp-upload <fitname> "
"--force`.")
raise UploadError

def check_fit_md5(self, output_path):
"""When ``vp-setupfit`` is successfully ran, it creates an ``md5`` from
the config. We check that the ``md5`` matches the ``filter.yml`` which
is checking that ``vp-setupfit`` was ran and that the ``filter.yml``
inside the fit folder wasn't modified.
if resource_name in resource_list:
return True
return False
Zaharid marked this conversation as resolved.
Show resolved Hide resolved

def check_is_indexed(self, resource_name):
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved
""" Check whether the fit is correctly indexed in the server
"""
md5_path = output_path / "md5"
try:
with open(md5_path, "r") as f:
saved_md5 = f.read()
except FileNotFoundError as e:
log.error(
"It doesn't appear that `vp-setupfit` was ran because no `md5` "
"was found, `vp-setupfit` should be ran before uploading a fit."
)
raise UploadError(f"Fit MD5 file not found at {md5_path}") from e

with open(output_path / "filter.yml", "rb") as f:
hashed_config = hashlib.md5(f.read()).hexdigest()

if hashed_config != saved_md5:
log.error(
"Saved md5 doesn't match saved fit configuration runcard, which "
"suggests that the configuration file was modified after it was "
"saved. <fit folder>/filter.yml shouldn't be modified directly. "
"Instead modify the fit runcard and re-run ``vp-setupfit``."
)
raise UploadError

def compress(self, output_path):
log.info("Checking whether %s was correctly uploaded...", resource_name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why you don't use an f-string here and in one other place below? Personally I tend to find them a bit easier to read

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because has bullied me for years.
In reality I guess if you are ever bottle-necked by the writing of the logging then you have bigger problems than that... tbh I don't mind either way.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that you should really use this format in logging right? pylint complains if you don't, something about lazy formatting so I guess this gets resolved later than the f-string?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's the case. But tbh I don't think "compiling" a string will ever be a bottleneck of any kind...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure sure, I was just saying I think we should follow the "rules"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the more valid concern is that logging has the very theoretical capability of doing things with the args other than printing them. But it is also unlikely to matter.

time.sleep(3)
if self._check_existence(resource_name):
log.info("It has been correctly indexed by the server!")
else:
log.error("The object is uploaded but hasn't been indexed yet by the server "
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved
"you should upload it again to ensure it is indexed: vp-upload %s", resource_name)
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved

def _compress(self, output_path):
"""Compress the folder and put in in a directory inside its parent."""
#make_archive fails if we give it relative paths for some reason
output_path = output_path.resolve()
tempdir = tempfile.mkdtemp(prefix='fit_upload_deleteme_',
tempdir = tempfile.mkdtemp(prefix=f'{self._resource_type}_upload_deleteme_',
dir=output_path.parent)
log.info(f"Compressing fit to {tempdir}")
log.info(f"Compressing {self._resource_type} to {tempdir}")
archive_path_without_extension = pathlib.Path(tempdir)/(output_path.name)
try:
with Spinner():
Expand All @@ -242,19 +226,24 @@ def compress(self, output_path):
raise UploadError(e) from e
return tempdir, archive_path_without_extension


def upload_output(self, output_path, force):
output_path = pathlib.Path(output_path)
fit_name = output_path.name

if not force:
self.check_fit_exists(fit_name)

self.check_fit_md5(output_path)

new_out, name = self.compress(output_path)
if self._check_existence(fit_name):
log.error("A %s with the same name already exists on "
"the server. To overwrite it use the "
"--force flag, as in `vp-upload <%s_name> --force.",
self._resource_type, self._resource_type)
raise UploadError

new_out, name = self._compress(output_path)
super().upload_output(new_out)

# Check whether the fit was really uploaded
self.check_is_indexed(fit_name)

shutil.rmtree(new_out)
return name.with_suffix('.tar.gz').name

Expand All @@ -274,56 +263,58 @@ def upload_or_exit_context(self, output, force):
log.error(e)
sys.exit()

class PDFUploader(FitUploader):
"""An uploader for PDFs. PDFs will be automatically compressed
before uploading."""
target_dir = _profile_key('pdfs_target_dir')
root_url = _profile_key('pdfs_root_url')

def check_pdf_exists(self, pdf_name):
"""Check whether the pdf already exists on the server."""
# Get list of the available fits on the server
l = RemoteLoader()
pdfs = l.downloadable_pdfs
class FitUploader(ArchiveUploader):
"""An uploader for fits. Fits will be automatically compressed
before uploading."""
target_dir = _profile_key('fits_target_dir')
root_url = _profile_key('fits_root_url')
_loader_name = "downloadable_fits"
_resource_type = "fit"

if pdf_name in pdfs:
log.error("A PDF with the same name already exists on "
"the server. To overwrite this PDF use the "
"--force flag, as in `vp-upload <pdfname> "
"--force`.")
raise UploadError
def check_fit_md5(self, output_path):
"""When ``vp-setupfit`` is successfully ran, it creates an ``md5`` from
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved
the config. We check that the ``md5`` matches the ``filter.yml`` which
is checking that ``vp-setupfit`` was ran and that the ``filter.yml``
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved
inside the fit folder wasn't modified.

def compress(self, output_path):
"""Compress the folder and put it in a directory inside its parent."""
# make_archive fails if we give it relative paths for some reason
output_path = output_path.resolve()
tempdir = tempfile.mkdtemp(prefix='pdf_upload_deleteme_',
dir=output_path.parent)
log.info(f"Compressing pdf to {tempdir}")
archive_path_without_extension = pathlib.Path(tempdir)/(output_path.name)
"""
md5_path = output_path / "md5"
try:
with Spinner():
shutil.make_archive(base_name=archive_path_without_extension,
format='gztar',
root_dir=output_path.parent, base_dir=output_path.name)
except Exception as e:
log.error(f"Couldn't compress archive: {e}")
raise UploadError(e) from e
return tempdir, archive_path_without_extension
with open(md5_path, "r") as f:
saved_md5 = f.read()
except FileNotFoundError as e:
log.error(
"It doesn't appear that `vp-setupfit` was ran because no `md5` "
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved
"was found, `vp-setupfit` should be ran before uploading a fit."
scarlehoff marked this conversation as resolved.
Show resolved Hide resolved
)
raise UploadError(f"Fit MD5 file not found at {md5_path}") from e

with open(output_path / "filter.yml", "rb") as f:
hashed_config = hashlib.md5(f.read()).hexdigest()

if hashed_config != saved_md5:
log.error(
"Saved md5 doesn't match saved fit configuration runcard, which "
"suggests that the configuration file was modified after it was "
"saved. <fit folder>/filter.yml shouldn't be modified directly. "
"Instead modify the fit runcard and re-run ``vp-setupfit``."
)
raise UploadError

def upload_output(self, output_path, force):
output_path = pathlib.Path(output_path)
pdf_name = output_path.name

if not force:
self.check_pdf_exists(pdf_name)
self.check_fit_md5(output_path)
return super().upload_output(output_path, force)

new_out, name = self.compress(output_path)
super(FileUploader, self).upload_output(new_out)

shutil.rmtree(new_out)
return name.with_suffix('.tar.gz').name
class PDFUploader(ArchiveUploader):
"""An uploader for PDFs. PDFs will be automatically compressed
before uploading."""
target_dir = _profile_key('pdfs_target_dir')
root_url = _profile_key('pdfs_root_url')
_loader_name = "downloadable_pdfs"
_resource_type = "pdf"


def check_for_meta(path):
Expand Down