Skip to content

🎉 Online saving with appropriate arborescence #18

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion src/plaid/containers/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,8 +731,44 @@ def load(self, fname: Union[str,Path], verbose: bool = False,
shutil.rmtree(inputdir)

# -------------------------------------------------------------------------#
def add_to_dir(self, sample: "Sample", savedir: Union[str, Path] = None, verbose: bool = False) -> None:
    """Save a single sample into the `samples` sub-directory of a dataset directory.

    The sample is written to ``<savedir>/samples/sample_<id>`` where ``<id>`` is a
    zero-padded, 9-digit index that collides neither with the number of samples
    held by this instance nor with any ``sample_<id>`` directory already on disk.
    The sample is only written to disk; it is not added to this dataset instance.

    Args:
        sample (Sample): The sample to save; its ``save`` method is called with the target path.
        savedir (Union[str,Path], optional): Directory of the dataset on disk. When given, it is
            used and remembered in ``self.savedir``. When None, ``self.savedir`` (set by a previous
            call to load or save, or by a previous call to this method) is used. Defaults to None.
        verbose (bool, optional): If True, print the directory the sample is saved to. Defaults to False.

    Raises:
        ValueError: If both ``savedir`` and ``self.savedir`` are None.
    """
    # An explicit argument takes priority over the remembered directory;
    # otherwise fall back on the directory of the last load/save.
    if savedir is not None:
        self.savedir = savedir
    elif getattr(self, 'savedir', None) is None:
        raise ValueError('self.savedir and savedir are None, we don’t know where to save, specify one of them before')

    if verbose:  # pragma: no cover
        print(f"Saving database to: {self.savedir}")

    # Creates self.savedir as well when it does not exist yet.
    samples_dir = os.path.join(self.savedir, 'samples')
    os.makedirs(samples_dir, exist_ok=True)

    # Collect the ids already used on disk, ignoring entries that are not
    # directories or whose suffix is not a plain integer.
    sample_ids_in_path = []
    for candidate in glob.glob(os.path.join(samples_dir, 'sample_*')):
        suffix = os.path.basename(candidate).split('_')[-1]
        if os.path.isdir(candidate) and suffix.isdigit():
            sample_ids_in_path.append(int(suffix))

    # The new id must not clash with samples held in memory (ids below len(self))
    # nor with samples on disk; an empty directory simply starts at len(self).
    i_sample = len(self)
    if sample_ids_in_path:
        i_sample = max(i_sample, max(sample_ids_in_path) + 1)

    sample_fname = os.path.join(samples_dir, f'sample_{i_sample:09d}')
    sample.save(sample_fname)

def _save_to_dir_(self, savedir: Union[str,Path], verbose: bool = False) -> None:
"""Saves the dataset into a created sample directory and creates an 'infos.yaml' file to store additional information about the dataset.
"""Saves the dataset into a sub-directory `samples` and creates an 'infos.yaml' file to store additional information about the dataset.

Args:
savedir (Union[str,Path]): The path in which to save the files.
Expand All @@ -742,6 +778,8 @@ def _save_to_dir_(self, savedir: Union[str,Path], verbose: bool = False) -> None
if not (savedir.is_dir()):
savedir.mkdir(parents=True)

self.savedir = savedir

if verbose: # pragma: no cover
print(f"Saving database to: {savedir}")

Expand Down Expand Up @@ -795,6 +833,8 @@ def _load_from_dir_(self, savedir: Union[str,Path], ids: list[int] = None,
if processes_number < -1:
raise ValueError("Number of processes cannot be < -1")

self.savedir = savedir

if verbose: # pragma: no cover
print(f"Reading database located at: {savedir}")

Expand Down
Loading