Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add package and source package batching for publish #1146

Merged
merged 1 commit into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/1147.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improved performance when creating publications by creating PublishedArtifacts in bulk, which greatly reduces the number of database calls.
21 changes: 7 additions & 14 deletions pulp_deb/app/serializers/content_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,22 +482,15 @@ def to822(self, component=""):
try:
artifact = self.instance._artifacts.get()
artifact.touch() # Orphan cleanup protection until we are done!
if artifact.md5:
ret["MD5sum"] = artifact.md5
if artifact.sha1:
ret["SHA1"] = artifact.sha1
ret["SHA256"] = artifact.sha256
ret["Size"] = str(artifact.size)
except Artifact.DoesNotExist:
artifact = RemoteArtifact.objects.filter(sha256=self.instance.sha256).first()
if artifact.md5:
ret["MD5sum"] = artifact.md5
if artifact.sha1:
ret["SHA1"] = artifact.sha1
ret["SHA256"] = artifact.sha256
ret["Size"] = str(artifact.size)

quba42 marked this conversation as resolved.
Show resolved Hide resolved
ret["Filename"] = self.instance.filename(component)

if artifact:
ret.update({"MD5sum": artifact.md5} if artifact.md5 else {})
ret.update({"SHA1": artifact.sha1} if artifact.sha1 else {})
ret.update({"SHA256": artifact.sha256})
ret.update({"Size": str(artifact.size)})
ret.update({"Filename": self.instance.filename(component)})

return ret

Expand Down
126 changes: 79 additions & 47 deletions pulp_deb/app/tasks/publishing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from django.conf import settings
from django.core.files import File
from django.db import transaction
from django.db.utils import IntegrityError
from django.forms.models import model_to_dict

Expand Down Expand Up @@ -147,14 +148,12 @@ def publish(
packages = Package.objects.filter(
pk__in=repo_version.content.order_by("-pulp_created")
)
for package in packages:
release_helper.components[component].add_package(package)
release_helper.components[component].add_packages(packages)

source_packages = SourcePackage.objects.filter(
pk__in=repo_version.content.order_by("-pulp_created"),
)
for source_package in source_packages:
release_helper.components[component].add_source_package(source_package)
release_helper.components[component].add_source_packages(source_packages)

release_helper.finish()

Expand Down Expand Up @@ -242,21 +241,28 @@ def publish(
pk__in=repo_version.content.order_by("-pulp_created"),
release_component__in=release_components_filtered,
).select_related("release_component", "package")
for prc in package_release_components:
release_helper.components[prc.release_component.component].add_package(
prc.package
)

source_package_release_components = (
SourcePackageReleaseComponent.objects.filter(
pk__in=repo_version.content.order_by("-pulp_created"),
release_component__in=release_components_filtered,
).select_related("release_component", "source_package")
)
for drc in source_package_release_components:
release_helper.components[
drc.release_component.component
].add_source_package(drc.source_package)

for component in components:
packages = [
prc.package
for prc in package_release_components
if prc.release_component.component == component
]
release_helper.components[component].add_packages(packages)

source_packages = [
drc.source_package
for drc in source_package_release_components
if drc.release_component.component == component
]
release_helper.components[component].add_source_packages(source_packages)

release_helper.save_unsigned_metadata()
release_helpers.append(release_helper)
Expand Down Expand Up @@ -307,47 +313,73 @@ def __init__(self, parent, component):
source_index_path,
)

def add_package(self, package):
    """Publish one package and append its paragraph to its Packages index.

    A PublishedArtifact is saved for the package's content artifact. The
    package is then serialized with Package822Serializer and dumped into the
    index file registered for its architecture, followed by a newline.

    Args:
        package: Object providing ``filename()``, ``contentartifact_set``,
            ``architecture`` and ``relative_path``.
    """
    # An IntegrityError raised while saving is deliberately ignored
    # (presumably the path is already published -- confirm against the model).
    with suppress(IntegrityError):
        published_artifact = PublishedArtifact(
            relative_path=package.filename(self.component),
            publication=self.parent.publication,
            content_artifact=package.contentartifact_set.get(),
        )
        published_artifact.save()
    package_serializer = Package822Serializer(package, context={"request": None})

    try:
        package_serializer.to822(self.component).dump(
            self.package_index_files[package.architecture][0]
        )
    except KeyError:
        # A KeyError is reported as a missing architecture for this component.
        # Logger.warning replaces the deprecated Logger.warn alias.
        log.warning(
            f"Published package '{package.relative_path}' with architecture "
            f"'{package.architecture}' was not added to component '{self.component}' in "
            f"distribution '{self.parent.distribution}' because it lacks this architecture!"
        )
    else:
        # Terminate the paragraph only when it was actually written.
        self.package_index_files[package.architecture][0].write(b"\n")
def add_packages(self, packages):
published_artifacts = []
package_data = []

# Publish DSC file and setup to create Sources Indices file
def add_source_package(self, source_package):
artifact_set = source_package.contentartifact_set.all()
for content_artifact in artifact_set:
for package in packages:
with suppress(IntegrityError):
content_artifact = package.contentartifact_set.get()
relative_path = package.filename(self.component)
quba42 marked this conversation as resolved.
Show resolved Hide resolved

published_artifact = PublishedArtifact(
relative_path=source_package.derived_path(
os.path.basename(content_artifact.relative_path), self.component
),
relative_path=relative_path,
publication=self.parent.publication,
content_artifact=content_artifact,
)
published_artifact.save()
dsc_file_822_serializer = DscFile822Serializer(source_package, context={"request": None})
dsc_file_822_serializer.to822(self.component, paragraph=True).dump(
self.source_index_file_info[0]
)
self.source_index_file_info[0].write(b"\n")
published_artifacts.append(published_artifact)
package_data.append((package, package.architecture))

with transaction.atomic():
if published_artifacts:
PublishedArtifact.objects.bulk_create(published_artifacts, ignore_conflicts=True)
quba42 marked this conversation as resolved.
Show resolved Hide resolved

for package, architecture in package_data:
package_serializer = Package822Serializer(package, context={"request": None})
try:
package_serializer.to822(self.component).dump(
self.package_index_files[architecture][0]
)
except KeyError:
log.warn(
f"Published package '{package.relative_path}' with architecture "
f"'{architecture}' was not added to component '{self.component}' in "
f"distribution '{self.parent.distribution}' because it lacks this architecture!"
)
else:
self.package_index_files[architecture][0].write(b"\n")

# Publish DSC file and setup to create Sources Indices file
def add_source_packages(self, source_packages):
    """Publish the artifacts of several source packages and index them.

    One PublishedArtifact is built for every content artifact of every source
    package, and all of them are written with a single ``bulk_create`` call.
    Afterwards one paragraph per source package is dumped into the source
    index file of this component, each followed by a newline.

    Args:
        source_packages: Iterable of source package objects providing
            ``contentartifact_set`` and ``derived_path()``.
    """
    published_artifacts = []
    source_package_data = []

    for source_package in source_packages:
        # No call in this loop saves to the database, so there is no
        # IntegrityError to suppress here; conflicting rows are skipped by
        # ignore_conflicts=True in the bulk_create call below.
        for content_artifact in source_package.contentartifact_set.all():
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=source_package.derived_path(
                        os.path.basename(content_artifact.relative_path), self.component
                    ),
                    publication=self.parent.publication,
                    content_artifact=content_artifact,
                )
            )
        # Recorded once per source package, independent of its artifact count.
        source_package_data.append(source_package)

    # Only open a transaction when there is something to write.
    if published_artifacts:
        with transaction.atomic():
            PublishedArtifact.objects.bulk_create(published_artifacts, ignore_conflicts=True)

    for source_package in source_package_data:
        dsc_file_822_serializer = DscFile822Serializer(
            source_package, context={"request": None}
        )
        dsc_file_822_serializer.to822(self.component, paragraph=True).dump(
            self.source_index_file_info[0]
        )
        self.source_index_file_info[0].write(b"\n")

def finish(self):
# Publish Packages files
Expand Down