Skip to content

Commit 0243390

Browse files
authored
canvas - separate courses per session (#2347)
1 parent 213b49c commit 0243390

File tree

4 files changed

+102
-13
lines changed

4 files changed

+102
-13
lines changed

learning_resources/etl/canvas.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from tempfile import TemporaryDirectory
66

77
from defusedxml import ElementTree
8+
from django.conf import settings
89

910
from learning_resources.constants import LearningResourceType, PlatformType
1011
from learning_resources.etl.constants import ETLSource
@@ -24,10 +25,14 @@ def sync_canvas_archive(bucket, key: str, overwrite):
2425
"""
2526
from learning_resources.etl.loaders import load_content_files
2627

28+
course_folder = key.lstrip(settings.CANVAS_COURSE_BUCKET_PREFIX).split("/")[0]
29+
2730
with TemporaryDirectory() as export_tempdir:
2831
course_archive_path = Path(export_tempdir, key.split("/")[-1])
2932
bucket.download_file(key, course_archive_path)
30-
run = run_for_canvas_archive(course_archive_path, overwrite=overwrite)
33+
resource_readable_id, run = run_for_canvas_archive(
34+
course_archive_path, course_folder=course_folder, overwrite=overwrite
35+
)
3136
checksum = calc_checksum(course_archive_path)
3237
if run:
3338
load_content_files(
@@ -38,16 +43,17 @@ def sync_canvas_archive(bucket, key: str, overwrite):
3843
)
3944
run.checksum = checksum
4045
run.save()
46+
return resource_readable_id, run
4147

4248

43-
def run_for_canvas_archive(course_archive_path, overwrite):
49+
def run_for_canvas_archive(course_archive_path, course_folder, overwrite):
4450
"""
4551
Generate and return a LearningResourceRun for a Canvas course
4652
"""
4753
checksum = calc_checksum(course_archive_path)
4854
course_info = parse_canvas_settings(course_archive_path)
4955
course_title = course_info.get("title")
50-
readable_id = course_info.get("course_code")
56+
readable_id = f"{course_folder}-{course_info.get('course_code')}"
5157
# create placeholder learning resource
5258
resource, _ = LearningResource.objects.update_or_create(
5359
readable_id=readable_id,
@@ -64,15 +70,18 @@ def run_for_canvas_archive(course_archive_path, overwrite):
6470
)
6571
if resource.runs.count() == 0:
6672
LearningResourceRun.objects.create(
67-
run_id=f"{readable_id}+canvas", learning_resource=resource, published=True
73+
run_id=f"{readable_id}+canvas",
74+
learning_resource=resource,
75+
published=True,
6876
)
6977
run = resource.runs.first()
78+
resource_readable_id = run.learning_resource.readable_id
7079
if run.checksum == checksum and not overwrite:
7180
log.info("Checksums match for %s, skipping load", readable_id)
72-
return None
81+
return resource_readable_id, None
7382
run.checksum = checksum
7483
run.save()
75-
return run
84+
return resource_readable_id, run
7685

7786

7887
def parse_canvas_settings(course_archive_path):

learning_resources/etl/canvas_test.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ def test_run_for_canvas_archive_creates_resource_and_run(tmp_path, mocker):
8383
Test that run_for_canvas_archive creates a LearningResource and Run
8484
when given a valid canvas archive.
8585
"""
86+
course_folder = "test"
8687
mocker.patch(
8788
"learning_resources.etl.canvas.parse_canvas_settings",
8889
return_value={"title": "Test Course", "course_code": "TEST101"},
@@ -91,8 +92,10 @@ def test_run_for_canvas_archive_creates_resource_and_run(tmp_path, mocker):
9192
# No resource exists yet
9293
course_archive_path = tmp_path / "archive.zip"
9394
course_archive_path.write_text("dummy")
94-
run = run_for_canvas_archive(course_archive_path, overwrite=True)
95-
resource = LearningResource.objects.get(readable_id="TEST101")
95+
_, run = run_for_canvas_archive(
96+
course_archive_path, course_folder=course_folder, overwrite=True
97+
)
98+
resource = LearningResource.objects.get(readable_id=f"{course_folder}-TEST101")
9699
assert resource.title == "Test Course"
97100
assert resource.etl_source == ETLSource.canvas.name
98101
assert resource.resource_type == LearningResourceType.course.name
@@ -107,6 +110,7 @@ def test_run_for_canvas_archive_creates_run_if_none_exists(tmp_path, mocker):
107110
"""
108111
Test that run_for_canvas_archive creates a Run if no runs exist for the resource.
109112
"""
113+
course_folder = "test"
110114
mocker.patch(
111115
"learning_resources.etl.canvas.parse_canvas_settings",
112116
return_value={"title": "Test Course", "course_code": "TEST104"},
@@ -116,7 +120,7 @@ def test_run_for_canvas_archive_creates_run_if_none_exists(tmp_path, mocker):
116120
)
117121
# Create resource with no runs
118122
resource = LearningResourceFactory.create(
119-
readable_id="TEST104",
123+
readable_id=f"{course_folder}-TEST104",
120124
title="Test Course",
121125
etl_source=ETLSource.canvas.name,
122126
resource_type=LearningResourceType.course.name,
@@ -126,7 +130,9 @@ def test_run_for_canvas_archive_creates_run_if_none_exists(tmp_path, mocker):
126130
assert resource.runs.count() == 0
127131
course_archive_path = tmp_path / "archive4.zip"
128132
course_archive_path.write_text("dummy")
129-
run = run_for_canvas_archive(course_archive_path, overwrite=True)
133+
_, run = run_for_canvas_archive(
134+
course_archive_path, course_folder=course_folder, overwrite=True
135+
)
130136
assert run is not None
131137
assert run.learning_resource == resource
132138
assert run.checksum == "checksum104"

learning_resources/tasks.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@
3030
)
3131
from learning_resources.models import ContentFile, LearningResource
3232
from learning_resources.site_scrapers.utils import scraper_for_site
33-
from learning_resources.utils import html_to_markdown, load_course_blocklist
33+
from learning_resources.utils import (
34+
html_to_markdown,
35+
load_course_blocklist,
36+
resource_unpublished_actions,
37+
)
3438
from learning_resources_search.exceptions import RetryError
3539
from main.celery import app
3640
from main.constants import ISOFORMAT
@@ -473,7 +477,7 @@ def summarize_unprocessed_content(
473477
@app.task(acks_late=True)
474478
def ingest_canvas_course(archive_path, overwrite):
475479
bucket = get_learning_course_bucket(ETLSource.canvas.name)
476-
sync_canvas_archive(bucket, archive_path, overwrite=overwrite)
480+
return sync_canvas_archive(bucket, archive_path, overwrite=overwrite)
477481

478482

479483
@app.task(acks_late=True)
@@ -499,14 +503,22 @@ def sync_canvas_courses(overwrite):
499503
== archive.last_modified
500504
):
501505
latest_archives[course_folder] = archive
506+
canvas_readable_ids = []
502507

503508
for archive in latest_archives.values():
504509
key = archive.key
505510
log.info("Ingesting canvas course %s", key)
506-
ingest_canvas_course(
511+
resource_readable_id, canvas_run = ingest_canvas_course(
507512
key,
508513
overwrite=overwrite,
509514
)
515+
canvas_readable_ids.append(resource_readable_id)
516+
stale_courses = LearningResource.objects.filter(
517+
etl_source=ETLSource.canvas.name
518+
).exclude(readable_id__in=canvas_readable_ids)
519+
stale_courses.update(test_mode=False, published=False)
520+
[resource_unpublished_actions(resource) for resource in stale_courses]
521+
stale_courses.delete()
510522

511523

512524
@app.task(bind=True)

learning_resources/tasks_test.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,17 @@
1717
from learning_resources.factories import (
1818
LearningResourceFactory,
1919
)
20+
from learning_resources.models import LearningResource
2021
from learning_resources.tasks import (
2122
get_ocw_data,
2223
get_youtube_data,
2324
get_youtube_transcripts,
2425
marketing_page_for_resources,
2526
scrape_marketing_pages,
27+
sync_canvas_courses,
2628
update_next_start_date_and_prices,
2729
)
30+
from main.utils import now_in_utc
2831

2932
pytestmark = pytest.mark.django_db
3033
# pylint:disable=redefined-outer-name,unused-argument,too-many-arguments
@@ -606,3 +609,62 @@ def test_scrape_marketing_pages(mocker, settings, mocked_celery):
606609
eid in mock_marketing_page_task.mock_calls[0].args[0] for eid in expected_ids
607610
)
608611
mock_group.assert_called_once()
612+
613+
614+
def test_sync_canvas_courses_removes_stale_resources(
615+
settings, mocker, django_assert_num_queries
616+
):
617+
"""
618+
sync_canvas_courses should unpublish and delete stale canvas LearningResources
619+
"""
620+
settings.CANVAS_COURSE_BUCKET_PREFIX = "canvas/"
621+
mocker.patch("learning_resources.tasks.resource_unpublished_actions")
622+
mocker.patch("learning_resources.tasks.get_learning_course_bucket")
623+
mock_bucket = mocker.Mock()
624+
mock_archive1 = mocker.Mock()
625+
mock_archive1.key = "canvas/course1/archive1.zip"
626+
mock_archive1.last_modified = now_in_utc()
627+
mock_archive2 = mocker.Mock()
628+
mock_archive2.key = "canvas/course2/archive2.zip"
629+
mock_archive2.last_modified = now_in_utc() - timedelta(days=1)
630+
mock_bucket.objects.filter.return_value = [mock_archive1, mock_archive2]
631+
mocker.patch(
632+
"learning_resources.tasks.get_learning_course_bucket", return_value=mock_bucket
633+
)
634+
635+
# Create two canvas LearningResources - one stale
636+
637+
lr1 = LearningResourceFactory.create(
638+
readable_id="course1",
639+
etl_source=ETLSource.canvas.name,
640+
published=True,
641+
test_mode=True,
642+
resource_type="course",
643+
)
644+
lr2 = LearningResourceFactory.create(
645+
readable_id="course2",
646+
etl_source=ETLSource.canvas.name,
647+
published=True,
648+
test_mode=True,
649+
resource_type="course",
650+
)
651+
lr_stale = LearningResourceFactory.create(
652+
readable_id="course3",
653+
etl_source=ETLSource.canvas.name,
654+
published=True,
655+
test_mode=True,
656+
resource_type="course",
657+
)
658+
659+
# Patch ingest_canvas_course to return the readable_ids for the two non-stale courses
660+
mocker.patch(
661+
"learning_resources.tasks.ingest_canvas_course",
662+
side_effect=[("course1", lr1.runs.first()), ("course2", lr2.runs.first())],
663+
)
664+
sync_canvas_courses(overwrite=False)
665+
666+
# The stale course should be unpublished and deleted
667+
assert not LearningResource.objects.filter(id=lr_stale.id).exists()
668+
# The non-stale courses should still exist
669+
assert LearningResource.objects.filter(id=lr1.id).exists()
670+
assert LearningResource.objects.filter(id=lr2.id).exists()

0 commit comments

Comments
 (0)