Skip to content

Commit

Permalink
institutional user reports
Browse files Browse the repository at this point in the history
  • Loading branch information
John Tordoff committed Aug 27, 2024
1 parent f09940a commit 3800f83
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 0 deletions.
2 changes: 2 additions & 0 deletions osf/metrics/reporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .preprint_count import PreprintCountReporter
from .user_count import UserCountReporter
from .spam_count import SpamCountReporter
from .institutional_users import InstitutionalUsersReporter


class AllDailyReporters(enum.Enum):
Expand All @@ -26,3 +27,4 @@ class AllDailyReporters(enum.Enum):

class AllMonthlyReporters(enum.Enum):
    """Enum whose members each name a reporter class.

    NOTE(review): by its name this groups the reporters meant to run on the
    monthly schedule; the code that consumes this enum is not visible here.
    """
    SPAM_COUNT = SpamCountReporter
    INSTITUTION_USERS = InstitutionalUsersReporter
111 changes: 111 additions & 0 deletions osf/metrics/reporters/institutional_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import datetime
from django.contrib.contenttypes.models import ContentType

from osf import models as osfdb
from osf.metrics.reports import InstitutionalUsersReport
from osf.metrics.utils import YearMonth
from website import settings as website_settings
from api.caching.utils import storage_usage_cache
from api.caching.tasks import update_storage_usage_cache
from api.caching.settings import STORAGE_USAGE_KEY
from ._base import MonthlyReporter


class InstitutionalUsersReporter(MonthlyReporter):
    """Build one report per (institution, affiliated user) pair for a month.

    Each report carries some user attributes plus counts of the user's
    institution-affiliated projects/registrations, published preprints,
    public files and storage usage.
    """

    def report(self, yearmonth: YearMonth):
        """Yield a report for each user of each institution.

        Only users whose `created` is earlier than `yearmonth.next_month()`
        are included.
        """
        before_datetime = yearmonth.next_month()

        for institution in osfdb.Institution.objects.all():
            users = institution.get_institution_users().filter(created__lt=before_datetime)
            for user in users.iterator():
                yield self.generate_report(institution, user, yearmonth, before_datetime)

    def generate_report(self, institution, user, yearmonth, before_datetime):
        """Create the report for one user at one institution and fill in its counts."""
        affiliation = user.get_institution_affiliation(institution._id)
        report = InstitutionalUsersReport(
            report_yearmonth=yearmonth,
            institution_id=institution._id,
            user_id=user._id,
            # normalize falsy values (e.g. empty string) to None
            department_name=affiliation.sso_department or None,
            month_last_login=user.date_last_login,
            account_creation_date=user.created.date(),
            orcid_id=user.get_verified_external_id('ORCID', verified_only=True),
            # counters start at zero and are accumulated by fill_counts()
            public_project_count=0,
            private_project_count=0,
            public_registration_count=0,
            embargoed_registration_count=0,
            storage_byte_count=0,
            public_file_count=0,
            published_preprint_count=0,
        )

        self.fill_counts(report, institution, user, before_datetime)
        return report

    def fill_counts(self, report, institution, user, before_datetime):
        """Accumulate node, registration and preprint counts onto `report`."""
        for node in self.get_user_nodes(institution, user, before_datetime).iterator():
            self.update_node_counts(report, node, before_datetime)

        for preprint in self.get_user_preprints(user, before_datetime).iterator():
            self.update_preprint_counts(report, preprint, before_datetime)

    def get_user_nodes(self, institution, user, before_datetime):
        """Return the institution's non-deleted nodes/registrations available to `user`.

        Restricted to objects created before `before_datetime`; `embargo` is
        joined in because update_node_counts() reads it for registrations.
        """
        nodes = institution.nodes.filter(
            type__in=('osf.node', 'osf.registration'),
            created__lt=before_datetime,
            is_deleted=False,
        )
        return osfdb.Node.objects.get_nodes_for_user(user=user, base_queryset=nodes).select_related('embargo')

    def get_user_preprints(self, user, before_datetime):
        """Return preprints `user` contributes to that were published before `before_datetime`."""
        return osfdb.Preprint.objects.filter(
            _contributors=user,
            is_published=True,
            date_published__lt=before_datetime,
        )

    def update_node_counts(self, report, node, before_datetime):
        """Add one node's or registration's contribution to `report`.

        Storage usage is counted for every node and files only for public
        ones. Project/registration counters are bumped only for root nodes
        (`node.pk == node.root_id`), so components are not counted as projects.
        """
        self.update_storage_usage(report, node)

        if node.is_public:
            report.public_file_count += self.get_file_count(node, before_datetime)

        if node.pk != node.root_id:
            return

        if isinstance(node, osfdb.Node):
            if node.is_public:
                report.public_project_count += 1
            else:
                report.private_project_count += 1
        elif isinstance(node, osfdb.Registration):
            if node.is_public:
                report.public_registration_count += 1
            else:
                # A non-public registration may have no embargo at all; guard
                # so one such registration cannot abort the whole report run.
                embargo = node.embargo
                if embargo is not None and embargo.end_date >= before_datetime:
                    report.embargoed_registration_count += 1

    def update_preprint_counts(self, report, preprint, before_datetime=None):
        """Add one preprint's contribution to `report`, if it is publishable.

        `before_datetime` is forwarded to get_file_count() as the file
        creation cutoff; it is optional so existing two-argument calls
        keep working.
        """
        if preprint.verified_publishable:
            report.published_preprint_count += 1
            report.public_file_count += self.get_file_count(preprint, before_datetime)
            self.update_storage_usage(report, preprint)

    def get_file_count(self, obj, before_datetime=None):
        """Count active osfstorage files whose target is `obj`.

        `before_datetime` is the exclusive upper bound on file creation
        time. When omitted, the target's own `created` timestamp is used,
        which is what this method did before the parameter existed.
        """
        # Files reference their target through (content type, object id), so
        # the content type must match the target's model; a preprint is not
        # an AbstractNode and would never match that content type.
        target_model = osfdb.Preprint if isinstance(obj, osfdb.Preprint) else osfdb.AbstractNode
        cutoff = obj.created if before_datetime is None else before_datetime
        return osfdb.OsfStorageFile.active.filter(
            target_object_id=obj.pk,
            target_content_type=ContentType.objects.get_for_model(target_model),
            created__lt=cutoff,
        ).count()

    def update_storage_usage(self, report, obj):
        """Add the cached storage usage of `obj` (in bytes) to `report`.

        Does nothing when the storage-usage cache is disabled or when no
        value is available even after asking for a cache refresh.
        """
        if not website_settings.ENABLE_STORAGE_USAGE_CACHE:
            # Return early so `byte_count` is never read before assignment.
            return

        cache_key = STORAGE_USAGE_KEY.format(target_id=obj._id)
        byte_count = storage_usage_cache.get(cache_key)
        if byte_count is None:
            # cache miss: recompute, then read the value back
            update_storage_usage_cache(obj.id, obj._id)
            byte_count = storage_usage_cache.get(cache_key)

        if byte_count is not None:
            report.storage_byte_count += byte_count
18 changes: 18 additions & 0 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,3 +214,21 @@ class SpamSummaryReport(MonthlyReport):
preprint_flagged = metrics.Integer()
user_marked_as_spam = metrics.Integer()
user_marked_as_ham = metrics.Integer()


class InstitutionalUserReport(MonthlyReport):
    """Monthly metrics for a single user at a single institution."""

    # NOTE(review): presumably consumed by MonthlyReport to identify a report
    # uniquely (one per month, institution and user) -- confirm in the base class.
    UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', 'user_id',)

    institution_id = metrics.Keyword()
    user_id = metrics.Keyword()
    department_name = metrics.Keyword()
    public_project_count = metrics.Integer()
    private_project_count = metrics.Integer()
    public_registration_count = metrics.Integer()
    embargoed_registration_count = metrics.Integer()
    published_preprint_count = metrics.Integer()
    public_file_count = metrics.Integer()
    # Byte totals routinely exceed the signed 32-bit range of an Elasticsearch
    # `integer` (about 2.1 GB), so this field needs a 64-bit `long`.
    storage_byte_count = metrics.Long()
    month_last_login = YearmonthField()
    account_creation_date = YearmonthField()
    orcid_id = metrics.Keyword()


# The reporter module imports this class under the plural name; keep both
# spellings importable until callers converge on a single one.
InstitutionalUsersReport = InstitutionalUserReport
47 changes: 47 additions & 0 deletions osf_tests/metrics/test_institution_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from __future__ import annotations

import pytest

from osf import models as osfdb
from osf.metrics.reporters import InstitutionalUsersReporter
from osf_tests.factories import InstitutionFactory, ProjectFactory, UserFactory


@pytest.mark.django_db
class TestInstitutionalUsersReporter:
    """Tests for InstitutionalUsersReporter against a single institution."""

    @pytest.fixture()
    def institution(self):
        return InstitutionFactory()

    @pytest.fixture()
    def user_cases(self, institution) -> list[dict]:
        # three affiliated users, each tagged with the counts it is meant to have
        cases = [
            self.create_user_case(1, 1, 1, 1, 1, 1, institution),
            self.create_user_case(2, 2, 2, 2, 2, 2, institution),
            self.create_user_case(3, 3, 3, 3, 3, 3, institution),
        ]
        return cases

    @pytest.fixture()
    def unaffiliated_user(self):
        return UserFactory()

    def create_user_case(self, pub_project_count, priv_project_count, pub_reg_count,
                         emb_reg_count, pub_file_count, pub_preprint_count, institution):
        """Create a user affiliated with `institution` and record its intended counts.

        Only the user and the affiliation are created here; the projects,
        registrations, files and preprints behind the counts are not.
        """
        user = UserFactory()
        user.add_or_update_affiliated_institution(institution)
        return {
            'pub_project_count': pub_project_count,
            'priv_project_count': priv_project_count,
            'pub_reg_count': pub_reg_count,
            'emb_reg_count': emb_reg_count,
            'pub_file_count': pub_file_count,
            'pub_preprint_count': pub_preprint_count,
            'user': user,
        }

    def test_reporter(self, institution, user_cases, unaffiliated_user):
        """Exactly one report per affiliated user, and none for anyone else."""
        # local imports keep this test self-contained
        import datetime
        from osf.metrics.utils import YearMonth

        # The fixture users were created just now, so report on the current
        # month: its cutoff (start of next month) is after their creation.
        now = datetime.datetime.now(datetime.timezone.utc)
        _yearmonth = YearMonth(now.year, now.month)

        actual_reports = list(InstitutionalUsersReporter().report(_yearmonth))

        reported_user_ids = {report.user_id for report in actual_reports}
        expected_user_ids = {case['user']._id for case in user_cases}
        assert len(actual_reports) == len(user_cases)
        assert reported_user_ids == expected_user_ids
        assert unaffiliated_user._id not in reported_user_ids
        assert all(report.institution_id == institution._id for report in actual_reports)
        # TODO: build the projects/registrations/files/preprints described by
        # each case and assert the per-user counts as well.

0 comments on commit 3800f83

Please sign in to comment.