Commit a3acc448 authored by Honcoop, T.
Browse files

Feature/cohorts

parent f5c964b5
...@@ -9,6 +9,9 @@ services: ...@@ -9,6 +9,9 @@ services:
# only cache local items. # only cache local items.
variables: variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache" PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache"
POSTGRES_DB: test_coursera
POSTGRES_USER: user
POSTGRES_PASSWORD: ""
# Pip's cache doesn't store the python packages # Pip's cache doesn't store the python packages
# https://pip.pypa.io/en/stable/reference/pip_install/#caching # https://pip.pypa.io/en/stable/reference/pip_install/#caching
...@@ -19,11 +22,6 @@ cache: ...@@ -19,11 +22,6 @@ cache:
paths: paths:
- .cache/pip - .cache/pip
- db - db
variables:
POSTGRES_DB: test_coursera
POSTGRES_USER: user
POSTGRES_PASSWORD: ""
stages: stages:
- test - test
...@@ -35,7 +33,7 @@ runtests: ...@@ -35,7 +33,7 @@ runtests:
- apt install postgresql-client -y - apt install postgresql-client -y
- mkdir -p db - mkdir -p db
- cd db - cd db
- wget -O coursera.sql -nc $DATABASE_FILE_URL || true - wget -O coursera.sql -N $DATABASE_FILE_URL || true
- psql -h postgres -U $POSTGRES_USER -d $POSTGRES_DB -f coursera.sql - psql -h postgres -U $POSTGRES_USER -d $POSTGRES_DB -f coursera.sql
- cd .. - cd ..
- python -V # Print out python version for debugging - python -V # Print out python version for debugging
......
# Generated by Django 2.1.3 on 2018-11-14 13:38
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Create ``on_demand_session_users_view`` from ``on_demand_session_memberships``.

    The relation is created as whatever kind ``settings.TABLE_TYPE`` names and
    is then indexed on the session id, course id and user id columns.
    """

    dependencies = [("coursera", "0044_auto_20181114_1338")]

    operations = [
        migrations.RunSQL(
            [
                """
                CREATE """
                + settings.TABLE_TYPE
                + """ on_demand_session_users_view
                AS
                SELECT
                course_id,
                on_demand_session_id,
                eitdigital_user_id,
                on_demand_sessions_membership_start_ts,
                on_demand_sessions_membership_end_ts
                FROM
                on_demand_session_memberships
                """,
                """
                CREATE INDEX ON on_demand_session_users_view (on_demand_session_id)
                """,
                """
                CREATE INDEX ON on_demand_session_users_view (course_id)
                """,
                """
                CREATE INDEX ON on_demand_session_users_view (eitdigital_user_id)
                """,
            ],
            # The forward SQL creates a ``settings.TABLE_TYPE`` relation, so the
            # reverse must drop that same kind of relation. A hard-coded
            # ``DROP MATERIALIZED VIEW`` fails when the relation was created as
            # a plain table, even with ``IF EXISTS``.
            # NOTE(review): assumes settings.TABLE_TYPE is an object type that
            # DROP accepts verbatim (e.g. TABLE or MATERIALIZED VIEW) -- confirm
            # against the project settings.
            reverse_sql="DROP "
            + settings.TABLE_TYPE
            + " IF EXISTS on_demand_session_users_view",
        )
    ]
...@@ -29,11 +29,13 @@ class CourseQuerySet(models.QuerySet): ...@@ -29,11 +29,13 @@ class CourseQuerySet(models.QuerySet):
) )
) )
def with_enrolled_learners(self, filter): def with_enrolled_learners(self, filter, filter_cohort):
return self.annotate( return self.annotate(
enrolled_learners=CountSubquery( enrolled_learners=CountSubquery(
filter( filter(
CourseMembership.objects.filter(course_id=OuterRef("pk")).filter( filter_cohort(
CourseMembership.objects.filter(course_id=OuterRef("pk"))
).filter(
role__in=[ role__in=[
CourseMembership.LEARNER, CourseMembership.LEARNER,
CourseMembership.PRE_ENROLLED_LEARNER, CourseMembership.PRE_ENROLLED_LEARNER,
...@@ -67,21 +69,23 @@ class CourseQuerySet(models.QuerySet): ...@@ -67,21 +69,23 @@ class CourseQuerySet(models.QuerySet):
) )
) )
def with_finished_learners(self, filter): def with_finished_learners(self, filter, filter_cohort):
return self.annotate( return self.annotate(
finished_learners=CountSubquery( finished_learners=CountSubquery(
filter( filter(
Grade.objects.filter(course_id=OuterRef("pk")).filter( filter_cohort(
passing_state__in=[Grade.PASSED, Grade.VERIFIED_PASSED] Grade.objects.filter(course_id=OuterRef("pk"))
) ).filter(passing_state__in=[Grade.PASSED, Grade.VERIFIED_PASSED])
) )
) )
) )
def with_paying_learners(self, filter): def with_paying_learners(self, filter, filter_cohort):
return self.annotate( return self.annotate(
paying_learners=CountSubquery( paying_learners=CountSubquery(
CertificatePayment.objects.filter(course_id=OuterRef("pk")) filter_cohort(
CertificatePayment.objects.filter(course_id=OuterRef("pk"))
)
) )
) )
...@@ -127,14 +131,14 @@ class CourseQuerySet(models.QuerySet): ...@@ -127,14 +131,14 @@ class CourseQuerySet(models.QuerySet):
) )
) )
def with_average_time(self, filter): def with_average_time(self, filter, filter_cohort):
return self.annotate( return self.annotate(
average_time=Coalesce( average_time=Coalesce(
AvgSubquery( AvgSubquery(
filter( filter(
CourseDuration.objects.filter(course_id=OuterRef("pk")).values( filter_cohort(
"duration" CourseDuration.objects.filter(course_id=OuterRef("pk"))
) ).values("duration")
), ),
db_column="duration", db_column="duration",
output_field=models.DurationField(), output_field=models.DurationField(),
......
from django.db import models from django.db import models
__all__ = ["OnDemandSession"] __all__ = ["OnDemandSession", "OnDemandSessionUsers"]
class OnDemandSession(models.Model): class OnDemandSession(models.Model):
...@@ -35,3 +35,31 @@ class OnDemandSession(models.Model): ...@@ -35,3 +35,31 @@ class OnDemandSession(models.Model):
class Meta: class Meta:
managed = False managed = False
db_table = "on_demand_sessions_view" db_table = "on_demand_sessions_view"
class OnDemandSessionUsers(models.Model):
    """
    Read-only mapping of ``on_demand_session_users_view``.

    Each row links a user to an on-demand session of a course, together with
    the start and end timestamps of that membership. The model is unmanaged,
    so Django does not create or alter the underlying relation.
    """

    # Course the session belongs to.
    course = models.ForeignKey(
        "Course",
        db_column="course_id",
        on_delete=models.DO_NOTHING,
        related_name="session_users",
    )
    # Session identifier, exposed as ``id`` and used as the model's primary key.
    # NOTE(review): unique_together below spans user, session and start
    # timestamp, which suggests the session id alone may not be unique per
    # row -- confirm that using it as the primary key is intended.
    id = models.CharField(
        primary_key=True, max_length=50, db_column="on_demand_session_id"
    )
    # Start of the membership.
    timestamp = models.DateTimeField(
        null=True, blank=True, db_column="on_demand_sessions_membership_start_ts"
    )
    # End of the membership; may be NULL.
    end_timestamp = models.DateTimeField(
        null=True, blank=True, db_column="on_demand_sessions_membership_end_ts"
    )
    # User taking part in the session.
    eitdigital_user = models.ForeignKey(
        "EITDigitalUser",
        db_column="eitdigital_user_id",
        on_delete=models.DO_NOTHING,
        related_name="session_users",
    )

    class Meta:
        managed = False
        db_table = "on_demand_session_users_view"
        unique_together = ("eitdigital_user", "id", "timestamp")
...@@ -29,6 +29,7 @@ from coursera.models import ( ...@@ -29,6 +29,7 @@ from coursera.models import (
LastActivity, LastActivity,
LastActivityPerModule, LastActivityPerModule,
ModuleDuration, ModuleDuration,
OnDemandSessionUsers,
) )
from coursera.utils import AvgSubquery, CountSubquery from coursera.utils import AvgSubquery, CountSubquery
...@@ -88,6 +89,32 @@ class CourseSerializer(serializers.ModelSerializer): ...@@ -88,6 +89,32 @@ class CourseSerializer(serializers.ModelSerializer):
get_filterset, self.context["request"].GET, request=self.context["request"] get_filterset, self.context["request"].GET, request=self.context["request"]
) )
def filter_cohort_user(self, queryset):
    """
    Restrict `queryset` to the users of the requested cohort.

    The cohort id is read from the ``cohort`` query parameter of the request
    in the serializer context. When it is absent or empty, `queryset` is
    returned untouched. Otherwise the queryset is filtered on its ``user``
    field against the cohort's members whose membership has no end timestamp.
    """
    cohort_id = self.context["request"].query_params.get("cohort")
    if not cohort_id:
        return queryset

    members = OnDemandSessionUsers.objects.filter(
        id=cohort_id, end_timestamp__isnull=True
    ).values("eitdigital_user")
    return queryset.filter(user__in=members)
def filter_cohort_eitdigital_user(self, queryset):
    """
    Restrict `queryset` to the users of the requested cohort.

    The cohort id is read from the ``cohort`` query parameter of the request
    in the serializer context. When it is absent or empty, `queryset` is
    returned untouched. Otherwise the queryset is filtered on its
    ``eitdigital_user`` field against the cohort's members whose membership
    has no end timestamp.
    """
    cohort_id = self.context["request"].query_params.get("cohort")
    if not cohort_id:
        return queryset

    members = OnDemandSessionUsers.objects.filter(
        id=cohort_id, end_timestamp__isnull=True
    ).values("eitdigital_user")
    return queryset.filter(eitdigital_user__in=members)
def _filter_current_branch(self, course_id): def _filter_current_branch(self, course_id):
""" """
Return a filtered queryset with just the current branch for `course_id`. Return a filtered queryset with just the current branch for `course_id`.
...@@ -152,7 +179,9 @@ class CourseSerializer(serializers.ModelSerializer): ...@@ -152,7 +179,9 @@ class CourseSerializer(serializers.ModelSerializer):
past_leavers = 0 past_leavers = 0
return ( return (
CourseMembership.objects.filter(course_id=obj.pk) self.filter_cohort_eitdigital_user(
CourseMembership.objects.filter(course_id=obj.pk)
)
.filter(timestamp__lte=to_date) .filter(timestamp__lte=to_date)
.filter( .filter(
role__in=[ role__in=[
...@@ -185,7 +214,11 @@ class CourseSerializer(serializers.ModelSerializer): ...@@ -185,7 +214,11 @@ class CourseSerializer(serializers.ModelSerializer):
ratings = obj.ratings ratings = obj.ratings
except AttributeError: except AttributeError:
ratings = list( ratings = list(
self.filter(CourseRating.objects.filter(course_id=obj.pk)) self.filter(
self.filter_cohort_user(
CourseRating.objects.filter(course_id=obj.pk)
)
)
.filter( .filter(
feedback_system__in=[ feedback_system__in=[
CourseRating.NPS_FIRST_WEEK, CourseRating.NPS_FIRST_WEEK,
...@@ -261,7 +294,9 @@ class CourseAnalyticsSerializer(CourseSerializer): ...@@ -261,7 +294,9 @@ class CourseAnalyticsSerializer(CourseSerializer):
return obj.finished_learners_over_time return obj.finished_learners_over_time
except AttributeError: except AttributeError:
return list( return list(
Grade.objects.filter(course_id=obj.pk) self.filter_cohort_eitdigital_user(
Grade.objects.filter(course_id=obj.pk)
)
.annotate(date=TruncDate("timestamp", output_field=DateField())) .annotate(date=TruncDate("timestamp", output_field=DateField()))
.annotate( .annotate(
num_finished=Window( num_finished=Window(
...@@ -301,14 +336,18 @@ class CourseAnalyticsSerializer(CourseSerializer): ...@@ -301,14 +336,18 @@ class CourseAnalyticsSerializer(CourseSerializer):
to_date = from_date to_date = from_date
subquery = CountSubquery( subquery = CountSubquery(
LastActivityPerModule.objects.filter(module_id=OuterRef("pk")).filter( self.filter_cohort_eitdigital_user(
timestamp__lt=to_date - timedelta(weeks=6) LastActivityPerModule.objects.filter(module_id=OuterRef("pk")).filter(
timestamp__lt=to_date - timedelta(weeks=6)
)
) )
) )
if from_date: if from_date:
subquery -= CountSubquery( subquery -= CountSubquery(
LastActivityPerModule.objects.filter(module_id=OuterRef("pk")).filter( self.filter_cohort_eitdigital_user(
timestamp__lt=from_date - timedelta(weeks=6) LastActivityPerModule.objects.filter(
module_id=OuterRef("pk")
).filter(timestamp__lt=from_date - timedelta(weeks=6))
) )
) )
...@@ -385,9 +424,11 @@ class CourseAnalyticsSerializer(CourseSerializer): ...@@ -385,9 +424,11 @@ class CourseAnalyticsSerializer(CourseSerializer):
average_time=Coalesce( average_time=Coalesce(
AvgSubquery( AvgSubquery(
self.filter( self.filter(
ModuleDuration.objects.filter( self.filter_cohort_eitdigital_user(
module_id=OuterRef("pk") ModuleDuration.objects.filter(
).values("duration") module_id=OuterRef("pk")
).values("duration")
)
), ),
db_column="duration", db_column="duration",
output_field=DateTimeField(), output_field=DateTimeField(),
...@@ -401,12 +442,24 @@ class CourseAnalyticsSerializer(CourseSerializer): ...@@ -401,12 +442,24 @@ class CourseAnalyticsSerializer(CourseSerializer):
def get_geo_data(self, obj): def get_geo_data(self, obj):
""" """
Return the count of countries of users of the course. Return the count of countries of users of the course.
This method has the cohort filter built in since it uses the
EITDigitalUser table itself.
""" """
try: try:
return obj.geo_data return obj.geo_data
except AttributeError: except AttributeError:
return list( users = None
EITDigitalUser.objects.filter(
if self.context["request"].query_params.get("cohort"):
cohort_users = OnDemandSessionUsers.objects.filter(
id=self.context["request"].query_params.get("cohort"),
end_timestamp__isnull=True,
).values("eitdigital_user")
users = EITDigitalUser.objects.filter(
eitdigital_user_id__in=cohort_users
).filter(
eitdigital_user_id__in=CourseMembership.objects.filter( eitdigital_user_id__in=CourseMembership.objects.filter(
course_id=obj.pk course_id=obj.pk
) )
...@@ -418,7 +471,22 @@ class CourseAnalyticsSerializer(CourseSerializer): ...@@ -418,7 +471,22 @@ class CourseAnalyticsSerializer(CourseSerializer):
) )
.values("eitdigital_user_id") .values("eitdigital_user_id")
) )
.annotate(three_let=F("country_cd__three_let")) else:
users = EITDigitalUser.objects.filter(
eitdigital_user_id__in=CourseMembership.objects.filter(
course_id=obj.pk
)
.filter(
role__in=[
CourseMembership.LEARNER,
CourseMembership.PRE_ENROLLED_LEARNER,
]
)
.values("eitdigital_user_id")
)
return list(
users.annotate(three_let=F("country_cd__three_let"))
.annotate(country_name=F("country_cd__country")) .annotate(country_name=F("country_cd__country"))
.values_list("three_let", "country_name") .values_list("three_let", "country_name")
.annotate(country_count=Count("eitdigital_user_id")) .annotate(country_count=Count("eitdigital_user_id"))
...@@ -429,5 +497,7 @@ class CourseAnalyticsSerializer(CourseSerializer): ...@@ -429,5 +497,7 @@ class CourseAnalyticsSerializer(CourseSerializer):
Return the list of cohorts in this course. Return the list of cohorts in this course.
""" """
return list( return list(
obj.sessions.values_list("timestamp", "end_timestamp").order_by("timestamp") obj.sessions.values_list("timestamp", "end_timestamp", "id").order_by(
"timestamp"
)
) )
...@@ -8,7 +8,15 @@ from rest_framework.response import Response ...@@ -8,7 +8,15 @@ from rest_framework.response import Response
from rest_framework.viewsets import ReadOnlyModelViewSet from rest_framework.viewsets import ReadOnlyModelViewSet
from coursera.filters import GenericFilterSet from coursera.filters import GenericFilterSet
from coursera.models import Branch, ClickstreamEvent, Course, Item, ItemType, Quiz from coursera.models import (
Branch,
ClickstreamEvent,
Course,
Item,
ItemType,
OnDemandSessionUsers,
Quiz,
)
from coursera.serializers import ( from coursera.serializers import (
AssignmentAnalyticsSerializer, AssignmentAnalyticsSerializer,
AssignmentSerializer, AssignmentSerializer,
...@@ -42,6 +50,18 @@ class CourseAnalyticsViewSet(ReadOnlyModelViewSet): ...@@ -42,6 +50,18 @@ class CourseAnalyticsViewSet(ReadOnlyModelViewSet):
return partial(get_filterset, self.request.GET, request=self.request) return partial(get_filterset, self.request.GET, request=self.request)
def filter_cohort(self, queryset):
    """
    Restrict `queryset` to the users of the requested cohort.

    The cohort id is read from the ``cohort`` query parameter of the current
    request. When it is absent or empty, `queryset` is returned untouched.
    Otherwise the queryset is filtered on its ``eitdigital_user`` field
    against the cohort's members whose membership has no end timestamp.
    """
    cohort_id = self.request.query_params.get("cohort")
    if not cohort_id:
        return queryset

    members = OnDemandSessionUsers.objects.filter(
        id=cohort_id, end_timestamp__isnull=True
    ).values("eitdigital_user")
    return queryset.filter(eitdigital_user__in=members)
def get_serializer_class(self): def get_serializer_class(self):
""" """
Return CourseAnalyticsSerializer for single objects, and Return CourseAnalyticsSerializer for single objects, and
...@@ -66,9 +86,9 @@ class CourseAnalyticsViewSet(ReadOnlyModelViewSet): ...@@ -66,9 +86,9 @@ class CourseAnalyticsViewSet(ReadOnlyModelViewSet):
super() super()
.get_queryset() .get_queryset()
.filter(id__in=self.request.user.courses) .filter(id__in=self.request.user.courses)
.with_enrolled_learners(self.generic_filterset) .with_enrolled_learners(self.generic_filterset, self.filter_cohort)
.with_finished_learners(self.generic_filterset) .with_finished_learners(self.generic_filterset, self.filter_cohort)
.with_paying_learners(self.generic_filterset) .with_paying_learners(self.generic_filterset, self.filter_cohort)
.annotate(specialization=F("specializations__name")) .annotate(specialization=F("specializations__name"))
.order_by("specialization", "name") .order_by("specialization", "name")
) )
...@@ -81,7 +101,7 @@ class CourseAnalyticsViewSet(ReadOnlyModelViewSet): ...@@ -81,7 +101,7 @@ class CourseAnalyticsViewSet(ReadOnlyModelViewSet):
.with_assignments() .with_assignments()
.with_videos() .with_videos()
.with_cohorts(self.generic_filterset) .with_cohorts(self.generic_filterset)
.with_average_time(self.generic_filterset) .with_average_time(self.generic_filterset, self.filter_cohort)
) )
return queryset return queryset
......
...@@ -18,6 +18,14 @@ def coursera_course_id(): ...@@ -18,6 +18,14 @@ def coursera_course_id():
return "27_khHs4EeaXRRKK7mMjqw" return "27_khHs4EeaXRRKK7mMjqw"
@pytest.fixture
def coursera_course_cohort():
    """
    Provide the id of a cohort that belongs to the main course.
    """
    cohort_id = "mqZvF2wdEeiKgQo3BSPyYA"
    return cohort_id
@pytest.fixture @pytest.fixture
def coursera_alt_course_id(): def coursera_alt_course_id():
""" """
......
...@@ -237,6 +237,77 @@ def test_course_list_view(teacher_api_client): ...@@ -237,6 +237,77 @@ def test_course_list_view(teacher_api_client):
assert list(item.keys()) == keys assert list(item.keys()) == keys
@pytest.mark.django_db
@pytest.mark.freeze_time("2018-09-25 15:00")
def test_course_cohort_analytics_view(
    teacher_api_client,
    coursera_course_id,
    coursera_course_cohort,
    django_assert_max_num_queries,
):
    """
    Test the course detail view when a ``cohort`` query parameter is given.

    The request must succeed with status 200 and the response must contain
    exactly the keys listed in ``expected_keys`` below, in that order.

    NOTE(review): the ``django_assert_max_num_queries`` fixture is requested
    but never used, so no upper bound on database queries is checked here.
    """
    detail_url = reverse(
        "coursera-api:course-detail", kwargs={"pk": coursera_course_id}
    )
    response = teacher_api_client.get(
        detail_url + f"?cohort={coursera_course_cohort}"
    )
    assert response.status_code == 200, str(response.content)

    # Order matters: the comparison below is against the exact key sequence.
    expected_keys = [
        "id",
        "slug",
        "name",
        "specialization",
        "level",
        "enrolled_learners",
        "leaving_learners",
        "ratings",
        "finished_learners",
        "paying_learners",
        "modules",
        "quizzes",
        "assignments",
        "videos",
        "cohorts",
        "finished_learners_over_time",
        "leaving_learners_per_module",
        "leaving_paying_learners_per_module",
        "average_time",
        "average_time_per_module",
        "geo_data",
        "cohort_list",
    ]
    assert list(response.data.keys()) == expected_keys
@pytest.mark.django_db @pytest.mark.django_db
def test_video_analytics_view(