Commit a3acc448 authored by Honcoop, T.
Browse files

Feature/cohorts

parent f5c964b5
......@@ -9,6 +9,9 @@ services:
# only cache local items.
variables:
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache"
POSTGRES_DB: test_coursera
POSTGRES_USER: user
POSTGRES_PASSWORD: ""
# Pip's cache doesn't store the python packages
# https://pip.pypa.io/en/stable/reference/pip_install/#caching
......@@ -19,11 +22,6 @@ cache:
paths:
- .cache/pip
- db
variables:
POSTGRES_DB: test_coursera
POSTGRES_USER: user
POSTGRES_PASSWORD: ""
stages:
- test
......@@ -35,7 +33,7 @@ runtests:
- apt install postgresql-client -y
- mkdir -p db
- cd db
- wget -O coursera.sql -nc $DATABASE_FILE_URL || true
- wget -O coursera.sql -N $DATABASE_FILE_URL || true
- psql -h postgres -U $POSTGRES_USER -d $POSTGRES_DB -f coursera.sql
- cd ..
- python -V # Print out python version for debugging
......
# Generated by Django 2.1.3 on 2018-11-14 13:38
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
    """
    Create `on_demand_session_users_view` from `on_demand_session_memberships`.

    The object is created as whatever `settings.TABLE_TYPE` names, and is
    indexed on the session, course and user id columns. Reversing the
    migration drops the object again.
    """

    dependencies = [("coursera", "0044_auto_20181114_1338")]

    operations = [
        migrations.RunSQL(
            [
                """
                CREATE """
                + settings.TABLE_TYPE
                + """ on_demand_session_users_view
                AS
                SELECT
                course_id,
                on_demand_session_id,
                eitdigital_user_id,
                on_demand_sessions_membership_start_ts,
                on_demand_sessions_membership_end_ts
                FROM
                on_demand_session_memberships
                """,
                """
                CREATE INDEX ON on_demand_session_users_view (on_demand_session_id)
                """,
                """
                CREATE INDEX ON on_demand_session_users_view (course_id)
                """,
                """
                CREATE INDEX ON on_demand_session_users_view (eitdigital_user_id)
                """,
            ],
            # Drop the same kind of object the forward SQL created. A
            # hard-coded "DROP MATERIALIZED VIEW" fails when TABLE_TYPE is
            # something else, even with IF EXISTS.
            # NOTE(review): assumes settings.TABLE_TYPE is valid after both
            # CREATE and DROP (e.g. "TABLE" or "MATERIALIZED VIEW") - confirm.
            reverse_sql=(
                "DROP "
                + settings.TABLE_TYPE
                + " IF EXISTS on_demand_session_users_view"
            ),
        )
    ]
......@@ -29,11 +29,13 @@ class CourseQuerySet(models.QuerySet):
)
)
def with_enrolled_learners(self, filter):
def with_enrolled_learners(self, filter, filter_cohort):
return self.annotate(
enrolled_learners=CountSubquery(
filter(
CourseMembership.objects.filter(course_id=OuterRef("pk")).filter(
filter_cohort(
CourseMembership.objects.filter(course_id=OuterRef("pk"))
).filter(
role__in=[
CourseMembership.LEARNER,
CourseMembership.PRE_ENROLLED_LEARNER,
......@@ -67,21 +69,23 @@ class CourseQuerySet(models.QuerySet):
)
)
def with_finished_learners(self, filter):
def with_finished_learners(self, filter, filter_cohort):
return self.annotate(
finished_learners=CountSubquery(
filter(
Grade.objects.filter(course_id=OuterRef("pk")).filter(
passing_state__in=[Grade.PASSED, Grade.VERIFIED_PASSED]
)
filter_cohort(
Grade.objects.filter(course_id=OuterRef("pk"))
).filter(passing_state__in=[Grade.PASSED, Grade.VERIFIED_PASSED])
)
)
)
def with_paying_learners(self, filter):
def with_paying_learners(self, filter, filter_cohort):
return self.annotate(
paying_learners=CountSubquery(
CertificatePayment.objects.filter(course_id=OuterRef("pk"))
filter_cohort(
CertificatePayment.objects.filter(course_id=OuterRef("pk"))
)
)
)
......@@ -127,14 +131,14 @@ class CourseQuerySet(models.QuerySet):
)
)
def with_average_time(self, filter):
def with_average_time(self, filter, filter_cohort):
return self.annotate(
average_time=Coalesce(
AvgSubquery(
filter(
CourseDuration.objects.filter(course_id=OuterRef("pk")).values(
"duration"
)
filter_cohort(
CourseDuration.objects.filter(course_id=OuterRef("pk"))
).values("duration")
),
db_column="duration",
output_field=models.DurationField(),
......
from django.db import models
__all__ = ["OnDemandSession"]
__all__ = ["OnDemandSession", "OnDemandSessionUsers"]
class OnDemandSession(models.Model):
......@@ -35,3 +35,31 @@ class OnDemandSession(models.Model):
class Meta:
managed = False
db_table = "on_demand_sessions_view"
class OnDemandSessionUsers(models.Model):
    """
    Unmanaged model over `on_demand_session_users_view`.

    Each row ties an EIT Digital user to an on-demand session of a course,
    together with the start and end timestamps of that membership.
    """

    course = models.ForeignKey(
        "Course",
        related_name="session_users",
        on_delete=models.DO_NOTHING,
        db_column="course_id",
    )
    # NOTE(review): the primary key maps to `on_demand_session_id`, which is
    # presumably shared by every member of a session, so it may not be unique
    # per row - confirm nothing relies on `pk` uniqueness.
    id = models.CharField(
        db_column="on_demand_session_id", max_length=50, primary_key=True
    )
    # Start of the membership (`on_demand_sessions_membership_start_ts`).
    timestamp = models.DateTimeField(
        db_column="on_demand_sessions_membership_start_ts", blank=True, null=True
    )
    # End of the membership; the cohort filters treat NULL as "still a member".
    end_timestamp = models.DateTimeField(
        db_column="on_demand_sessions_membership_end_ts", blank=True, null=True
    )
    eitdigital_user = models.ForeignKey(
        "EITDigitalUser",
        related_name="session_users",
        on_delete=models.DO_NOTHING,
        db_column="eitdigital_user_id",
    )

    class Meta:
        # Django never creates or alters this relation; a migration's raw SQL does.
        managed = False
        db_table = "on_demand_session_users_view"
        unique_together = ("eitdigital_user", "id", "timestamp")
......@@ -29,6 +29,7 @@ from coursera.models import (
LastActivity,
LastActivityPerModule,
ModuleDuration,
OnDemandSessionUsers,
)
from coursera.utils import AvgSubquery, CountSubquery
......@@ -88,6 +89,32 @@ class CourseSerializer(serializers.ModelSerializer):
get_filterset, self.context["request"].GET, request=self.context["request"]
)
def filter_cohort_user(self, queryset):
    """
    Restrict `queryset` to the members of the requested cohort.

    The cohort id is read from the `cohort` query parameter of the request
    in the serializer context. When it is missing or empty, `queryset` is
    returned untouched. Otherwise only rows whose `user` is among the
    `eitdigital_user` values of that session's memberships without an end
    timestamp are kept.
    """
    cohort_id = self.context["request"].query_params.get("cohort")
    if not cohort_id:
        return queryset

    members = OnDemandSessionUsers.objects.filter(
        id=cohort_id, end_timestamp__isnull=True
    ).values("eitdigital_user")
    return queryset.filter(user__in=members)
def filter_cohort_eitdigital_user(self, queryset):
    """
    Restrict `queryset` to the members of the requested cohort.

    The cohort id is read from the `cohort` query parameter of the request
    in the serializer context. When it is missing or empty, `queryset` is
    returned untouched. Otherwise only rows whose `eitdigital_user` is among
    the `eitdigital_user` values of that session's memberships without an
    end timestamp are kept.
    """
    cohort_id = self.context["request"].query_params.get("cohort")
    if not cohort_id:
        return queryset

    members = OnDemandSessionUsers.objects.filter(
        id=cohort_id, end_timestamp__isnull=True
    ).values("eitdigital_user")
    return queryset.filter(eitdigital_user__in=members)
def _filter_current_branch(self, course_id):
"""
Return a filtered queryset with just the current branch for `course_id`.
......@@ -152,7 +179,9 @@ class CourseSerializer(serializers.ModelSerializer):
past_leavers = 0
return (
CourseMembership.objects.filter(course_id=obj.pk)
self.filter_cohort_eitdigital_user(
CourseMembership.objects.filter(course_id=obj.pk)
)
.filter(timestamp__lte=to_date)
.filter(
role__in=[
......@@ -185,7 +214,11 @@ class CourseSerializer(serializers.ModelSerializer):
ratings = obj.ratings
except AttributeError:
ratings = list(
self.filter(CourseRating.objects.filter(course_id=obj.pk))
self.filter(
self.filter_cohort_user(
CourseRating.objects.filter(course_id=obj.pk)
)
)
.filter(
feedback_system__in=[
CourseRating.NPS_FIRST_WEEK,
......@@ -261,7 +294,9 @@ class CourseAnalyticsSerializer(CourseSerializer):
return obj.finished_learners_over_time
except AttributeError:
return list(
Grade.objects.filter(course_id=obj.pk)
self.filter_cohort_eitdigital_user(
Grade.objects.filter(course_id=obj.pk)
)
.annotate(date=TruncDate("timestamp", output_field=DateField()))
.annotate(
num_finished=Window(
......@@ -301,14 +336,18 @@ class CourseAnalyticsSerializer(CourseSerializer):
to_date = from_date
subquery = CountSubquery(
LastActivityPerModule.objects.filter(module_id=OuterRef("pk")).filter(
timestamp__lt=to_date - timedelta(weeks=6)
self.filter_cohort_eitdigital_user(
LastActivityPerModule.objects.filter(module_id=OuterRef("pk")).filter(
timestamp__lt=to_date - timedelta(weeks=6)
)
)
)
if from_date:
subquery -= CountSubquery(
LastActivityPerModule.objects.filter(module_id=OuterRef("pk")).filter(
timestamp__lt=from_date - timedelta(weeks=6)
self.filter_cohort_eitdigital_user(
LastActivityPerModule.objects.filter(
module_id=OuterRef("pk")
).filter(timestamp__lt=from_date - timedelta(weeks=6))
)
)
......@@ -385,9 +424,11 @@ class CourseAnalyticsSerializer(CourseSerializer):
average_time=Coalesce(
AvgSubquery(
self.filter(
ModuleDuration.objects.filter(
module_id=OuterRef("pk")
).values("duration")
self.filter_cohort_eitdigital_user(
ModuleDuration.objects.filter(
module_id=OuterRef("pk")
).values("duration")
)
),
db_column="duration",
output_field=DateTimeField(),
......@@ -401,12 +442,24 @@ class CourseAnalyticsSerializer(CourseSerializer):
def get_geo_data(self, obj):
"""
Return the count of countries of users of the course.
This method has the cohort filter built in since it uses the
EITDigitalUser table itself.
"""
try:
return obj.geo_data
except AttributeError:
return list(
EITDigitalUser.objects.filter(
users = None
if self.context["request"].query_params.get("cohort"):
cohort_users = OnDemandSessionUsers.objects.filter(
id=self.context["request"].query_params.get("cohort"),
end_timestamp__isnull=True,
).values("eitdigital_user")
users = EITDigitalUser.objects.filter(
eitdigital_user_id__in=cohort_users
).filter(
eitdigital_user_id__in=CourseMembership.objects.filter(
course_id=obj.pk
)
......@@ -418,7 +471,22 @@ class CourseAnalyticsSerializer(CourseSerializer):
)
.values("eitdigital_user_id")
)
.annotate(three_let=F("country_cd__three_let"))
else:
users = EITDigitalUser.objects.filter(
eitdigital_user_id__in=CourseMembership.objects.filter(
course_id=obj.pk
)
.filter(
role__in=[
CourseMembership.LEARNER,
CourseMembership.PRE_ENROLLED_LEARNER,
]
)
.values("eitdigital_user_id")
)
return list(
users.annotate(three_let=F("country_cd__three_let"))
.annotate(country_name=F("country_cd__country"))
.values_list("three_let", "country_name")
.annotate(country_count=Count("eitdigital_user_id"))
......@@ -429,5 +497,7 @@ class CourseAnalyticsSerializer(CourseSerializer):
Return the list of cohorts in this course.
"""
return list(
obj.sessions.values_list("timestamp", "end_timestamp").order_by("timestamp")
obj.sessions.values_list("timestamp", "end_timestamp", "id").order_by(
"timestamp"
)
)
......@@ -8,7 +8,15 @@ from rest_framework.response import Response
from rest_framework.viewsets import ReadOnlyModelViewSet
from coursera.filters import GenericFilterSet
from coursera.models import Branch, ClickstreamEvent, Course, Item, ItemType, Quiz
from coursera.models import (
Branch,
ClickstreamEvent,
Course,
Item,
ItemType,
OnDemandSessionUsers,
Quiz,
)
from coursera.serializers import (
AssignmentAnalyticsSerializer,
AssignmentSerializer,
......@@ -42,6 +50,18 @@ class CourseAnalyticsViewSet(ReadOnlyModelViewSet):
return partial(get_filterset, self.request.GET, request=self.request)
def filter_cohort(self, queryset):
    """
    Restrict `queryset` to the members of the requested cohort.

    The cohort id is read from the `cohort` query parameter of the current
    request. When it is missing or empty, `queryset` is returned untouched.
    Otherwise only rows whose `eitdigital_user` is among the
    `eitdigital_user` values of that session's memberships without an end
    timestamp are kept.
    """
    cohort_id = self.request.query_params.get("cohort")
    if not cohort_id:
        return queryset

    members = OnDemandSessionUsers.objects.filter(
        id=cohort_id, end_timestamp__isnull=True
    ).values("eitdigital_user")
    return queryset.filter(eitdigital_user__in=members)
def get_serializer_class(self):
"""
Return CourseAnalyticsSerializer for single objects, and
......@@ -66,9 +86,9 @@ class CourseAnalyticsViewSet(ReadOnlyModelViewSet):
super()
.get_queryset()
.filter(id__in=self.request.user.courses)
.with_enrolled_learners(self.generic_filterset)
.with_finished_learners(self.generic_filterset)
.with_paying_learners(self.generic_filterset)
.with_enrolled_learners(self.generic_filterset, self.filter_cohort)
.with_finished_learners(self.generic_filterset, self.filter_cohort)
.with_paying_learners(self.generic_filterset, self.filter_cohort)
.annotate(specialization=F("specializations__name"))
.order_by("specialization", "name")
)
......@@ -81,7 +101,7 @@ class CourseAnalyticsViewSet(ReadOnlyModelViewSet):
.with_assignments()
.with_videos()
.with_cohorts(self.generic_filterset)
.with_average_time(self.generic_filterset)
.with_average_time(self.generic_filterset, self.filter_cohort)
)
return queryset
......
......@@ -18,6 +18,14 @@ def coursera_course_id():
return "27_khHs4EeaXRRKK7mMjqw"
@pytest.fixture
def coursera_course_cohort():
    """
    Provide the id of a cohort that belongs to the main course.
    """
    cohort_id = "mqZvF2wdEeiKgQo3BSPyYA"
    return cohort_id
@pytest.fixture
def coursera_alt_course_id():
"""
......
......@@ -237,6 +237,77 @@ def test_course_list_view(teacher_api_client):
assert list(item.keys()) == keys
@pytest.mark.django_db
@pytest.mark.freeze_time("2018-09-25 15:00")
def test_course_cohort_analytics_view(
    teacher_api_client,
    coursera_course_id,
    coursera_course_cohort,
    django_assert_max_num_queries,
):
    """
    Test that the course detail view can be accessed with a `cohort` query
    parameter and still returns the appropriate data and analytics.

    The following data must be present, in this order:
    - id
    - slug
    - name
    - specialization
    - level
    - enrolled_learners
    - leaving_learners
    - ratings
    - finished_learners
    - paying_learners
    - modules
    - quizzes
    - assignments
    - videos
    - cohorts
    - finished_learners_over_time
    - leaving_learners_per_module
    - leaving_paying_learners_per_module
    - average_time
    - average_time_per_module
    - geo_data
    - cohort_list

    Only the status code and the response keys are checked; the values are
    not compared against the cohort.

    NOTE(review): `django_assert_max_num_queries` is requested but no query
    budget is asserted here. Wrap the request in it once the expected
    number of queries for the cohort-filtered endpoint is known.
    """
    detail_url = reverse(
        "coursera-api:course-detail", kwargs={"pk": coursera_course_id}
    )
    response = teacher_api_client.get(f"{detail_url}?cohort={coursera_course_cohort}")

    assert response.status_code == 200, str(response.content)

    expected_keys = [
        "id",
        "slug",
        "name",
        "specialization",
        "level",
        "enrolled_learners",
        "leaving_learners",
        "ratings",
        "finished_learners",
        "paying_learners",
        "modules",
        "quizzes",
        "assignments",
        "videos",
        "cohorts",
        "finished_learners_over_time",
        "leaving_learners_per_module",
        "leaving_paying_learners_per_module",
        "average_time",
        "average_time_per_module",
        "geo_data",
        "cohort_list",
    ]
    assert list(response.data.keys()) == expected_keys
@pytest.mark.django_db
def test_video_analytics_view(
teacher_api_client, coursera_course_id, coursera_video_id
......@@ -1061,25 +1132,7 @@ def test_assignment_analytics_view(
kwargs={"course_id": coursera_course_id, "item_id": coursera_assignment_id},
)
)
keys = [
"id",
"branch",
"item_id",
"lesson",
"lesson_name",
"order",
"type",
"name",
"optional",
"submissions",
"submission_ratio",
"average_grade",
"next_item",
"next_assignment",
"last_item",
"last_assignment",
"submission_ratio_paying_learners",
]
keys = ["id", "branch", "item_id", "lesson", "lesson_name", "order", "type", "name", "optional", "submissions", "submission_ratio", "average_grade", "next_item", "next_assignment", "last_item", "last_assignment", "submission_ratio_paying_learners"]
assert response.status_code == 200, str(response.content)
assert list(response.data.keys()) == keys
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment