Commit 3d7e318c authored by Traas, J.J.'s avatar Traas, J.J.
Browse files

Merge branch 'feature_maketestdb' into 'master'

Add preparetestdb and createtestdb commands

See merge request !9
parents ae45a335 ff4fcd20
Pipeline #2265 passed with stage
in 3 minutes and 49 seconds
......@@ -106,4 +106,6 @@ venv.bak/
# editor configs
.vscode/
*.tex
\ No newline at end of file
*.tex
*.sql
\ No newline at end of file
......@@ -79,6 +79,19 @@ $ pipenv run coverage html
```
### Create test database
Run the following commands to create a small test database, which contains only the necessary courses to pass the tests.
The full Coursera dump must already be loaded into the database configured in your .env file.
``` bash
$ pipenv run python manage.py preparetestdb <name_of_database>
$ pipenv run python manage.py maketestdb <name_of_database>
```
Both commands take the name of an intermediate database. The main database is copied into it and all modifications happen there, so the main database is left unchanged. Note that any existing database with that name is dropped first.
When the process is finished, the test data remains in the intermediate database and a coursera.sql file appears in the project root folder.
The coursera.sql file can be imported into PostgreSQL using `psql <database_name> -f coursera.sql`.
## Documentation
[Django](https://docs.djangoproject.com/en/2.1/)
......
import copy
import importlib
import subprocess
from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError
from django.db import connections
import coursera.migrations
class Command(BaseCommand):
    """Migrate the intermediate database and dump it to ``coursera.sql``.

    The database named by ``db_name`` is registered under the ``testdb``
    alias, the ``coursera`` app is migrated on it with
    ``settings.TABLE_TYPE`` set to ``"TABLE"``, and the resulting tables are
    written to ``coursera.sql`` in the current working directory by
    ``pg_dump``.
    """

    help = "Creates a test database for quicker testing"

    def add_arguments(self, parser):
        """Register the positional ``db_name`` argument."""
        parser.add_argument("db_name", type=str)

    def handle(self, *args, **options):
        """Run the migrations on ``db_name`` and dump the result.

        Raises:
            CommandError: if ``pg_dump`` cannot be started or exits with a
                non-zero status.
        """
        # Reuse the connection parameters of the default database and only
        # swap the database name.
        settings.DATABASES["testdb"] = copy.deepcopy(settings.DATABASES["default"])
        settings.DATABASES["testdb"]["NAME"] = options["db_name"]

        print("Creating migrated tables.")
        # The coursera migrations splice settings.TABLE_TYPE into their
        # CREATE/DROP statements, so this must be set before migrating.
        settings.TABLE_TYPE = "TABLE"
        call_command("migrate", "--no-input", "--database=testdb", "coursera")

        print("Dumping database to file")
        # NOTE(review): pg_dump is given only the database name, so host,
        # user and password presumably come from the environment / libpq
        # defaults rather than from the Django settings -- confirm.
        dump_command = [
            "pg_dump",
            "-O",  # do not emit ownership commands
            "-t",
            "django_migrations*",
            "-t",
            "*_view",
            "-t",
            "country2to3",
            options["db_name"],
            "-f",
            "coursera.sql",
        ]
        try:
            # check=True: a failing pg_dump must not pass silently, otherwise
            # a missing or truncated coursera.sql looks like a success.
            subprocess.run(dump_command, check=True)
        except FileNotFoundError as err:
            raise CommandError("pg_dump executable was not found on PATH.") from err
        except subprocess.CalledProcessError as err:
            raise CommandError(
                "pg_dump exited with status %d; coursera.sql may be missing or incomplete."
                % err.returncode
            ) from err
import copy
from django.conf import settings
from django.core.management import call_command
from django.core.management.base import BaseCommand, CommandError
from django.db import connections
# Ids of the courses that are kept in the test database. The pruning step
# deletes every row whose course_id / course_branch_id is not in this list.
courses_list = [
"V4m7Xf5qEeS9ISIACxWDhA",
"27_khHs4EeaXRRKK7mMjqw",
"bmHtyVrIEee3CwoIJ_9DVg",
"oWawIRajEeWEjBINzvDOWw",
]
class Command(BaseCommand):
    """Clone the default database into ``db_name`` and prune it.

    The destination database is dropped (if present) and re-created with the
    default database as its template. The ``coursera`` app is then migrated
    back to ``0001`` on the copy, every row that does not belong to a course
    in ``courses_list`` is deleted, and the copy is vacuumed.
    """

    help = "Creates a test database for quicker testing"

    # (table, column) pairs pruned with "column NOT IN (<kept course ids>)".
    # Executed first, in this order; course_branch_item_assessments must be
    # pruned before the orphan clean-up below, which reads it.
    _COURSE_SCOPED_BEFORE_ORPHANS = (
        ("courses", "course_id"),
        ("course_branch_modules", "course_branch_id"),
        ("course_branch_lessons", "course_branch_id"),
        ("course_branch_items", "course_branch_id"),
        ("course_branch_item_assessments", "course_branch_id"),
        ("course_branches", "course_id"),
        ("course_progress", "course_id"),
        ("clickstream_events", "course_id"),
    )

    # (child table, parent table, shared key column): child rows without a
    # matching parent row are deleted. Order matters, each step relies on the
    # previous one having pruned its parent table.
    _ORPHAN_CLEANUPS = (
        ("assessments", "course_branch_item_assessments", "assessment_id"),
        ("assessment_responses", "assessments", "assessment_id"),
        ("assessment_actions", "assessments", "assessment_id"),
        ("assessment_assessments_questions", "assessments", "assessment_id"),
        (
            "assessment_response_options",
            "assessment_responses",
            "assessment_response_id",
        ),
    )

    # Remaining (table, column) pairs, executed after the orphan clean-up.
    # The original statement list pruned course_branch_item_assessments a
    # second time here; that repeat could not match any rows and was dropped.
    _COURSE_SCOPED_AFTER_ORPHANS = (
        ("on_demand_sessions", "course_id"),
        ("feedback_course_ratings", "course_id"),
        ("course_branch_item_peer_assignments", "course_branch_id"),
        ("course_branch_item_programming_assignments", "course_branch_id"),
        ("course_grades", "course_id"),
        ("course_memberships", "course_id"),
        ("users_courses__certificate_payments", "course_id"),
        ("specializations_courses", "course_id"),
    )

    def add_arguments(self, parser):
        """Register the positional ``db_name`` argument."""
        parser.add_argument("db_name", type=str)

    @classmethod
    def _build_prune_sql(cls, course_ids):
        """Return all pruning DELETE statements as one SQL string."""
        # The ids are constants from this module, so they are inlined as SQL
        # string literals exactly like the original did.
        kept_ids = ",".join("'" + course_id + "'" for course_id in course_ids)

        def scoped_delete(table, column):
            return "DELETE FROM %s WHERE %s NOT IN (%s);" % (table, column, kept_ids)

        def orphan_delete(child, parent, key):
            return (
                "DELETE FROM {child} WHERE NOT EXISTS "
                "(SELECT * FROM {parent} WHERE {parent}.{key} = {child}.{key});"
            ).format(child=child, parent=parent, key=key)

        statements = [scoped_delete(*pair) for pair in cls._COURSE_SCOPED_BEFORE_ORPHANS]
        statements += [orphan_delete(*spec) for spec in cls._ORPHAN_CLEANUPS]
        statements += [scoped_delete(*pair) for pair in cls._COURSE_SCOPED_AFTER_ORPHANS]
        return "".join(statements)

    def handle(self, *args, **options):
        """Create the pruned copy of the default database.

        Raises:
            CommandError: if ``db_name`` is the default database itself, or
                if ``courses_list`` is empty.
        """
        destination = options["db_name"]
        source = settings.DATABASES["default"]["NAME"]

        # Dropping the database we are copying from can never be intended.
        if destination == source:
            raise CommandError(
                "The intermediate database must differ from the default database."
            )
        # "NOT IN ()" is not valid SQL, so refuse to run with nothing to keep.
        if not courses_list:
            raise CommandError("courses_list is empty; nothing would be kept.")

        settings.DATABASES["testdb"] = copy.deepcopy(settings.DATABASES["default"])
        settings.DATABASES["testdb"]["NAME"] = destination

        # Quote the identifiers: unquoted names are case-folded by PostgreSQL
        # while Django connects with the exact name, and names containing
        # characters such as "-" would otherwise be a syntax error.
        quote_name = connections["default"].ops.quote_name
        with connections["default"].cursor() as cursor:
            print("Dropping destination database.")
            cursor.execute("DROP DATABASE IF EXISTS " + quote_name(destination) + ";")
            print("Creating destination database.")
            cursor.execute(
                "CREATE DATABASE "
                + quote_name(destination)
                + " WITH TEMPLATE "
                + quote_name(source)
                + ";"
            )

        print("Removing materialized views from destination database.")
        call_command("migrate", "--no-input", "--database=testdb", "coursera", "0001")

        print("Pruning destination database.")
        with connections["testdb"].cursor() as cursor:
            # Sent as a single execute call, as in the original.
            cursor.execute(self._build_prune_sql(courses_list))
            print("Vacuuming destination database.")
            cursor.execute("VACUUM FULL;")
# Generated by Django 2.1.1 on 2018-09-20 09:17
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW course_memberships_view
CREATE """
+ settings.TABLE_TYPE
+ """ course_memberships_view
AS
SELECT
MD5(MD5(MD5(eitdigital_user_id) || course_id) || course_membership_ts)::varchar(50) as id,
......@@ -30,7 +33,9 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS course_memberships_view
DROP """
+ settings.TABLE_TYPE
+ """ IF EXISTS course_memberships_view
""",
)
]
# Generated by Django 2.1.1 on 2018-09-24 08:55
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW course_branch_modules_view
CREATE """
+ settings.TABLE_TYPE
+ """ course_branch_modules_view
AS
SELECT
MD5(MD5(course_branch_id) || course_module_id)::varchar(50) as module_id,
......@@ -31,7 +34,7 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS course_branch_modules_view
DROP """ + settings.TABLE_TYPE + """ IF EXISTS course_branch_modules_view
""",
)
]
# Generated by Django 2.1.1 on 2018-09-24 11:19
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW
CREATE """
+ settings.TABLE_TYPE
+ """
course_grades_view
AS
SELECT
......@@ -36,7 +39,9 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS course_grades_view
DROP """
+ settings.TABLE_TYPE
+ """ IF EXISTS course_grades_view
""",
)
]
# Generated by Django 2.1.1 on 2018-09-25 10:05
from django.conf import settings
from django.db import migrations, models
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW course_branch_lessons_view
CREATE """
+ settings.TABLE_TYPE
+ """ course_branch_lessons_view
AS
SELECT
MD5(MD5(course_branch_id) || course_lesson_id)::varchar(50) as lesson_id,
......@@ -31,13 +34,15 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS course_branch_lessons_view
DROP """ + settings.TABLE_TYPE + """ IF EXISTS course_branch_lessons_view
""",
),
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW course_branch_items_view
CREATE """
+ settings.TABLE_TYPE
+ """ course_branch_items_view
AS
SELECT
MD5(MD5(course_branch_id) || course_item_id)::varchar(50) as item_id,
......@@ -62,7 +67,7 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS course_branch_items_view
DROP """ + settings.TABLE_TYPE + """ IF EXISTS course_branch_items_view
""",
),
]
# Generated by Django 2.1.1 on 2018-09-25 11:07
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW
CREATE """
+ settings.TABLE_TYPE
+ """
course_progress_view
AS
SELECT
......@@ -50,7 +53,9 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS course_progress_view
DROP """
+ settings.TABLE_TYPE
+ """ IF EXISTS course_progress_view
""",
)
]
# Generated by Django 2.1.1 on 2018-09-25 14:56
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW course_branch_item_assessments_view
CREATE """
+ settings.TABLE_TYPE
+ """ course_branch_item_assessments_view
AS
SELECT
MD5(MD5(course_item_id) || assessment_id)::varchar(50) as id,
......@@ -29,7 +32,9 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS course_branch_item_assessments_view
DROP """
+ settings.TABLE_TYPE
+ """ IF EXISTS course_branch_item_assessments_view
""",
)
]
# Generated by Django 2.1.1 on 2018-09-26 07:37
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW course_branch_item_programming_assignments_view
CREATE """
+ settings.TABLE_TYPE
+ """ course_branch_item_programming_assignments_view
AS
SELECT
MD5(MD5(course_item_id) || programming_assignment_id)::varchar(50) as id,
......@@ -29,7 +32,7 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS course_branch_item_programming_assignments_view
DROP """ + settings.TABLE_TYPE + """ IF EXISTS course_branch_item_programming_assignments_view
""",
)
]
# Generated by Django 2.1.1 on 2018-09-26 07:55
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW course_branch_item_peer_assignments_view
CREATE """
+ settings.TABLE_TYPE
+ """ course_branch_item_peer_assignments_view
AS
SELECT
MD5(MD5(course_item_id) || peer_assignment_id)::varchar(50) as id,
......@@ -29,7 +32,9 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS course_branch_item_peer_assignments_view
DROP """
+ settings.TABLE_TYPE
+ """ IF EXISTS course_branch_item_peer_assignments_view
""",
)
]
# Generated by Django 2.1.1 on 2018-09-26 08:55
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
sql=[
"""
CREATE MATERIALIZED VIEW feedback_course_ratings_view
CREATE """
+ settings.TABLE_TYPE
+ """ feedback_course_ratings_view
AS
SELECT
MD5(MD5(MD5(MD5(course_id) || eitdigital_user_id) || feedback_system) || feedback_ts) as id,
......@@ -35,7 +38,9 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS feedback_course_ratings_view
DROP """
+ settings.TABLE_TYPE
+ """ IF EXISTS feedback_course_ratings_view
""",
)
]
# Generated by Django 2.1.1 on 2018-09-26 12:20
from django.conf import settings
from django.db import migrations
......@@ -22,7 +23,9 @@ class Migration(migrations.Migration):
# The last_activity_per_module view depends on this view.
sql=[
"""
CREATE MATERIALIZED VIEW IF NOT EXISTS
CREATE """
+ settings.TABLE_TYPE
+ """ IF NOT EXISTS
last_activity_view
AS
SELECT
......@@ -55,7 +58,9 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS last_activity_view
DROP """
+ settings.TABLE_TYPE
+ """ IF EXISTS last_activity_view
""",
)
]
# Generated by Django 2.1.1 on 2018-09-26 13:29
from django.conf import settings
from django.db import migrations
......@@ -21,7 +22,9 @@ class Migration(migrations.Migration):
# - leaving_learners_per_module
sql=[
"""
CREATE MATERIALIZED VIEW IF NOT EXISTS
CREATE """
+ settings.TABLE_TYPE
+ """ IF NOT EXISTS
last_activity_per_module
AS
SELECT
......@@ -55,7 +58,9 @@ class Migration(migrations.Migration):
""",
],
reverse_sql="""
DROP MATERIALIZED VIEW IF EXISTS last_activity_per_module
DROP """
+ settings.TABLE_TYPE
+ """ IF EXISTS last_activity_per_module
""",
)
]
# Generated by Django 2.1.1 on 2018-09-28 11:01
from django.conf import settings
from django.db import migrations
......@@ -17,7 +18,9 @@ class Migration(migrations.Migration):
# The module_duration_view depends on this view.
[
"""
CREATE MATERIALIZED VIEW
CREATE """
+ settings.TABLE_TYPE
+ """
module_first_activity_view
AS
SELECT DISTINCT ON (module_id, eitdigital_user_id)
......@@ -45,7 +48,7 @@ class Migration(migrations.Migration):
CREATE INDEX ON module_first_activity_view (course_progress_ts)
""",
],
reverse_sql="DROP MATERIALIZED VIEW IF EXISTS module_first_activity_view",
reverse_sql="DROP " + settings.TABLE_TYPE + " IF EXISTS module_first_activity_view",
),
migrations.RunSQL(
# For each module and user, select the latest activity timestamp
......@@ -56,7 +59,9 @@ class Migration(migrations.Migration):
# The module_duration_view depends on this view.
[
"""
CREATE MATERIALIZED VIEW
CREATE """
+ settings.TABLE_TYPE
+ """
module_last_activity_view
AS
SELECT DISTINCT ON (module_id, eitdigital_user_id)
......@@ -84,6 +89,6 @@ class Migration(migrations.Migration):
CREATE INDEX ON module_last_activity_view (course_progress_ts)
""",
],
reverse_sql="DROP MATERIALIZED VIEW IF EXISTS module_last_activity_view",
reverse_sql="DROP " + settings.TABLE_TYPE + " IF EXISTS module_last_activity_view",
),
]
# Generated by Django 2.1.2 on 2018-10-04 11:54
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
[
"""
CREATE MATERIALIZED VIEW
CREATE """
+ settings.TABLE_TYPE
+ """
clickstream_events_view
AS
SELECT DISTINCT ON (id)
......@@ -49,7 +52,7 @@ class Migration(migrations.Migration):
CREATE INDEX ON clickstream_events_view (key)
""",
],
reverse_sql="DROP MATERIALIZED VIEW IF EXISTS clickstream_events_view",
reverse_sql="DROP " + settings.TABLE_TYPE + " IF EXISTS clickstream_events_view",
),
migrations.RunSQL(
# Select all clickstream events of type 'heartbeat'. Denormalize
......@@ -63,7 +66,9 @@ class Migration(migrations.Migration):
# - views_over_runtime
[
"""
CREATE MATERIALIZED VIEW
CREATE """
+ settings.TABLE_TYPE
+ """
heartbeat_events
AS
WITH latest_branches AS (
......@@ -115,6 +120,6 @@ class Migration(migrations.Migration):
CREATE INDEX ON heartbeat_events (eitdigital_user_id)
""",
],
reverse_sql="DROP MATERIALIZED VIEW IF EXISTS heartbeat_events",
reverse_sql="DROP " + settings.TABLE_TYPE + " IF EXISTS heartbeat_events",
),
]
# Generated by Django 2.1.2 on 2018-10-05 10:25
from django.conf import settings
from django.db import migrations
......@@ -11,7 +12,9 @@ class Migration(migrations.Migration):
migrations.RunSQL(
[
"""
CREATE MATERIALIZED VIEW
CREATE """
+ settings.TABLE_TYPE
+ """
course_item_grades_view
AS
SELECT DISTINCT ON (course_id, course_item_id, eitdigital_user_id)
......@@ -47,6 +50,6 @@ class Migration(migrations.Migration):
CREATE INDEX ON course_item_grades_view (eitdigital_user_id)
""",
],
reverse_sql="DROP MATERIALIZED VIEW IF EXISTS course_item_grades_view",
reverse_sql="DROP " + settings.TABLE_TYPE + " IF EXISTS course_item_grades_view",
)