From 38d28129707558630a2854b083d1e121e9957111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Redrejo?= Date: Mon, 27 Sep 2021 20:36:11 +0200 Subject: [PATCH] Creation of metadata labels model --- .../contentcuration/constants/le_labels.py | 58 ++++++++ .../migrations/0133_auto_20210927_1820.py | 60 ++++++++ contentcuration/contentcuration/models.py | 18 +++ .../contentcuration/tests/test_labels.py | 139 ++++++++++++++++++ 4 files changed, 275 insertions(+) create mode 100644 contentcuration/contentcuration/constants/le_labels.py create mode 100644 contentcuration/contentcuration/migrations/0133_auto_20210927_1820.py create mode 100644 contentcuration/contentcuration/tests/test_labels.py diff --git a/contentcuration/contentcuration/constants/le_labels.py b/contentcuration/contentcuration/constants/le_labels.py new file mode 100644 index 0000000000..f760143169 --- /dev/null +++ b/contentcuration/contentcuration/constants/le_labels.py @@ -0,0 +1,58 @@ +""" +This module contains constants representing the id of the labels. 
+ +This file should be removed and its content replaced by loading the labels from the +le-utils package once that package contains the definitive list of labels and their ids. +""" +from __future__ import unicode_literals + +# Grade Level +UPPER_PRIMARY = "upperprimary" +SPECIALIZED_PROFESSIONAL_TRAINING = "6a256feb4d882f5d90b862f83e789166" +SECONDARY = "secondary" +GRADE_LEVEL = (UPPER_PRIMARY, SPECIALIZED_PROFESSIONAL_TRAINING, SECONDARY) + +# Resource Type +GAME = "game" +BOOK = "book" +VIDEO = "video" +AUDIO = "audio" +RESOURCE_TYPE = (GAME, BOOK, VIDEO, AUDIO) + +# Learning Activity +PLAY = "0aacea8079e044618e297efc2486594f" +READ = "bfbff1a273e1b9c84a2d3dfa4af948fd" +LISTEN = "363e0d138291774124d13c5d309f0e94" +WATCH = "e7e6184e49758c82ba7b601b1b90af09" +LEARNING_ACTIVITY = (PLAY, READ, LISTEN, WATCH) + +# Accessibility (the misspelled ACCESIBILITY name is kept because the tests import it) +HIGH_CONTRAST_DISPLAY = "d6bada40219e36b7654715d8c5553aa0" +SIGN_LANGUAGE_CAPTIONS = "signlanguage" +ACCESIBILITY = (HIGH_CONTRAST_DISPLAY, SIGN_LANGUAGE_CAPTIONS) + +# Category +FOR_SCHOOL = "for_school" +NON_FORMAL = "nonformal" +BASIC_SKILLS = "basicskills" +MATH = "{}.math".format(FOR_SCHOOL) +SCIENCES = "{}.sciences".format(FOR_SCHOOL) +BIOLOGY = "{}.biology".format(SCIENCES) +CALCULUS = "{}.calculus".format(MATH) +ALGEBRA = "{}.algebra".format(MATH) +ROBOTICS = "{}.robotics".format(NON_FORMAL) +DIGITAL_LITERACY = "{}.digitalliteracy".format(BASIC_SKILLS) +CATEGORY = ( + FOR_SCHOOL, + NON_FORMAL, + BASIC_SKILLS, + MATH, + SCIENCES, + BIOLOGY, + CALCULUS, + ALGEBRA, + ROBOTICS, + DIGITAL_LITERACY, +) + +MATH_ARRAY = (MATH, CALCULUS, ALGEBRA) diff --git a/contentcuration/contentcuration/migrations/0133_auto_20210927_1820.py b/contentcuration/contentcuration/migrations/0133_auto_20210927_1820.py new file mode 100644 index 0000000000..512044ad0e --- /dev/null +++ b/contentcuration/contentcuration/migrations/0133_auto_20210927_1820.py @@ -0,0 +1,60 @@ +# Generated by Django 3.2.4 on 2021-09-27 18:20 +from django.db import migrations +from 
django.db import models + + +class Migration(migrations.Migration): + + dependencies = [ + ("contentcuration", "0132_auto_20210708_0011"), + ] + + operations = [ + migrations.AddField( + model_name="contentnode", + name="accessibility_labels", + field=models.TextField(blank=True, null=True), + ), + migrations.AddField( + model_name="contentnode", + name="category_labels", + field=models.TextField(blank=True, null=True), + ), + migrations.AddField( + model_name="contentnode", + name="grade_level_labels", + field=models.TextField(blank=True, null=True), + ), + migrations.AddField( + model_name="contentnode", + name="learning_activity_labels", + field=models.TextField(blank=True, null=True), + ), + migrations.AddField( + model_name="contentnode", + name="resource_type_labels", + field=models.TextField(blank=True, null=True), + ), + migrations.AddIndex( + model_name="contentnode", + index=models.Index( + fields=["grade_level_labels"], name="grade_level_label_idx" + ), + ), + migrations.AddIndex( + model_name="contentnode", + index=models.Index( + fields=["resource_type_labels"], name="resource_type_label_idx" + ), + ), + migrations.AddIndex( + model_name="contentnode", + index=models.Index( + fields=["learning_activity_labels"], name="learning_activity_label_idx" + ), + ), + migrations.AddIndex( + model_name="contentnode", + index=models.Index(fields=["category_labels"], name="category_label_idx"), + ), + ] diff --git a/contentcuration/contentcuration/models.py b/contentcuration/contentcuration/models.py index 2d35b33ab8..be8d003667 100644 --- a/contentcuration/contentcuration/models.py +++ b/contentcuration/contentcuration/models.py @@ -1068,6 +1068,10 @@ def __str__(self): NODE_ID_INDEX_NAME = "node_id_idx" NODE_MODIFIED_INDEX_NAME = "node_modified_idx" NODE_MODIFIED_DESC_INDEX_NAME = "node_modified_desc_idx" +GRADE_LEVEL_LABEL_INDEX = "grade_level_label_idx" +RESOURCE_TYPE_LABEL_INDEX = "resource_type_label_idx" +LEARNING_ACTIVITY_LABEL_INDEX = 
"learning_activity_label_idx" +CATEGORY_LABEL_INDEX = "category_label_idx" class ContentNode(MPTTModel, models.Model): @@ -1146,6 +1150,12 @@ class ContentNode(MPTTModel, models.Model): role_visibility = models.CharField(max_length=50, choices=roles.choices, default=roles.LEARNER) freeze_authoring_data = models.BooleanField(default=False) + grade_level_labels = models.TextField(blank=True, null=True) + resource_type_labels = models.TextField(blank=True, null=True) + learning_activity_labels = models.TextField(blank=True, null=True) + accessibility_labels = models.TextField(blank=True, null=True) + category_labels = models.TextField(blank=True, null=True) + objects = CustomContentNodeTreeManager() # Track all updates and ignore a blacklist of attributes @@ -1745,6 +1755,14 @@ class Meta: indexes = [ models.Index(fields=["node_id"], name=NODE_ID_INDEX_NAME), models.Index(fields=["-modified"], name=NODE_MODIFIED_DESC_INDEX_NAME), + models.Index(fields=["grade_level_labels"], name=GRADE_LEVEL_LABEL_INDEX), + models.Index( + fields=["resource_type_labels"], name=RESOURCE_TYPE_LABEL_INDEX + ), + models.Index( + fields=["learning_activity_labels"], name=LEARNING_ACTIVITY_LABEL_INDEX + ), + models.Index(fields=["category_labels"], name=CATEGORY_LABEL_INDEX), ] diff --git a/contentcuration/contentcuration/tests/test_labels.py b/contentcuration/contentcuration/tests/test_labels.py new file mode 100644 index 0000000000..4708868e27 --- /dev/null +++ b/contentcuration/contentcuration/tests/test_labels.py @@ -0,0 +1,139 @@ +import logging +import os +from random import randint +from random import shuffle +from time import time + +import pytest + +from .base import BaseTestCase +from contentcuration.constants.le_labels import ACCESIBILITY +from contentcuration.constants.le_labels import CALCULUS +from contentcuration.constants.le_labels import CATEGORY +from contentcuration.constants.le_labels import GRADE_LEVEL +from contentcuration.constants.le_labels import LEARNING_ACTIVITY 
+from contentcuration.constants.le_labels import MATH
+from contentcuration.constants.le_labels import RESOURCE_TYPE
+from contentcuration.constants.le_labels import VIDEO
+from contentcuration.models import ContentNode
+from contentcuration.utils.db_tools import TreeBuilder
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+
+def _random_labels(labels, length_labels):
+    """
+    Shuffle ``labels`` in place and return a random-sized prefix of it.
+
+    :param labels: list of label ids; it is reordered by ``shuffle``.
+    :param length_labels: maximum number of labels to return.
+    :return: a list holding between 0 and ``length_labels`` labels.
+    """
+    # randint() includes both bounds, so the upper one needs no "+ 1".
+    amount = randint(0, length_labels)
+    if amount:
+        shuffle(labels)
+        return labels[:amount]
+    return []
+
+
+def assign_labels(nodes=None):
+    """
+    Save random comma-separated labels into every label field of each node.
+
+    :param nodes: iterable of ContentNode objects to update and save;
+        defaults to ``ContentNode.objects.all()``.
+    """
+    if nodes is None:
+        nodes = ContentNode.objects.all()
+    # (model field, label pool) pairs; pools are lists because shuffle() is in-place
+    label_pools = (
+        ("grade_level_labels", list(GRADE_LEVEL)),
+        ("resource_type_labels", list(RESOURCE_TYPE)),
+        ("learning_activity_labels", list(LEARNING_ACTIVITY)),
+        ("accessibility_labels", list(ACCESIBILITY)),
+        ("category_labels", list(CATEGORY)),
+    )
+    for node in nodes:
+        for field, pool in label_pools:
+            setattr(node, field, ",".join(_random_labels(pool, len(pool))))
+        node.save()
+
+
+class NodesLabelsTestCase(BaseTestCase):
+    """Label lookups on nodes that were given random labels in setUp."""
+
+    def setUp(self):
+        # creates node hierarchy according to
+        # contentcuration/contentcuration/tests/fixtures/tree.json
+        super(NodesLabelsTestCase, self).setUp()
+        assign_labels()
+        self.node_query = ContentNode.objects.filter(title__icontains="Topic")
+
+    def test_nodes_of_a_label(self):
+        """
+        Get all ContentNodes with a label or one of its descendant labels
+
+        Category ids are dotted paths, so CALCULUS contains MATH as a substring
+        and every node matching CALCULUS must also match MATH.
+        """
+        maths = ContentNode.objects.filter(category_labels__contains=MATH)
+        calculus = ContentNode.objects.filter(category_labels__contains=CALCULUS)
+        assert maths.count() >= calculus.count()
+
+
+@pytest.fixture(scope="class")
+def create_many_nodes():
+    """Build a tree with TreeBuilder(levels=3, num_children=10), once per test class."""
+    print("Creating nodes")
+    TreeBuilder(levels=3, num_children=10)
+
+
+@pytest.mark.skipif(
+    os.environ.get("LABELS_MASSIVE", "false") != "true",
+    reason="Env variable to run massive test is not set",
+)
+@pytest.mark.usefixtures("create_many_nodes")
+class LabelsMassiveTestCase(BaseTestCase):
+    """
+    To run this class tests, pytest must be launched with
+    LABELS_MASSIVE=true pytest -s contentcuration/contentcuration/tests/test_labels.py::LabelsMassiveTestCase
+    """
+
+    def setUp(self):
+        # NOTE(review): BaseTestCase.setUp() is not called here - confirm this is intended
+        self.elapsed = 0
+        self.nodes = ContentNode.objects.all()
+        # len() evaluates the queryset, so self.nodes is fetched only once
+        self.records = len(self.nodes)
+        print("{} nodes created".format(self.records))
+
+    def test_massive_str(self):
+        """Time label assignment and two label lookups; timings are only printed."""
+        init_time = time()
+        assign_labels(self.nodes)
+        self.elapsed = time() - init_time
+        print(
+            "USING STRING: Assigning random labels to {} nodes took {} seconds".format(
+                self.records, self.elapsed
+            )
+        )
+
+        init_time = time()
+        strings = len(ContentNode.objects.filter(resource_type_labels__contains=VIDEO))
+        self.elapsed = time() - init_time
+        print(
+            "USING STRING: Finding for resource label in {} nodes took {} seconds".format(
+                strings, self.elapsed
+            )
+        )
+
+        init_time = time()
+        strings = len(ContentNode.objects.filter(category_labels__contains=MATH))
+        self.elapsed = time() - init_time
+        print(
+            "USING STRING: Finding for maths and descendants label in {} nodes took {} seconds".format(
+                strings, self.elapsed
+            )
+        )