diff --git a/CHANGELOG b/CHANGELOG index 6827377e45e..3f013526f33 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,12 @@ We follow the CalVer (https://calver.org/) versioning scheme: YY.MINOR.MICRO. +19.29.0 (2019-10-02) +=================== +- Use new pagecounter fields for increased query efficiency +- Allow meetings to be sorted on download count +- Remove old permissions fields now that we have Guardian + 19.28.0 (2019-09-24) =================== - API v2: Use consistent naming for JSON API type (kebab-case) diff --git a/Dockerfile b/Dockerfile index 63b11ab5427..83d95063fd8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,99 +1,27 @@ -FROM python:2.7-slim-stretch - -ENV GOSU_VERSION=1.10 \ - NODE_ENV=production \ - NODE_VERSION=8.6.0 \ - YARN_VERSION=1.1.0 - -# Libraries such as matplotlib require a HOME directory for cache and configuration -RUN set -ex \ - && mkdir -p /var/www \ - && chown www-data:www-data /var/www \ - && apt-get update \ - && apt-get install -y gnupg2 \ - && for key in \ - # GOSU - B42F6819007F00F88E364FD4036A9C25BF357DD4 \ - # https://github.com/nodejs/docker-node/blob/9c25cbe93f9108fd1e506d14228afe4a3d04108f/8.2/Dockerfile - # gpg keys listed at https://github.com/nodejs/node#release-team - # Node - 9554F04D7259F04124DE6B476D5A82AC7E37093B \ - 94AE36675C464D64BAFA68DD7434390BDBE9B9C5 \ - FD3A5288F042B6850C66B31F09FE44734EB7990E \ - 71DCFD284A79C3B38668286BC97EC7A07EDE3FC1 \ - DD8F2338BAE7501E3DD5AC78C273792F7D83545D \ - B9AE9905FFD7803F25714661B63B535A4C206CA9 \ - C4F0DFFF4E8C1A8236409D08E73BC641CC11F4C8 \ - 56730D5401028683275BD23C23EFEFE93C4CFFFE \ - # Yarn - 6A010C5166006599AA17F08146C2130DFD2497F5 \ - ; do \ - gpg --keyserver hkp://ipv4.pool.sks-keyservers.net:80 --recv-keys "$key" || \ - gpg --keyserver hkp://ha.pool.sks-keyservers.net:80 --recv-keys "$key" || \ - gpg --keyserver hkp://pgp.mit.edu:80 --recv-keys "$key" || \ - gpg --keyserver hkp://keyserver.pgp.com:80 --recv-keys "$key" \ - ; done \ - # Install dependancies - && 
apt-get install -y \ - git \ - libev4 \ - libev-dev \ - libevent-dev \ - libxml2-dev \ - libxslt1-dev \ - zlib1g-dev \ - curl \ - # cryptography - build-essential \ - libssl-dev \ - libffi-dev \ - python-dev \ - # postgresql - libpq-dev \ - # file audits - par2 \ - && ARCH= \ - && dpkgArch="$(dpkg --print-architecture)" \ - && case "${dpkgArch##*-}" in \ - amd64) ARCH='x64';; \ - ppc64el) ARCH='ppc64le';; \ - *) echo "unsupported architecture"; exit 1 ;; \ - esac \ - # grab gosu for easy step-down from root - && curl -o /usr/local/bin/gosu -SL "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch" \ - && curl -o /usr/local/bin/gosu.asc -SL "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch.asc" \ - && gpg --verify /usr/local/bin/gosu.asc \ - && rm /usr/local/bin/gosu.asc \ - && chmod +x /usr/local/bin/gosu \ - # Node - && curl -SLO "https://nodejs.org/dist/v$NODE_VERSION/node-v$NODE_VERSION-linux-$ARCH.tar.xz" \ - && curl -SLO --compressed "https://nodejs.org/dist/v$NODE_VERSION/SHASUMS256.txt.asc" \ - && gpg --batch --decrypt --output SHASUMS256.txt SHASUMS256.txt.asc \ - && grep " node-v$NODE_VERSION-linux-$ARCH.tar.xz\$" SHASUMS256.txt | sha256sum -c - \ - && tar -xJf "node-v$NODE_VERSION-linux-$ARCH.tar.xz" -C /usr/local --strip-components=1 \ - && rm "node-v$NODE_VERSION-linux-$ARCH.tar.xz" SHASUMS256.txt.asc SHASUMS256.txt \ - && ln -s /usr/local/bin/node /usr/local/bin/nodejs \ - # Yarn - && curl -fSLO --compressed "https://yarnpkg.com/downloads/$YARN_VERSION/yarn-v$YARN_VERSION.tar.gz" \ - && curl -fSLO --compressed "https://yarnpkg.com/downloads/$YARN_VERSION/yarn-v$YARN_VERSION.tar.gz.asc" \ - && gpg --batch --verify yarn-v$YARN_VERSION.tar.gz.asc yarn-v$YARN_VERSION.tar.gz \ - && mkdir -p /opt/yarn \ - && tar -xzf yarn-v$YARN_VERSION.tar.gz -C /opt/yarn --strip-components=1 \ - && ln -s /opt/yarn/bin/yarn /usr/local/bin/yarn \ - && ln -s /opt/yarn/bin/yarn /usr/local/bin/yarnpkg \ - && yarn 
global add bower \ - && yarn cache clean \ - && rm -rf \ - yarn-v$YARN_VERSION.tar.gz.asc \ - yarn-v$YARN_VERSION.tar.gz \ - $HOME/.gnupg \ - $HOME/.cache \ - && apt-get remove -y \ - curl \ - && apt-get clean \ - && apt-get autoremove -y \ - && rm -rf /var/lib/apt/lists/* \ - && mkdir -p /code +FROM node:8-alpine + +# Source: https://github.com/docker-library/httpd/blob/7976cabe162268bd5ad2d233d61e340447bfc371/2.4/alpine/Dockerfile#L3 +RUN set -x \ + && addgroup -g 82 -S www-data \ + && adduser -h /var/www -u 82 -D -S -G www-data www-data + +RUN apk add --no-cache --virtual .run-deps \ + su-exec \ + bash \ + python \ + py-pip \ + git \ + # lxml2 + libxml2 \ + libxslt \ + # psycopg2 + postgresql-libs \ + # cryptography + libffi \ + # gevent + libev \ + libevent \ + && yarn global add bower WORKDIR /code @@ -118,7 +46,22 @@ COPY ./addons/twofactor/requirements.txt ./addons/twofactor/ #COPY ./addons/wiki/requirements.txt ./addons/wiki/ COPY ./addons/zotero/requirements.txt ./addons/zotero/ -RUN for reqs_file in \ +RUN set -ex \ + && mkdir -p /var/www \ + && chown www-data:www-data /var/www \ + && apk add --no-cache --virtual .build-deps \ + build-base \ + linux-headers \ + python-dev \ + # lxml2 + musl-dev \ + libxml2-dev \ + libxslt-dev \ + # psycopg2 + postgresql-dev \ + # cryptography + libffi-dev \ + && for reqs_file in \ /code/requirements.txt \ /code/requirements/release.txt \ /code/addons/*/requirements.txt \ @@ -128,17 +71,10 @@ RUN for reqs_file in \ && (pip uninstall uritemplate.py --yes || true) \ && pip install --no-cache-dir uritemplate.py==0.3.0 \ # Fix: https://github.com/CenterForOpenScience/osf.io/pull/6783 - && python -m compileall /usr/local/lib/python2.7 || true - -# OSF: Assets -COPY ./.bowerrc ./bower.json ./ -RUN bower install --production --allow-root \ - && bower cache clean --allow-root - -COPY ./package.json ./.yarnrc ./yarn.lock ./ -RUN yarn install --frozen-lockfile \ - && yarn cache clean + && python -m compileall 
/usr/lib/python2.7 || true \ + && apk del .build-deps +# Settings COPY ./tasks/ ./tasks/ COPY ./website/settings/ ./website/settings/ COPY ./api/base/settings/ ./api/base/settings/ @@ -148,8 +84,29 @@ RUN mv ./website/settings/local-dist.py ./website/settings/local.py \ && mv ./api/base/settings/local-dist.py ./api/base/settings/local.py \ && sed 's/DEBUG_MODE = True/DEBUG_MODE = False/' -i ./website/settings/local.py +# Bower Assets +COPY ./.bowerrc ./bower.json ./ +COPY ./admin/.bowerrc ./admin/bower.json ./admin/ +RUN \ + # OSF + bower install --production --allow-root \ + && bower cache clean --allow-root \ + # Admin + && cd ./admin \ + && bower install --production --allow-root \ + && bower cache clean --allow-root + +# Webpack Assets +# +## OSF +COPY ./package.json ./.yarnrc ./yarn.lock ./ COPY ./webpack* ./ COPY ./website/static/ ./website/static/ +## Admin +COPY ./admin/package.json ./admin/yarn.lock ./admin/ +COPY ./admin/webpack* ./admin/ +COPY ./admin/static/ ./admin/static/ +## Addons COPY ./addons/bitbucket/static/ ./addons/bitbucket/static/ COPY ./addons/box/static/ ./addons/box/static/ COPY ./addons/citations/static/ ./addons/citations/static/ @@ -168,27 +125,20 @@ COPY ./addons/s3/static/ ./addons/s3/static/ COPY ./addons/twofactor/static/ ./addons/twofactor/static/ COPY ./addons/wiki/static/ ./addons/wiki/static/ COPY ./addons/zotero/static/ ./addons/zotero/static/ -RUN mkdir -p ./website/static/built/ \ +RUN \ + # OSF + yarn install --frozen-lockfile \ + && mkdir -p ./website/static/built/ \ && invoke build_js_config_files \ - && yarn run webpack-prod -# /OSF: Assets - -# Admin: Assets -COPY ./admin/.bowerrc ./admin/bower.json ./admin/ -RUN cd ./admin \ - && bower install --production --allow-root \ - && bower cache clean --allow-root - -COPY ./admin/package.json ./admin/yarn.lock ./admin/ -RUN cd ./admin \ + && yarn run webpack-prod \ + # Admin + && cd ./admin \ && yarn install --frozen-lockfile \ - && yarn cache clean - -COPY ./admin/webpack* 
./admin/ -COPY ./admin/static/ ./admin/static/ -RUN cd ./admin \ - && yarn run webpack-prod -# /Admin: Assets + && yarn run webpack-prod \ + && cd ../ \ + # Cleanup + && yarn cache clean \ + && npm cache clean --force # Copy the rest of the code over COPY ./ ./ @@ -196,6 +146,8 @@ COPY ./ ./ ARG GIT_COMMIT= ENV GIT_COMMIT ${GIT_COMMIT} +# TODO: Admin/API should fully specify their bower static deps, and not include ./website/static in their defaults.py. +# (this adds an additional 300+mb to the build image) RUN for module in \ api.base.settings \ admin.base.settings \ @@ -211,4 +163,4 @@ RUN for module in \ ; done \ && rm ./website/settings/local.py ./api/base/settings/local.py -CMD ["gosu", "nobody", "invoke", "--list"] +CMD ["su-exec", "nobody", "invoke", "--list"] diff --git a/addons/base/views.py b/addons/base/views.py index 44a83d2c4a0..f268a932e04 100644 --- a/addons/base/views.py +++ b/addons/base/views.py @@ -330,9 +330,9 @@ def get_auth(auth, **kwargs): # version index is 0 based version_index = version - 1 if action == 'render': - update_analytics(node, file_id, version_index, 'view') + update_analytics(node, filenode, version_index, 'view') elif action == 'download' and not from_mfr: - update_analytics(node, file_id, version_index, 'download') + update_analytics(node, filenode, version_index, 'download') if waffle.switch_is_active(features.ELASTICSEARCH_METRICS): if isinstance(node, Preprint): metric_class = get_metric_class_for_action(action, from_mfr=from_mfr) @@ -517,7 +517,6 @@ def create_waterbutler_log(payload, **kwargs): def addon_delete_file_node(self, target, user, event_type, payload): """ Get addon BaseFileNode(s), move it into the TrashedFileNode collection and remove it from StoredFileNode. - Required so that the guids of deleted addon files are not re-pointed when an addon file or folder is moved or renamed. 
""" diff --git a/addons/osfstorage/tests/test_models.py b/addons/osfstorage/tests/test_models.py index 42147bf5768..1fb0606ffaf 100644 --- a/addons/osfstorage/tests/test_models.py +++ b/addons/osfstorage/tests/test_models.py @@ -184,9 +184,9 @@ def test_download_count_file(self, mock_session): mock_session.data = {} child = self.node_settings.get_root().append_file('Test') - utils.update_analytics(self.project, child._id, 0) - utils.update_analytics(self.project, child._id, 1) - utils.update_analytics(self.project, child._id, 2) + utils.update_analytics(self.project, child, 0) + utils.update_analytics(self.project, child, 1) + utils.update_analytics(self.project, child, 2) assert_equals(child.get_download_count(), 3) assert_equals(child.get_download_count(0), 1) diff --git a/addons/osfstorage/tests/test_utils.py b/addons/osfstorage/tests/test_utils.py index 7e5e82d2711..91dd42cb642 100644 --- a/addons/osfstorage/tests/test_utils.py +++ b/addons/osfstorage/tests/test_utils.py @@ -31,9 +31,9 @@ def setUp(self): def test_serialize_revision(self): sessions.sessions[request._get_current_object()] = Session() - utils.update_analytics(self.project, self.record._id, 0) - utils.update_analytics(self.project, self.record._id, 0) - utils.update_analytics(self.project, self.record._id, 2) + utils.update_analytics(self.project, self.record, 0) + utils.update_analytics(self.project, self.record, 0) + utils.update_analytics(self.project, self.record, 2) expected = { 'index': 1, 'user': { @@ -58,9 +58,9 @@ def test_serialize_revision(self): def test_anon_revisions(self): sessions.sessions[request._get_current_object()] = Session() - utils.update_analytics(self.project, self.record._id, 0) - utils.update_analytics(self.project, self.record._id, 0) - utils.update_analytics(self.project, self.record._id, 2) + utils.update_analytics(self.project, self.record, 0) + utils.update_analytics(self.project, self.record, 0) + utils.update_analytics(self.project, self.record, 2) expected = { 
'index': 2, 'user': None, diff --git a/addons/osfstorage/utils.py b/addons/osfstorage/utils.py index 055ac3d32e3..bf49e952c5b 100644 --- a/addons/osfstorage/utils.py +++ b/addons/osfstorage/utils.py @@ -16,24 +16,28 @@ LOCATION_KEYS = ['service', settings.WATERBUTLER_RESOURCE, 'object'] -def update_analytics(node, file_id, version_idx, action='download'): +def update_analytics(node, file, version_idx, action='download'): """ :param Node node: Root node to update - :param str file_id: The _id field of a filenode + :param BaseFileNode file: The filenode whose counters are updated :param int version_idx: Zero-based version index :param str action: is this logged as download or a view """ - # Pass in contributors to check that contributors' downloads + # Pass in contributors and group members to check that their downloads # do not count towards total download count contributors = [] - if node.contributors: + if getattr(node, 'contributors_and_group_members', None): + contributors = node.contributors_and_group_members + elif getattr(node, 'contributors', None): contributors = node.contributors + node_info = { 'contributors': contributors } + resource = node.guids.first() - update_counter('{0}:{1}:{2}'.format(action, node._id, file_id), node_info=node_info) - update_counter('{0}:{1}:{2}:{3}'.format(action, node._id, file_id, version_idx), node_info=node_info) + update_counter(resource, file, version=None, action=action, node_info=node_info) + update_counter(resource, file, version_idx, action, node_info=node_info) def serialize_revision(node, record, version, index, anon=False): @@ -44,7 +48,6 @@ :param FileVersion version: The version to serialize :param int index: One-based index of version """ - if anon: user = None else: diff --git a/addons/osfstorage/views.py b/addons/osfstorage/views.py index b65657f420e..0e0c091e207 100644 --- a/addons/osfstorage/views.py +++ b/addons/osfstorage/views.py @@ -106,8 +106,7 @@ def osfstorage_get_revisions(file_node, payload, target, 
**kwargs): counter_prefix = 'download:{}:{}:'.format(file_node.target._id, file_node._id) version_count = file_node.versions.count() - # Don't worry. The only % at the end of the LIKE clause, the index is still used - counts = dict(PageCounter.objects.filter(_id__startswith=counter_prefix).values_list('_id', 'total')) + counts = dict(PageCounter.objects.filter(resource=file_node.target.guids.first().id, file=file_node, action='download').values_list('_id', 'total')) qs = FileVersion.includable_objects.filter(basefilenode__id=file_node.id).include('creator__guids').order_by('-created') for i, version in enumerate(qs): @@ -233,7 +232,10 @@ def osfstorage_get_children(file_node, **kwargs): ) CHECKOUT_GUID ON TRUE LEFT JOIN LATERAL ( SELECT P.total AS DOWNLOAD_COUNT FROM osf_pagecounter AS P - WHERE P._id = 'download:' || %s || ':' || F._id + WHERE P.resource_id = %s + AND P.file_id = F.id + AND P.action = 'download' + AND P.version ISNULL LIMIT 1 ) DOWNLOAD_COUNT ON TRUE LEFT JOIN LATERAL ( @@ -268,7 +270,7 @@ def osfstorage_get_children(file_node, **kwargs): AND (NOT F.type IN ('osf.trashedfilenode', 'osf.trashedfile', 'osf.trashedfolder')) """, [ user_content_type_id, - file_node.target._id, + file_node.target.guids.first().id, user_pk, user_pk, user_id, diff --git a/api/meetings/serializers.py b/api/meetings/serializers.py index c44c5165916..09f30e73f02 100644 --- a/api/meetings/serializers.py +++ b/api/meetings/serializers.py @@ -154,7 +154,7 @@ def get_download_link(self, obj): assuming its first file is the meeting submission. 
""" if getattr(obj, 'file_id', None): - submission_file = OsfStorageFile.objects.get(_id=obj.file_id) + submission_file = OsfStorageFile.objects.get(id=obj.file_id) else: submission_file = self.get_submission_file(obj) diff --git a/api/meetings/views.py b/api/meetings/views.py index e814c042123..7ae1f2c493e 100644 --- a/api/meetings/views.py +++ b/api/meetings/views.py @@ -2,10 +2,14 @@ from rest_framework import generics, permissions as drf_permissions from rest_framework.exceptions import NotFound from django.db.models import Q, Count, Subquery, OuterRef, Case, When, Value, CharField, F +from django.db.models.functions import Coalesce +from django.contrib.contenttypes.models import ContentType +from addons.osfstorage.models import OsfStorageFile from api.base import permissions as base_permissions from api.base.exceptions import InvalidFilterOperator from api.base.filters import ListFilterMixin + from api.base.views import JSONAPIBaseView from api.base.utils import get_object_or_error from api.base.versioning import PrivateVersioning @@ -15,7 +19,7 @@ from framework.auth.oauth_scopes import CoreScopes -from osf.models import AbstractNode, Conference, Contributor, Tag +from osf.models import AbstractNode, Conference, Contributor, Tag, PageCounter from website import settings @@ -111,7 +115,7 @@ class MeetingSubmissionList(BaseMeetingSubmission, generics.ListAPIView, ListFil view_name = 'meeting-submissions' ordering = ('-created', ) # default ordering - ordering_fields = ('title', 'meeting_category', 'author_name', 'created', ) + ordering_fields = ('title', 'meeting_category', 'author_name', 'created', 'download_count',) # overrides ListFilterMixin def get_default_queryset(self): @@ -138,6 +142,7 @@ def build_query_from_field(self, field_name, operation): def annotate_queryset_for_filtering_and_sorting(self, meeting, queryset): queryset = self.annotate_queryset_with_meeting_category(meeting, queryset) queryset = self.annotate_queryset_with_author_name(queryset) + 
queryset = self.annotate_queryset_with_download_count(queryset) return queryset def annotate_queryset_with_meeting_category(self, meeting, queryset): @@ -186,6 +191,27 @@ def annotate_queryset_with_author_name(self, queryset): ) return queryset + def annotate_queryset_with_download_count(self, queryset): + """ + Annotates queryset with download count of first osfstorage file + """ + pages = PageCounter.objects.filter( + action='download', + resource_id=OuterRef('guids__id'), + file_id=OuterRef('file_id'), + version=None, + ) + + file_subqs = OsfStorageFile.objects.filter( + target_content_type_id=ContentType.objects.get_for_model(AbstractNode), + target_object_id=OuterRef('pk'), + ).order_by('created') + + queryset = queryset.annotate(file_id=Subquery(file_subqs.values('id')[:1])).annotate( + download_count=Coalesce(Subquery(pages.values('total')[:1]), Value(0)), + ) + return queryset + class MeetingSubmissionDetail(BaseMeetingSubmission, generics.RetrieveAPIView, NodeMixin): view_name = 'meeting-submission-detail' diff --git a/api_tests/meetings/views/test_meetings_submissions_detail.py b/api_tests/meetings/views/test_meetings_submissions_detail.py index a00765ef2e7..39a8b4a557c 100644 --- a/api_tests/meetings/views/test_meetings_submissions_detail.py +++ b/api_tests/meetings/views/test_meetings_submissions_detail.py @@ -56,7 +56,15 @@ def file(self, user, meeting_one_submission): return file def mock_download(self, project, file, download_count): - return PageCounter.objects.create(_id='download:{}:{}'.format(project._id, file._id), total=download_count) + pc, _ = PageCounter.objects.get_or_create( + _id='download:{}:{}'.format(project._id, file._id), + resource=project.guids.first(), + action='download', + file=file + ) + pc.total = download_count + pc.save() + return pc def test_meeting_submission_detail(self, app, user, meeting, base_url, meeting_one_submission, meeting_one_private_submission, random_project, meeting_submission_no_category, file): diff --git 
a/api_tests/meetings/views/test_meetings_submissions_list.py b/api_tests/meetings/views/test_meetings_submissions_list.py index 12d9524e0a4..0e5d3e5b2f9 100644 --- a/api_tests/meetings/views/test_meetings_submissions_list.py +++ b/api_tests/meetings/views/test_meetings_submissions_list.py @@ -92,7 +92,15 @@ def file_three(self, user, meeting_two_third_submission): return file def mock_download(self, project, file, download_count): - return PageCounter.objects.create(_id='download:{}:{}'.format(project._id, file._id), total=download_count) + pc, _ = PageCounter.objects.get_or_create( + _id='download:{}:{}'.format(project._id, file._id), + resource=project.guids.first(), + action='download', + file=file + ) + pc.total = download_count + pc.save() + return pc def test_meeting_submissions_list(self, app, user, meeting, url, meeting_one_submission, meeting_one_private_submission): api_utils.create_test_file(meeting_one_submission, user, create_guid=False) @@ -234,3 +242,19 @@ def test_meeting_submissions_list_sorting_and_filtering(self, app, url_meeting_t data = res.json['data'] assert len(data) == 3 assert [third, second, first] == [meeting['id'] for meeting in data] + + # test sort download count + res = app.get(url_meeting_two + '?sort=download_count') + assert res.status_code == 200 + data = res.json['data'] + assert len(data) == 3 + assert [third, second, first] == [meeting['id'] for meeting in data] + assert [0, 1, 2] == [meeting['attributes']['download_count'] for meeting in data] + + # test reverse sort download count + res = app.get(url_meeting_two + '?sort=-download_count') + assert res.status_code == 200 + data = res.json['data'] + assert len(data) == 3 + assert [first, second, third] == [meeting['id'] for meeting in data] + assert [2, 1, 0] == [meeting['attributes']['download_count'] for meeting in data] diff --git a/docker-compose.yml b/docker-compose.yml index acc3ef5462f..62d0afb345d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -367,9 
+367,9 @@ services: - /bin/bash - -c - invoke requirements --all && - (python -m compileall /usr/local/lib/python2.7 || true) && + (python -m compileall /usr/lib/python2.7 || true) && rm -Rf /python2.7/* && - cp -Rf -p /usr/local/lib/python2.7 / + cp -Rf -p /usr/lib/python2.7 / restart: 'no' environment: DJANGO_SETTINGS_MODULE: api.base.settings @@ -385,7 +385,7 @@ services: DJANGO_SETTINGS_MODULE: api.base.settings volumes: - ./:/code:cached - - osf_requirements_vol:/usr/local/lib/python2.7 + - osf_requirements_vol:/usr/lib/python2.7 - osf_bower_components_vol:/code/website/static/vendor/bower_components - osf_node_modules_vol:/code/node_modules stdin_open: true @@ -398,7 +398,7 @@ services: DJANGO_SETTINGS_MODULE: admin.base.settings volumes: - ./:/code:cached - - osf_requirements_vol:/usr/local/lib/python2.7 + - osf_requirements_vol:/usr/lib/python2.7 - osf_node_modules_vol:/code/node_modules # needed due to admin references of ../webpack.<...>.js configurations. - osf_bower_components_vol:/code/website/static/vendor/bower_components - osf_admin_bower_components_vol:/code/admin/static/vendor/bower_components @@ -450,7 +450,7 @@ services: - .docker-compose.env volumes: - ./:/code:cached - - osf_requirements_vol:/usr/local/lib/python2.7 + - osf_requirements_vol:/usr/lib/python2.7 - osf_bower_components_vol:/code/website/static/vendor/bower_components - osf_node_modules_vol:/code/node_modules # - ./ssl/ca-chain.cert.pem:/etc/ssl/certs/ca-chain.cert.pem:ro @@ -475,7 +475,7 @@ services: stdin_open: true volumes: - ./:/code:cached - - osf_requirements_vol:/usr/local/lib/python2.7 + - osf_requirements_vol:/usr/lib/python2.7 - osf_bower_components_vol:/code/website/static/vendor/bower_components - osf_node_modules_vol:/code/node_modules - osf_admin_bower_components_vol:/code/admin/static/vendor/bower_components @@ -497,7 +497,7 @@ services: - .docker-compose.env volumes: - ./:/code:cached - - osf_requirements_vol:/usr/local/lib/python2.7 + - 
osf_requirements_vol:/usr/lib/python2.7 - osf_bower_components_vol:/code/website/static/vendor/bower_components - osf_node_modules_vol:/code/node_modules stdin_open: true @@ -519,7 +519,7 @@ services: - .docker-compose.sharejs.env volumes: - ./:/code:cached - - osf_requirements_vol:/usr/local/lib/python2.7 + - osf_requirements_vol:/usr/lib/python2.7 - osf_bower_components_vol:/code/website/static/vendor/bower_components - osf_node_modules_vol:/code/node_modules - ember_osf_web_dist_vol:/ember_osf_web diff --git a/framework/analytics/__init__.py b/framework/analytics/__init__.py index 97d64301b4a..c79eb46637b 100644 --- a/framework/analytics/__init__.py +++ b/framework/analytics/__init__.py @@ -20,15 +20,18 @@ def get_total_activity_count(user_id): return UserActivityCounter.get_total_activity_count(user_id) -def update_counter(page, node_info=None): - """Update counters for page. +def update_counter(resource, file, version, action, node_info=None): + """Update counters for resource. - :param str page: Colon-delimited page key in analytics collection + :param Guid resource: Guid record for the resource + :param BaseFileNode file: The file whose counters are updated + :param int version: Zero-based version index, or None for all versions + :param str action: e.g. 
'download' """ from osf.models import PageCounter - return PageCounter.update_counter(page, node_info) + return PageCounter.update_counter(resource, file, version=version, action=action, node_info=node_info) -def get_basic_counters(page): +def get_basic_counters(resource, file, version, action): from osf.models import PageCounter - return PageCounter.get_basic_counters(page) + return PageCounter.get_basic_counters(resource, file, version=version, action=action) diff --git a/osf/migrations/0186_make_pagecounter_fields_nonnull.py b/osf/migrations/0186_make_pagecounter_fields_nonnull.py new file mode 100644 index 00000000000..7476afdb923 --- /dev/null +++ b/osf/migrations/0186_make_pagecounter_fields_nonnull.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.15 on 2019-08-28 20:09 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('osf', '0185_basefilenode_versions'), + ] + + operations = [ + migrations.AlterField( + model_name='pagecounter', + name='action', + field=models.CharField(max_length=128), + ), + migrations.AlterField( + model_name='pagecounter', + name='file', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='pagecounters', to='osf.BaseFileNode'), + ), + migrations.AlterField( + model_name='pagecounter', + name='resource', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='pagecounters', to='osf.Guid'), + ), + ] diff --git a/osf/migrations/0187_remove_outdated_contributor_permissions.py b/osf/migrations/0187_remove_outdated_contributor_permissions.py new file mode 100644 index 00000000000..0b5f746b77b --- /dev/null +++ b/osf/migrations/0187_remove_outdated_contributor_permissions.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.15 on 2019-09-25 20:07 +from __future__ import unicode_literals + +from django.db 
import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('osf', '0186_make_pagecounter_fields_nonnull'), + ] + + operations = [ + migrations.RemoveField( + model_name='contributor', + name='admin', + ), + migrations.RemoveField( + model_name='contributor', + name='read', + ), + migrations.RemoveField( + model_name='contributor', + name='write', + ), + migrations.RemoveField( + model_name='institutionalcontributor', + name='admin', + ), + migrations.RemoveField( + model_name='institutionalcontributor', + name='read', + ), + migrations.RemoveField( + model_name='institutionalcontributor', + name='write', + ), + ] diff --git a/osf/models/analytics.py b/osf/models/analytics.py index 5385504ae43..034c80bd951 100644 --- a/osf/models/analytics.py +++ b/osf/models/analytics.py @@ -8,7 +8,6 @@ from framework.sessions import session from osf.models.base import BaseModel, Guid -from osf.models.files import BaseFileNode from osf.utils.datetime_aware_jsonfield import DateTimeAwareJSONField logger = logging.getLogger(__name__) @@ -61,18 +60,17 @@ class PageCounter(BaseModel): _id = models.CharField(max_length=300, null=False, blank=False, db_index=True, unique=True) # 272 in prod + date = DateTimeAwareJSONField(default=dict) total = models.PositiveIntegerField(default=0) unique = models.PositiveIntegerField(default=0) - action = models.CharField(max_length=128, null=True, blank=True) - resource = models.ForeignKey(Guid, related_name='pagecounters', null=True, blank=True) - file = models.ForeignKey('osf.BaseFileNode', null=True, blank=True, related_name='pagecounters') + action = models.CharField(max_length=128, null=False, blank=False) + resource = models.ForeignKey(Guid, related_name='pagecounters', null=False, blank=False) + file = models.ForeignKey('osf.BaseFileNode', null=False, blank=False, related_name='pagecounters') version = models.IntegerField(null=True, blank=True) - DOWNLOAD_ALL_VERSIONS_ID_PATTERN = r'^download:[^:]*:{1}[^:]*$' - 
@classmethod def get_all_downloads_on_date(cls, date): """ @@ -81,9 +79,8 @@ def get_all_downloads_on_date(cls, date): :return: long sum: """ formatted_date = date.strftime('%Y/%m/%d') - # Get all PageCounters with data for the date made for all versions downloads, - # regex insures one colon so all versions are queried. - page_counters = cls.objects.filter(date__has_key=formatted_date, _id__regex=cls.DOWNLOAD_ALL_VERSIONS_ID_PATTERN) + # Get all PageCounters with data for the date made for all versions downloads - don't include specific versions + page_counters = cls.objects.filter(date__has_key=formatted_date, version__isnull=True, action='download') # Get the total download numbers from the nested dict on the PageCounter by annotating it as daily_total then # aggregating the sum. @@ -99,24 +96,13 @@ def clean_page(page): '$', '_' ) - @staticmethod - def deconstruct_id(page): - """ - Backwards compatible code for use in writing to both _id field and - action, resource, file, and version fields simultaneously. - """ - split = page.split(':') - action = split[0] - resource = Guid.load(split[1]) - file = BaseFileNode.load(split[2]) - if len(split) == 3: - version = None + @classmethod + def update_counter(cls, resource, file, version, action, node_info): + if version is not None: + page = '{0}:{1}:{2}:{3}'.format(action, resource._id, file._id, version) else: - version = split[3] - return resource, file, action, version + page = '{0}:{1}:{2}'.format(action, resource._id, file._id) - @classmethod - def update_counter(cls, page, node_info): cleaned_page = cls.clean_page(page) date = timezone.now() date_string = date.strftime('%Y/%m/%d') @@ -125,13 +111,13 @@ def update_counter(cls, page, node_info): # Temporary backwards compat - when creating new PageCounters, temporarily keep writing to _id field. 
# After we're sure this is stable, we can stop writing to the _id field, and query on # resource/file/action/version - resource, file, action, version = cls.deconstruct_id(cleaned_page) - model_instance, created = PageCounter.objects.select_for_update().get_or_create(_id=cleaned_page) - - model_instance.resource = resource - model_instance.file = file - model_instance.action = action - model_instance.version = version + model_instance, created = cls.objects.select_for_update().get_or_create( + _id=cleaned_page, + resource=resource, + file=file, + action=action, + version=version + ) # if they visited something today if date_string == visited_by_date['date']: @@ -188,9 +174,9 @@ def update_counter(cls, page, node_info): model_instance.save() @classmethod - def get_basic_counters(cls, page): + def get_basic_counters(cls, resource, file, version, action): try: - counter = cls.objects.get(_id=cls.clean_page(page)) + counter = cls.objects.get(resource=resource, file=file, version=version, action=action) return (counter.unique, counter.total) except cls.DoesNotExist: return (None, None) diff --git a/osf/models/contributor.py b/osf/models/contributor.py index 1383bb7e6c0..551d99569d0 100644 --- a/osf/models/contributor.py +++ b/osf/models/contributor.py @@ -32,11 +32,6 @@ def permission(self): class Contributor(AbstractBaseContributor): - # TO BE DELETED (read/write/admin fields) - read = models.BooleanField(default=False) - write = models.BooleanField(default=False) - admin = models.BooleanField(default=False) - node = models.ForeignKey('AbstractNode', on_delete=models.CASCADE) @property @@ -69,11 +64,6 @@ class Meta: class InstitutionalContributor(AbstractBaseContributor): - # TO BE DELETED (read/write/admin fields) - read = models.BooleanField(default=False) - write = models.BooleanField(default=False) - admin = models.BooleanField(default=False) - institution = models.ForeignKey('Institution', on_delete=models.CASCADE) class Meta: diff --git a/osf/models/files.py 
b/osf/models/files.py index 2f143cf109b..51e41b6a219 100644 --- a/osf/models/files.py +++ b/osf/models/files.py @@ -351,11 +351,7 @@ def get_page_counter_count(self, count_type, version=None): """Assembles a string to retrieve the correct file data from the pagecounter collection, then calls get_basic_counters to retrieve the total count. Limit to version if specified. """ - parts = [count_type, self.target._id, self._id] - if version is not None: - parts.append(version) - page = ':'.join([format(part) for part in parts]) - _, count = get_basic_counters(page) + _, count = get_basic_counters(self.target.guids.first(), self, version=version, action=count_type) return count or 0 diff --git a/osf_tests/test_analytics.py b/osf_tests/test_analytics.py index d3c93822227..7145893c00b 100644 --- a/osf_tests/test_analytics.py +++ b/osf_tests/test_analytics.py @@ -4,7 +4,6 @@ """ import mock -import re import pytest from django.utils import timezone from nose.tools import * # noqa: F403 @@ -13,7 +12,7 @@ from addons.osfstorage.models import OsfStorageFile from framework import analytics -from osf.models import PageCounter +from osf.models import PageCounter, OSFGroup from tests.base import OsfTestCase from osf_tests.factories import UserFactory, ProjectFactory @@ -71,19 +70,22 @@ def file_node3(project): @pytest.fixture() def page_counter(project, file_node): page_counter_id = 'download:{}:{}'.format(project._id, file_node.id) - page_counter, created = PageCounter.objects.get_or_create(_id=page_counter_id, date={u'2018/02/04': {u'total': 41, u'unique': 33}}) + resource = project.guids.first() + page_counter, created = PageCounter.objects.get_or_create(_id=page_counter_id, resource=resource, file=file_node, version=None, action='download', date={u'2018/02/04': {u'total': 41, u'unique': 33}}) return page_counter @pytest.fixture() def page_counter2(project, file_node2): page_counter_id = 'download:{}:{}'.format(project._id, file_node2.id) - page_counter, created = 
PageCounter.objects.get_or_create(_id=page_counter_id, date={u'2018/02/04': {u'total': 4, u'unique': 26}}) + resource = project.guids.first() + page_counter, created = PageCounter.objects.get_or_create(_id=page_counter_id, resource=resource, file=file_node2, version=None, action='download', date={u'2018/02/04': {u'total': 4, u'unique': 26}}) return page_counter @pytest.fixture() def page_counter_for_individual_version(project, file_node3): - page_counter_id = 'download:{}:{}:0'.format(project._id, file_node3.id) - page_counter, created = PageCounter.objects.get_or_create(_id=page_counter_id, date={u'2018/02/04': {u'total': 1, u'unique': 1}}) + page_counter_id = 'download:{}:{}'.format(project._id, file_node3.id) + resource = project.guids.first() + page_counter, created = PageCounter.objects.get_or_create(_id=page_counter_id, resource=resource, file=file_node3, version=0, action='download', date={u'2018/02/04': {u'total': 1, u'unique': 1}}) return page_counter @@ -93,15 +95,14 @@ class TestPageCounter: @mock.patch('osf.models.analytics.session') def test_download_update_counter(self, mock_session, project, file_node): mock_session.data = {} - page_counter_id = 'download:{}:{}'.format(project._id, file_node.id) + resource = project.guids.first() + PageCounter.update_counter(resource, file_node, version=None, action='download', node_info={}) - PageCounter.update_counter(page_counter_id, {}) - - page_counter = PageCounter.objects.get(_id=page_counter_id) + page_counter = PageCounter.objects.get(resource=resource, file=file_node, version=None, action='download') assert page_counter.total == 1 assert page_counter.unique == 1 - PageCounter.update_counter(page_counter_id, {}) + PageCounter.update_counter(resource, file_node, version=None, action='download', node_info={}) page_counter.refresh_from_db() assert page_counter.total == 2 @@ -110,19 +111,39 @@ def test_download_update_counter(self, mock_session, project, file_node): @mock.patch('osf.models.analytics.session') 
def test_download_update_counter_contributor(self, mock_session, user, project, file_node): mock_session.data = {'auth_user_id': user._id} - page_counter_id = 'download:{}:{}'.format(project._id, file_node.id) + resource = project.guids.first() + + PageCounter.update_counter(resource, file_node, version=None, action='download', node_info={'contributors': project.contributors}) - PageCounter.update_counter(page_counter_id, {'contributors': project.contributors}) - page_counter = PageCounter.objects.get(_id=page_counter_id) + page_counter = PageCounter.objects.get(resource=resource, file=file_node, version=None, action='download') assert page_counter.total == 0 assert page_counter.unique == 0 - PageCounter.update_counter(page_counter_id, {'contributors': project.contributors}) + PageCounter.update_counter(resource, file_node, version=None, action='download', node_info={'contributors': project.contributors}) page_counter.refresh_from_db() assert page_counter.total == 0 assert page_counter.unique == 0 + platform_group = OSFGroup.objects.create(creator=user, name='Platform') + group_member = UserFactory() + project.add_osf_group(platform_group) + + mock_session.data = {'auth_user_id': group_member._id} + PageCounter.update_counter(resource, file_node, version=None, action='download', node_info={ + 'contributors': project.contributors_and_group_members} + ) + page_counter.refresh_from_db() + assert page_counter.total == 1 + assert page_counter.unique == 1 + + platform_group.make_member(group_member) + PageCounter.update_counter(resource, file_node, version=None, action='download', node_info={ + 'contributors': project.contributors_and_group_members} + ) + assert page_counter.total == 1 + assert page_counter.unique == 1 + def test_get_all_downloads_on_date(self, page_counter, page_counter2): """ This method tests that multiple pagecounter objects have their download totals summed properly. 
@@ -150,21 +171,3 @@ def test_get_all_downloads_on_date_exclude_versions(self, page_counter, page_cou total_downloads = PageCounter.get_all_downloads_on_date(date) assert total_downloads == 45 - - -class TestPageCounterRegex: - - def test_download_all_versions_regex(self): - # Checks regex to ensure we don't double count versions totals for that file node. - - match = re.match(PageCounter.DOWNLOAD_ALL_VERSIONS_ID_PATTERN, 'bad id') - assert not match - - match = re.match(PageCounter.DOWNLOAD_ALL_VERSIONS_ID_PATTERN, 'views:guid1:fileid') - assert not match - - match = re.match(PageCounter.DOWNLOAD_ALL_VERSIONS_ID_PATTERN, 'download:guid1:fileid:0') - assert not match - - match = re.match(PageCounter.DOWNLOAD_ALL_VERSIONS_ID_PATTERN, 'download:guid1:fileid') - assert match diff --git a/package.json b/package.json index 9417a06eb0b..864d0504cd6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "OSF", - "version": "19.28.0", + "version": "19.29.0", "description": "Facilitating Open Science", "repository": "https://github.com/CenterForOpenScience/osf.io", "author": "Center for Open Science", diff --git a/scripts/tests/test_downloads_summary.py b/scripts/tests/test_downloads_summary.py index 8fdc0c84131..e07b1c700e1 100644 --- a/scripts/tests/test_downloads_summary.py +++ b/scripts/tests/test_downloads_summary.py @@ -7,6 +7,7 @@ from addons.osfstorage import utils from addons.osfstorage.tests.utils import StorageTestCase +from api_tests import utils as api_utils from osf_tests.factories import ProjectFactory @@ -20,10 +21,11 @@ def test_download_count(self): # Keen does not allow same day requests so we have to do some time traveling to my birthday with mock.patch('django.utils.timezone.now', return_value=datetime.datetime(1991, 9, 25).replace(tzinfo=pytz.utc)): node = ProjectFactory() - utils.update_analytics(node, 'fake id', {'contributors': node.contributors}) + file = api_utils.create_test_file( + node, node.creator, filename='file_one') + 
utils.update_analytics(node, file, 0, 'download') query_date = datetime.date(1991, 9, 25) - event = DownloadCountSummary().get_events(query_date) assert event[0]['files']['total'] == 1 diff --git a/website/conferences/views.py b/website/conferences/views.py index 54880c724aa..4efc2a2528b 100644 --- a/website/conferences/views.py +++ b/website/conferences/views.py @@ -156,9 +156,7 @@ def conference_data(meeting): def conference_submissions_sql(conf): """ Serializes all meeting submissions to a conference (returns array of dictionaries) - :param obj conf: Conference object. - """ submission1_name = conf.field_names['submission1'] submission2_name = conf.field_names['submission2'] @@ -200,7 +198,7 @@ def conference_submissions_sql(conf): LIMIT 1 ) AUTHOR ON TRUE -- Returns first visible contributor LEFT JOIN LATERAL ( - SELECT osf_guid._id + SELECT osf_guid._id, osf_guid.id FROM osf_guid WHERE (osf_guid.object_id = osf_abstractnode.id AND osf_guid.content_type_id = %s) -- Content type for AbstractNode ORDER BY osf_guid.created DESC @@ -225,7 +223,9 @@ def conference_submissions_sql(conf): LEFT JOIN LATERAL ( SELECT P.total AS DOWNLOAD_COUNT FROM osf_pagecounter AS P - WHERE P._id = 'download:' || GUID._id || ':' || FILE._id + WHERE P.action = 'download' + AND P.resource_id = GUID.id + AND P.file_id = FILE.id LIMIT 1 ) DOWNLOAD_COUNT ON TRUE -- Get all the nodes for a specific meeting @@ -234,7 +234,6 @@ def conference_submissions_sql(conf): AND osf_abstractnode.is_public = TRUE AND AUTHOR_GUID IS NOT NULL) ORDER BY osf_abstractnode.created DESC; - """, [ submission1_name, submission1_name, @@ -276,7 +275,6 @@ def serialize_conference(conf): @ember_flag_is_active(features.EMBER_MEETING_DETAIL) def conference_results(meeting): """Return the data for the grid view for a conference. - :param str meeting: Endpoint name for a conference. """ try: