Skip to content
Snippets Groups Projects
Commit d7a36120 authored by Valentin Groues's avatar Valentin Groues :eyes:
Browse files

Merge branch 'fix/many_queries_export' into 'master'

reduce the number of queries in export subjects from 63k to 9k and in...

See merge request !459
parents 09a5beaa 29c45d06
No related branches found
Tags 1.4.3
1 merge request: !459 "reduce the number of queries in export subjects from 63k to 9k and in..."
Pipeline #91304 passed
......@@ -4,6 +4,7 @@ smasch (1.4.3-1) stable; urgency=medium
* make API endpoint /daily_planning/workers/availabilities faster
* make API endpoint /api/appointments/{} use less queries
* move copyright note to settings
* make /redcap/inconsistent_subjects slightly faster
-- Carlos Vega <carlos.vega@lih.lu> Tue, 14 May 2024 10:07:00 +0200
......
......@@ -9,8 +9,7 @@ from django.db.models.signals import post_save
from django.dispatch import receiver
from web.models import Appointment, Location, Provenance, Visit, VoucherType
from web.models.constants import BOOL_CHOICES, FILE_STORAGE
from web.models.custom_data import (CustomStudySubjectField,
CustomStudySubjectValue)
from web.models.custom_data import CustomStudySubjectField, CustomStudySubjectValue
logger = logging.getLogger(__name__)
......@@ -32,9 +31,7 @@ class StudySubject(models.Model):
visit.save()
def finish_all_appointments(self):
appointments = Appointment.objects.filter(
visit__subject=self, status=Appointment.APPOINTMENT_STATUS_SCHEDULED
)
appointments = Appointment.objects.filter(visit__subject=self, status=Appointment.APPOINTMENT_STATUS_SCHEDULED)
for appointment in appointments:
appointment.status = Appointment.APPOINTMENT_STATUS_CANCELLED
appointment.save()
......@@ -70,9 +67,7 @@ class StudySubject(models.Model):
on_delete=models.CASCADE,
)
postponed = models.BooleanField(
choices=BOOL_CHOICES, verbose_name="Postponed", default=False
)
postponed = models.BooleanField(choices=BOOL_CHOICES, verbose_name="Postponed", default=False)
datetime_contact_reminder = models.DateTimeField(
null=True,
blank=True,
......@@ -111,9 +106,7 @@ class StudySubject(models.Model):
on_delete=models.SET_NULL,
)
screening_number = models.CharField(
max_length=50, verbose_name="Screening number", blank=True, null=True
)
screening_number = models.CharField(max_length=50, verbose_name="Screening number", blank=True, null=True)
nd_number = models.CharField(
max_length=25,
blank=True,
......@@ -121,9 +114,7 @@ class StudySubject(models.Model):
)
comments = models.TextField(max_length=2000, blank=True, verbose_name="Comments")
date_added = models.DateField(verbose_name="Added on", auto_now_add=True)
referral = models.CharField(
max_length=128, null=True, blank=True, verbose_name="Referred by"
)
referral = models.CharField(max_length=128, null=True, blank=True, verbose_name="Referred by")
referral_letter = models.FileField(
storage=FILE_STORAGE,
upload_to="referral_letters",
......@@ -145,32 +136,16 @@ class StudySubject(models.Model):
default=False,
)
voucher_types = models.ManyToManyField(
VoucherType, blank=True, verbose_name="Voucher types"
)
voucher_types = models.ManyToManyField(VoucherType, blank=True, verbose_name="Voucher types")
information_sent = models.BooleanField(
verbose_name="Information sent", default=False
)
information_sent = models.BooleanField(verbose_name="Information sent", default=False)
resigned = models.BooleanField(
verbose_name="Resigned", default=False, editable=True
)
resign_reason = models.TextField(
max_length=2000, blank=True, verbose_name="Resign reason"
)
excluded = models.BooleanField(
verbose_name="Excluded", default=False, editable=True
)
exclude_reason = models.TextField(
max_length=2000, blank=True, verbose_name="Exclude reason"
)
endpoint_reached = models.BooleanField(
verbose_name="Endpoint Reached", default=False, editable=True
)
endpoint_reached_reason = models.TextField(
max_length=2000, blank=True, verbose_name="Endpoint reached comments"
)
resigned = models.BooleanField(verbose_name="Resigned", default=False, editable=True)
resign_reason = models.TextField(max_length=2000, blank=True, verbose_name="Resign reason")
excluded = models.BooleanField(verbose_name="Excluded", default=False, editable=True)
exclude_reason = models.TextField(max_length=2000, blank=True, verbose_name="Exclude reason")
endpoint_reached = models.BooleanField(verbose_name="Endpoint Reached", default=False, editable=True)
endpoint_reached_reason = models.TextField(max_length=2000, blank=True, verbose_name="Endpoint reached comments")
def sort_matched_screening_first(self, pattern, reverse=False):
if self.screening_number is None:
......@@ -186,9 +161,7 @@ class StudySubject(models.Model):
letter, number = chunks
try:
tupl = (letter, int(number))
except (
ValueError
): # better than isdigit because isdigit fails with negative numbers and others
except ValueError: # better than isdigit because isdigit fails with negative numbers and others
tupl = (letter, number)
else:
logger.warning(
......@@ -222,9 +195,7 @@ class StudySubject(models.Model):
return True
def can_schedule(self):
    """Tell whether this study subject may still be scheduled.

    Returns:
        bool: ``False`` as soon as one of ``resigned``, ``excluded`` or
        ``endpoint_reached`` on this study subject, or ``dead`` on the linked
        ``subject``, is truthy; ``True`` otherwise.
    """
    # All four flags are read up front (as the original list literal did), so
    # the linked subject is always accessed even when an earlier flag is set.
    blocking_flags = (self.resigned, self.excluded, self.endpoint_reached, self.subject.dead)
    return not any(blocking_flags)
@property
def status(self):
......@@ -243,23 +214,25 @@ class StudySubject(models.Model):
def custom_data_values(self):
    """Return this study subject's custom values, creating the missing ones first.

    For every ``CustomStudySubjectField`` of ``self.study`` that has no
    ``CustomStudySubjectValue`` linked to this study subject, a value is
    created with the field's ``default_value``.

    Returns:
        A queryset of ``CustomStudySubjectValue`` rows for this study subject,
        with ``study_subject_field`` prefetched.
    """
    # find the custom fields that have not yet been populated into the study subject
    # https://docs.djangoproject.com/en/3.2/ref/models/querysets/#filteredrelation-objects
    values_of_this_subject = FilteredRelation(
        "customstudysubjectvalue",
        condition=Q(customstudysubjectvalue__study_subject=self),
    )
    unpopulated_fields = (
        CustomStudySubjectField.objects
        .annotate(t=values_of_this_subject)
        # a NULL on the filtered relation means "no value for this subject yet"
        .filter(t__study_subject_field__isnull=True, study=self.study)
    )
    for field in unpopulated_fields:
        CustomStudySubjectValue.objects.create(
            study_subject=self, value=field.default_value, study_subject_field=field
        )
    return CustomStudySubjectValue.objects.filter(study_subject=self).prefetch_related("study_subject_field")
def set_custom_data_value(
self, custom_study_subject_field: CustomStudySubjectField, value: str
):
def set_custom_data_value(self, custom_study_subject_field: CustomStudySubjectField, value: str):
found = False
for existing_value in self.customstudysubjectvalue_set.all():
if existing_value.study_subject_field == custom_study_subject_field:
......@@ -279,9 +252,7 @@ class StudySubject(models.Model):
# pylint: disable-next=C0209
return "%s %s" % (self.subject.first_name, self.subject.last_name)
def get_custom_data_value(
self, custom_field: CustomStudySubjectField
) -> Optional[CustomStudySubjectValue]:
def get_custom_data_value(self, custom_field: CustomStudySubjectField) -> Optional[CustomStudySubjectValue]:
for value in self.custom_data_values:
if value.study_subject_field == custom_field:
return value
......@@ -302,9 +273,7 @@ class StudySubject(models.Model):
# SIGNALS
@receiver(post_save, sender=StudySubject)
def set_as_resigned_or_excluded_or_endpoint_reached(
sender, instance, **kwargs
): # pylint: disable=unused-argument
def set_as_resigned_or_excluded_or_endpoint_reached(sender, instance, **kwargs): # pylint: disable=unused-argument
if instance.excluded:
instance.mark_as_excluded()
if instance.resigned:
......
......@@ -12,8 +12,9 @@ from django.contrib.auth import get_user_model
from django.forms.models import model_to_dict
from django_cron import CronJobBase, Schedule
from six import ensure_str
from collections import defaultdict
from web.models import ConfigurationItem, StudySubject, Language, AppointmentType, Appointment, Visit, Study, \
from web.models import ConfigurationItem, StudySubject, Subject, Language, AppointmentType, Appointment, Visit, Study, \
Provenance, Worker
from web.models.constants import REDCAP_TOKEN_CONFIGURATION_TYPE, \
REDCAP_BASE_URL_CONFIGURATION_TYPE, CRON_JOB_TIMEOUT, RED_CAP_LANGUAGE_4_FIELD_TYPE, \
......@@ -67,18 +68,20 @@ class RedcapVisit:
def different_string(string1, string2):
    """Tell whether two values differ as text, ignoring surrounding whitespace.

    Args:
        string1: ``str``, ``bytes`` or a falsy value such as ``None``.
        string2: ``str``, ``bytes`` or a falsy value such as ``None``.

    Returns:
        bool: ``True`` when the stripped texts are not equal.

    Bytes are decoded as UTF-8; any falsy value is treated as the empty string.
    """

    def _as_text(value):
        # None, "" and b"" all compare as the empty string.
        if not value:
            return ""
        # Each argument is decoded on its own; the earlier code decoded
        # string1 when string2 was the bytes value.
        if isinstance(value, (bytes, bytearray)):
            return value.decode('utf8')
        return value

    return _as_text(string1).strip() != _as_text(string2).strip()
......@@ -134,6 +137,11 @@ class RedcapConnector:
else:
self.importer_user = Worker.objects.filter(user=user)
table = Subject.languages.through.objects.all().prefetch_related('language')
self.subjects_languages = defaultdict(set)
for element in table:
self.subjects_languages[element.subject_id].add(element.language)
def find_missing(self):
pid = self.get_project_id()
redcap_version = self.get_redcap_version()
......@@ -223,7 +231,16 @@ class RedcapConnector:
for subject in red_cap_subjects:
red_cap_subject_by_nd[subject.nd_number] = subject
smash_subjects = StudySubject.objects.exclude(nd_number='')
smash_subjects = StudySubject.objects.exclude(nd_number='').prefetch_related(
"subject",
"study",
"type",
"flying_team",
"default_location",
"subject__languages",
"subject__country",
"subject__default_written_communication_language"
)
result = []
for subject in smash_subjects:
......@@ -302,13 +319,12 @@ class RedcapConnector:
subject.save()
return result
# pylint: disable=R6301
def check_sex_consistency(self, red_cap_subject, study_subject):
    """Compare the sex stored locally with the one coming from REDCap.

    Args:
        red_cap_subject: object exposing a ``sex`` attribute.
        study_subject: object whose ``subject.sex`` is compared against it.

    Returns:
        The result of ``InconsistentField.create("sex", ...)`` when the two
        values differ, otherwise ``None``.
    """
    local_sex = study_subject.subject.sex
    if local_sex != red_cap_subject.sex:
        return InconsistentField.create("sex", local_sex, red_cap_subject.sex)
    return None
@staticmethod
def check_birth_date_consistency(red_cap_subject, study_subject):
def check_birth_date_consistency(self, red_cap_subject, study_subject):
subject_date_born = ""
if study_subject.subject.date_born is not None:
subject_date_born = study_subject.subject.date_born.strftime('%Y-%m-%d')
......@@ -320,39 +336,36 @@ class RedcapConnector:
if subject_date_born != redcap_subject_date_born:
return InconsistentField.create("date of birth", subject_date_born, redcap_subject_date_born)
def check_dead_consistency(self, red_cap_subject, study_subject):
    """Compare the ``dead`` flag stored locally with the one coming from REDCap.

    Args:
        red_cap_subject: object exposing a ``dead`` attribute.
        study_subject: object whose ``subject.dead`` is compared against it.

    Returns:
        The result of ``InconsistentField.create("dead", ...)`` (both values
        passed through ``str``) when the flags differ, otherwise ``None``.
    """
    local_dead = study_subject.subject.dead
    if local_dead != red_cap_subject.dead:
        return InconsistentField.create("dead", str(local_dead), str(red_cap_subject.dead))
    return None
def check_mpower_id_consistency(self, red_cap_subject: RedcapSubject, study_subject: StudySubject) \
        -> Optional[InconsistentField]:
    """Compare the local 'MPower ID' custom field with the REDCap ``mpower_id``.

    The comparison is delegated to ``different_string``, so bytes/None values
    and surrounding whitespace do not count as a difference.

    Returns:
        The result of ``InconsistentField.create("mpower id", ...)`` when the
        values differ, otherwise ``None``.
    """
    # Read the custom field once instead of once for the test and once for the report.
    local_mpower_id = study_subject.get_custom_field_value('MPower ID')
    if different_string(local_mpower_id, red_cap_subject.mpower_id):
        return InconsistentField.create("mpower id", local_mpower_id, red_cap_subject.mpower_id)
    return None
def check_languages_consistency(self, red_cap_subject, study_subject):
    """Compare the languages stored locally with the ones coming from REDCap.

    The local languages are read from ``self.subjects_languages``, keyed by
    ``study_subject.subject.id``, instead of querying the subject's languages
    relation.

    Returns:
        The result of ``InconsistentField.create("languages", ...)`` when a
        language is missing on either side, otherwise ``None``. Both language
        lists are rendered as names, each followed by ``", "``.
    """
    s_languages = self.subjects_languages[study_subject.subject.id]
    redcap_languages = red_cap_subject.languages

    # NOTE(review): SOURCE has lost its indentation; the nesting below (only the
    # local -> REDCap direction is guarded by the "< 4" test) is reconstructed
    # and should be confirmed against the repository.
    missing_language = False
    if len(redcap_languages) < 4:
        missing_language = any(language not in redcap_languages for language in s_languages)
    if not missing_language:
        missing_language = any(language not in s_languages for language in redcap_languages)

    if not missing_language:
        return None

    # Keep the trailing ", " after every name so the reported text is unchanged.
    subject_languages = "".join(language.name + ", " for language in s_languages)
    red_cap_subject_languages = "".join(language.name + ", " for language in redcap_languages)
    return InconsistentField.create("languages", subject_languages, red_cap_subject_languages)
@staticmethod
def create_inconsistency_subject(red_cap_subject: RedcapSubject, study_subject: StudySubject,
def create_inconsistency_subject(self, red_cap_subject: RedcapSubject, study_subject: StudySubject,
url: str) -> RedcapInconsistentSubject:
# func dict
field_checks = {
......@@ -369,7 +382,7 @@ class RedcapConnector:
fields_to_check = [k for k, v in model_to_dict(study_subject.study.redcap_columns).items() if v is True]
for field_to_check in fields_to_check:
field = field_checks[field_to_check](red_cap_subject, study_subject)
field = field_checks[field_to_check](self, red_cap_subject, study_subject)
if field is not None:
fields.append(field)
......
......@@ -22,11 +22,7 @@ def export_to_csv(request, study_id, data_type="subjects"):
selected_fields = request.GET.get("fields", None)
response = HttpResponse(content_type="text/csv; charset=utf-8")
response["Content-Disposition"] = (
'attachment; filename="'
+ data_type
+ "-"
+ get_today_midnight_date().strftime("%Y-%m-%d")
+ '.csv"'
'attachment; filename="' + data_type + "-" + get_today_midnight_date().strftime("%Y-%m-%d") + '.csv"'
)
if data_type == "subjects":
......@@ -92,12 +88,8 @@ class CustomField:
# Export column for the drop-out marker.
DROP_OUT_FIELD = CustomField({"verbose_name": "DROP OUT", "name": "custom-drop-out"})
# Export column listing the appointment types of an appointment.
APPOINTMENT_TYPE_FIELD = CustomField({"name": "appointment_types", "verbose_name": "Appointment Types"})
# Study-subject columns exported in addition to the fields declared elsewhere in this module.
STUDY_SUBJECT_FIELDS = [CustomField({"name": "nd_number", "verbose_name": "Subject number"})]
SUBJECT_FIELDS = [
CustomField({"name": "last_name", "verbose_name": "Family name"}),
......@@ -144,7 +136,16 @@ def get_subjects_as_array(study: Study, selected_fields: str = None):
field_names = [field.verbose_name for field in subject_fields] # faster than loop
result.append(field_names)
subjects = StudySubject.objects.order_by("-subject__last_name")
subjects = StudySubject.objects.order_by("-subject__last_name").prefetch_related(
"subject",
"study",
"type",
"flying_team",
"default_location",
"subject__languages",
"subject__country",
"subject__default_written_communication_language",
)
subject2row = subject_to_row_for_fields_processor(study, subject_fields)
for subject in subjects:
row = subject2row.subject_to_row_for_fields(subject)
......@@ -233,13 +234,7 @@ def get_appointment_fields():
):
appointments_fields.append(field)
all_fields = (
STUDY_SUBJECT_FIELDS
+ SUBJECT_FIELDS
+ VISIT_FIELDS
+ appointments_fields
+ [APPOINTMENT_TYPE_FIELD]
)
all_fields = STUDY_SUBJECT_FIELDS + SUBJECT_FIELDS + VISIT_FIELDS + appointments_fields + [APPOINTMENT_TYPE_FIELD]
return all_fields, appointments_fields
......@@ -248,14 +243,14 @@ def get_appointments_as_array(selected_fields=None):
result = []
all_fields, appointments_fields = get_appointment_fields()
all_fields = filter_fields_from_selected_fields(all_fields, selected_fields)
appointments_fields = filter_fields_from_selected_fields(
appointments_fields, selected_fields
)
appointments_fields = filter_fields_from_selected_fields(appointments_fields, selected_fields)
field_names = [field.verbose_name for field in all_fields] # faster than loop
result.append(field_names)
appointments = Appointment.objects.order_by("-datetime_when")
appointments = Appointment.objects.order_by("-datetime_when").prefetch_related(
"visit", "visit__subject", "visit__subject__subject", "worker_assigned", "location", "appointment_types"
)
for appointment in appointments:
# add field_names ['ND number', 'Family name', 'Name', 'Visit'] first
......@@ -282,12 +277,7 @@ def get_appointments_as_array(selected_fields=None):
row.append(getattr(appointment, field.name))
if APPOINTMENT_TYPE_FIELD.verbose_name in field_names:
# avoid last comma in the list of appointment types
type_string = ",".join(
[
appointment_type.code
for appointment_type in appointment.appointment_types.all()
]
)
type_string = ",".join([appointment_type.code for appointment_type in appointment.appointment_types.all()])
row.append(type_string)
result.append([str(s).replace("\n", ";").replace("\r", ";") for s in row])
return result
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment