# coding=utf-8
import cStringIO
import json
import logging

import pycurl
import certifi
import timeout_decorator
from django_cron import CronJobBase, Schedule

from web.models import ConfigurationItem, StudySubject, Language
from web.models.constants import REDCAP_TOKEN_CONFIGURATION_TYPE, \
    REDCAP_BASE_URL_CONFIGURATION_TYPE, CRON_JOB_TIMEOUT
from web.models.inconsistent_subject import InconsistentField, InconsistentSubject
from web.models.missing_subject import MissingSubject

# Names of the REDCap record fields requested by RedcapConnector.get_red_cap_subjects().
RED_CAP_LANGUAGE_4_FIELD = 'dm_language_4'
RED_CAP_LANGUAGE_3_FIELD = 'dm_language_3'
RED_CAP_LANGUAGE_2_FIELD = 'dm_language_2'
RED_CAP_LANGUAGE_1_FIELD = 'dm_language_1'
# noinspection SpellCheckingInspection
RED_CAP_MPOWER_ID_FIELD = 'dm_mpowerid'
RED_CAP_DEAD_FIELD = 'dm_death'
# noinspection SpellCheckingInspection
RED_CAP_SEX_FIELD = 'cdisc_dm_sex'
# noinspection SpellCheckingInspection
RED_CAP_DATE_BORN_FIELD = 'cdisc_dm_brthdtc'
# noinspection SpellCheckingInspection
RED_CAP_ND_NUMBER_FIELD = 'cdisc_dm_usubjd'

logger = logging.getLogger(__name__)


class RedcapSubject(object):
    """Plain container for the subject data read from one REDCap record."""

    url = None
    nd_number = None
    date_born = None
    sex = None
    dead = None
    languages = None
    mpower_id = None

    def __init__(self):
        # Per-instance list so that instances never share language entries.
        self.languages = []

    def add_language(self, language):
        """Append ``language`` to ``self.languages``; ``None`` is ignored."""
        if language is not None:
            self.languages.append(language)


def different_string(string1, string2):
    """Return True when the two strings differ after stripping whitespace.

    ``None`` is treated as an empty string for either argument.
    """
    if string1 is None:
        string1 = ""
    if string2 is None:
        string2 = ""
    return string1.strip() != string2.strip()


class RedcapConnector(object):
    """Compares subjects stored locally (SMASH) with the records exported by REDCap.

    The API token and base URL are read from ``ConfigurationItem`` rows; use
    :meth:`is_valid` to check that both are available before querying.
    """

    def __init__(self):
        self.token = self._get_configuration_value(REDCAP_TOKEN_CONFIGURATION_TYPE)
        self.base_url = self._get_configuration_value(REDCAP_BASE_URL_CONFIGURATION_TYPE)

        # Lookup of Language objects by lower-cased name, used by get_language().
        self.language_by_name = {}
        for language in Language.objects.all():
            self.language_by_name[language.name.lower()] = language

    @staticmethod
    def _get_configuration_value(configuration_type):
        """Return the value of the first ConfigurationItem of the given type.

        Returns ``None`` when there is no such item or its value is empty.
        """
        items = ConfigurationItem.objects.filter(type=configuration_type)
        if len(items) > 0 and items[0].value:
            return items[0].value
        return None

    def find_missing(self):
        """Return MissingSubject objects for subjects present on one side only.

        A REDCap subject whose ND number is unknown locally is reported together
        with a link to the REDCap record; a local subject whose ND number is not
        in REDCap is reported without a link.
        """
        pid = self.get_project_id()
        redcap_version = self.get_redcap_version()

        red_cap_subjects = self.get_red_cap_subjects()
        red_cap_subject_by_nd = {subject.nd_number: subject for subject in red_cap_subjects}

        smash_subjects = StudySubject.objects.exclude(nd_number='')
        smash_subject_by_nd = {subject.nd_number: subject for subject in smash_subjects}

        result = []
        for subject in red_cap_subjects:
            if smash_subject_by_nd.get(subject.nd_number) is None:
                url = self.create_redcap_link(pid, redcap_version, subject)
                result.append(MissingSubject.create(red_cap_subject=subject, smash_subject=None, url=url))

        for subject in smash_subjects:
            if red_cap_subject_by_nd.get(subject.nd_number) is None:
                result.append(MissingSubject.create(red_cap_subject=None, smash_subject=subject))

        return result

    @staticmethod
    def add_missing(missing_subjects):
        """Replace the stored non-ignored MissingSubject rows with ``missing_subjects``.

        Rows flagged ``ignore`` are kept, and a new entry that matches a kept row
        (by REDCap id or by the local subject's ND number) is not stored again.
        """
        MissingSubject.objects.filter(ignore=False).delete()

        # Everything left after the delete above is an ignored entry.
        ignored_by_redcap_id = {}
        ignored_by_nd_number = {}
        for ignored_subject in MissingSubject.objects.all():
            if ignored_subject.redcap_id is not None:
                ignored_by_redcap_id[ignored_subject.redcap_id] = ignored_subject
            if ignored_subject.subject is not None:
                ignored_by_nd_number[ignored_subject.subject.nd_number] = ignored_subject

        for missing_subject in missing_subjects:
            ignored = (missing_subject.redcap_id is not None and
                       ignored_by_redcap_id.get(missing_subject.redcap_id) is not None)
            if missing_subject.subject is not None and \
                    ignored_by_nd_number.get(missing_subject.subject.nd_number) is not None:
                ignored = True
            if not ignored:
                MissingSubject.objects.create(subject=missing_subject.subject,
                                              redcap_id=missing_subject.redcap_id,
                                              redcap_url=missing_subject.redcap_url)

    @staticmethod
    def add_inconsistent(inconsistent_subjects):
        """Replace all stored inconsistency rows with ``inconsistent_subjects``."""
        InconsistentField.objects.all().delete()
        InconsistentSubject.objects.all().delete()

        for inconsistent_subject in inconsistent_subjects:
            stored_subject = InconsistentSubject.objects.create(subject=inconsistent_subject.subject,
                                                                redcap_url=inconsistent_subject.redcap_url)
            for field in inconsistent_subject.fields:
                InconsistentField.objects.create(
                    name=field.name,
                    smash_value=field.smash_value,
                    redcap_value=field.redcap_value,
                    inconsistent_subject=stored_subject)

    def refresh_missing(self):
        """Recompute and store the list of missing subjects."""
        missing = self.find_missing()
        self.add_missing(missing)

    def refresh_inconsistent(self):
        """Recompute and store the list of inconsistent subjects."""
        inconsistent = self.find_inconsistent()
        self.add_inconsistent(inconsistent)

    def find_inconsistent(self):
        """Return InconsistentSubject objects for subjects whose data differ.

        Only local subjects with a non-empty ND number that also exists in
        REDCap are compared.
        """
        pid = self.get_project_id()
        redcap_version = self.get_redcap_version()

        red_cap_subjects = self.get_red_cap_subjects()
        red_cap_subject_by_nd = {subject.nd_number: subject for subject in red_cap_subjects}

        smash_subjects = StudySubject.objects.exclude(nd_number='')

        result = []
        for study_subject in smash_subjects:
            red_cap_subject = red_cap_subject_by_nd.get(study_subject.nd_number)
            if red_cap_subject is None:
                continue
            url = self.create_redcap_link(pid, redcap_version, study_subject)
            inconsistent_subject = self.create_inconsistency_subject(red_cap_subject, study_subject, url)
            if inconsistent_subject is not None:
                result.append(inconsistent_subject)
        return result

    @staticmethod
    def create_inconsistency_subject(red_cap_subject, study_subject, url):
        """Compare one local subject with its REDCap counterpart.

        :param red_cap_subject: RedcapSubject built from the REDCap record
        :param study_subject: local StudySubject carrying the same ND number
        :param url: link to the REDCap record, stored with the result
        :return: an InconsistentSubject listing every differing field, or
            ``None`` when no difference was found
        """
        smash_subject = study_subject.subject
        fields = []

        if smash_subject.sex != red_cap_subject.sex:
            fields.append(InconsistentField.create("sex", smash_subject.sex, red_cap_subject.sex))

        subject_date_born = ""
        if smash_subject.date_born is not None:
            subject_date_born = smash_subject.date_born.strftime('%Y-%m-%d')
        redcap_subject_date_born = red_cap_subject.date_born
        if redcap_subject_date_born is None:
            redcap_subject_date_born = ""
        # Compare only the first 10 characters, i.e. the length of a '%Y-%m-%d' date.
        redcap_subject_date_born = redcap_subject_date_born[:10]
        if subject_date_born != redcap_subject_date_born:
            fields.append(InconsistentField.create("date of birth", subject_date_born, redcap_subject_date_born))

        if smash_subject.dead != red_cap_subject.dead:
            fields.append(InconsistentField.create("dead", str(smash_subject.dead), str(red_cap_subject.dead)))

        if different_string(study_subject.mpower_id, red_cap_subject.mpower_id):
            fields.append(InconsistentField.create("mpower id", study_subject.mpower_id,
                                                   red_cap_subject.mpower_id))

        # Fetch the related languages once instead of re-running the query for
        # every membership test and again when building the report string.
        smash_languages = list(smash_subject.languages.all())
        red_cap_languages = red_cap_subject.languages

        missing_language = False
        # Local languages absent from REDCap are only reported while REDCap holds
        # fewer than 4 languages (4 language fields are requested from REDCap).
        # NOTE(review): the REDCap -> local check below is assumed to run
        # regardless of that limit - confirm against the intended behaviour.
        if len(red_cap_languages) < 4:
            missing_language = any(language not in red_cap_languages for language in smash_languages)
        if not missing_language:
            missing_language = any(language not in smash_languages for language in red_cap_languages)

        if missing_language:
            # Every name is followed by ", " (including the last one), which
            # keeps the stored text identical to the previous format.
            subject_languages = "".join(language.name + ", " for language in smash_languages)
            red_cap_subject_languages = "".join(language.name + ", " for language in red_cap_languages)
            fields.append(InconsistentField.create("languages", subject_languages, red_cap_subject_languages))

        if len(fields) > 0:
            return InconsistentSubject.create(smash_subject=study_subject, url=url, fields=fields)
        return None

    def create_redcap_link(self, pid, redcap_version, subject):
        """Build the URL of the demographics data-entry page for ``subject``.

        ``subject`` only needs an ``nd_number`` attribute.
        """
        return self.base_url + "/redcap_v" + redcap_version + "/DataEntry/index.php?pid=" + str(pid) + "&id=" + \
            subject.nd_number + "&page=demographics"

    def get_red_cap_subjects(self):
        """Query REDCap for subject records and return them as RedcapSubject objects."""
        query_data = {
            'token': self.token,
            'content': 'record',
            'format': 'json',
            'type': 'flat',
            'fields[0]': RED_CAP_DATE_BORN_FIELD,
            'fields[1]': RED_CAP_SEX_FIELD,
            'fields[2]': RED_CAP_ND_NUMBER_FIELD,
            'fields[3]': RED_CAP_DEAD_FIELD,
            'fields[4]': RED_CAP_LANGUAGE_1_FIELD,
            'fields[5]': RED_CAP_LANGUAGE_2_FIELD,
            'fields[6]': RED_CAP_LANGUAGE_3_FIELD,
            'fields[7]': RED_CAP_LANGUAGE_4_FIELD,
            'fields[8]': RED_CAP_MPOWER_ID_FIELD,
            'events[0]': 'visit_1_arm_1',
            'rawOrLabel': 'label',
            'rawOrLabelHeaders': 'raw',
            'exportCheckboxLabel': 'false',
            'exportSurveyFields': 'false',
            'exportDataAccessGroups': 'false',
            'returnFormat': 'json'
        }
        data = self.execute_query(query_data)

        language_fields = (RED_CAP_LANGUAGE_1_FIELD, RED_CAP_LANGUAGE_2_FIELD,
                           RED_CAP_LANGUAGE_3_FIELD, RED_CAP_LANGUAGE_4_FIELD)
        result = []
        for row in data:
            redcap_subject = RedcapSubject()
            redcap_subject.nd_number = row[RED_CAP_ND_NUMBER_FIELD]
            redcap_subject.date_born = row[RED_CAP_DATE_BORN_FIELD]
            redcap_subject.sex = row[RED_CAP_SEX_FIELD]
            redcap_subject.dead = (row[RED_CAP_DEAD_FIELD].lower() == "yes")
            redcap_subject.mpower_id = row[RED_CAP_MPOWER_ID_FIELD]
            for language_field in language_fields:
                # Empty language fields are skipped; unknown names resolve to
                # None, which add_language() ignores.
                if row[language_field]:
                    redcap_subject.add_language(self.get_language(row[language_field]))
            result.append(redcap_subject)
        return result

    def get_language(self, name):
        """Return the Language matching ``name`` case-insensitively.

        Returns ``None`` and logs a warning when the name is unknown.
        """
        language = self.language_by_name.get(name.lower())
        if language is None:
            logger.warning("Unknown language: %s", name)
        return language

    def execute_query(self, query_data, is_json=True):
        """POST ``query_data`` to ``<base_url>/api/`` and return the response.

        :param query_data: dict of form fields to send
        :param is_json: when True the response body is parsed with ``json.loads``,
            otherwise the raw body is returned
        """
        buf = cStringIO.StringIO()
        try:
            curl_connection = pycurl.Curl()
            try:
                curl_connection.setopt(pycurl.CAINFO, certifi.where())
                curl_connection.setopt(curl_connection.URL, self.base_url + "/api/")
                curl_connection.setopt(curl_connection.HTTPPOST, query_data.items())
                curl_connection.setopt(curl_connection.WRITEFUNCTION, buf.write)
                curl_connection.perform()
            finally:
                # Release the curl handle even when the request fails.
                curl_connection.close()
            raw_response = buf.getvalue()
        finally:
            buf.close()

        if is_json:
            return json.loads(raw_response)
        return raw_response

    def get_project_id(self):
        """Return the ``project_id`` entry of the REDCap project information."""
        query_data = {
            'token': self.token,
            'content': 'project',
            'format': 'json',
            'returnFormat': 'json'
        }
        data = self.execute_query(query_data)
        return data['project_id']

    def get_redcap_version(self):
        """Return the raw (unparsed) response to the REDCap 'version' query."""
        query_data = {
            'token': self.token,
            'content': 'version'
        }
        return self.execute_query(query_data, is_json=False)

    def is_valid(self):
        """Return True when both the API token and the base URL are configured."""
        if not self.token:
            return False
        if not self.base_url:
            return False
        return True


class RedCapRefreshJob(CronJobBase):
    """Cron job that refreshes the stored REDCap comparison results."""

    RUN_EVERY_MINUTES = 60
    schedule = Schedule(run_every_mins=RUN_EVERY_MINUTES)
    code = 'web.red_cap_hourly_refresh'  # a unique code

    @timeout_decorator.timeout(CRON_JOB_TIMEOUT)
    def do(self):
        """Refresh inconsistent and missing subjects.

        :return: ``"ok"`` after a refresh, ``"connector down"`` when the
            connector is not configured
        """
        connector = RedcapConnector()
        if connector.is_valid():
            logger.info("Refreshing redcap data")
            connector.refresh_inconsistent()
            connector.refresh_missing()
            logger.info("Redcap data refreshed")
            return "ok"
        else:
            logger.info("Redcap connector is down")
            return "connector down"