-
Piotr Gawron authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
redcap_connector.py 12.91 KiB
# coding=utf-8
import cStringIO
import json
import logging
import pycurl
import certifi
import timeout_decorator
from django_cron import CronJobBase, Schedule
from web.models import ConfigurationItem, StudySubject, Language
from web.models.constants import REDCAP_TOKEN_CONFIGURATION_TYPE, \
REDCAP_BASE_URL_CONFIGURATION_TYPE, CRON_JOB_TIMEOUT
from web.models.inconsistent_subject import InconsistentField, InconsistentSubject
from web.models.missing_subject import MissingSubject
# Module-wide logger, named after this module.
logger = logging.getLogger(__name__)

# Names of the REDCap fields used by this module, grouped by purpose.

# Subject identifier.
# noinspection SpellCheckingInspection
RED_CAP_ND_NUMBER_FIELD = 'cdisc_dm_usubjd'

# Demographic data.
# noinspection SpellCheckingInspection
RED_CAP_DATE_BORN_FIELD = 'cdisc_dm_brthdtc'
# noinspection SpellCheckingInspection
RED_CAP_SEX_FIELD = 'cdisc_dm_sex'
RED_CAP_DEAD_FIELD = 'dm_death'
# noinspection SpellCheckingInspection
RED_CAP_MPOWER_ID_FIELD = 'dm_mpowerid'

# Language slots, one field per language.
RED_CAP_LANGUAGE_1_FIELD = 'dm_language_1'
RED_CAP_LANGUAGE_2_FIELD = 'dm_language_2'
RED_CAP_LANGUAGE_3_FIELD = 'dm_language_3'
RED_CAP_LANGUAGE_4_FIELD = 'dm_language_4'
class RedcapSubject(object):
    """Plain container for the data of a single subject read from REDCap."""

    # Class-level defaults; instances overwrite them as values become known.
    url = None
    nd_number = None
    date_born = None
    sex = None
    dead = None
    mpower_id = None
    languages = None

    def __init__(self):
        # Each instance gets its own list so languages are never shared.
        self.languages = list()

    def add_language(self, language):
        """Append ``language`` to this subject's languages; ``None`` is ignored."""
        if language is None:
            return
        self.languages.append(language)
def different_string(string1, string2):
    """Tell whether two strings differ once surrounding whitespace is removed.

    ``None`` is treated as the empty string, so ``None`` and ``"  "`` are
    considered equal.

    :param string1: first string or ``None``
    :param string2: second string or ``None``
    :return: ``True`` when the stripped values are different
    """
    left = "" if string1 is None else string1
    right = "" if string2 is None else string2
    return left.strip() != right.strip()
class RedcapConnector(object):
    """Compares locally stored study subjects with the records of a REDCap project.

    The API token and the base URL are read from ``ConfigurationItem`` rows when
    the connector is created; call ``is_valid()`` to check that both were found
    before issuing any query.
    """

    # Number of language fields requested from REDCap (dm_language_1..4).
    MAX_REDCAP_LANGUAGES = 4

    def __init__(self):
        self.token = self._get_configuration_value(REDCAP_TOKEN_CONFIGURATION_TYPE)
        self.base_url = self._get_configuration_value(REDCAP_BASE_URL_CONFIGURATION_TYPE)
        # Lower-cased name -> Language, so REDCap labels can be matched
        # regardless of letter case.
        self.language_by_name = {}
        for language in Language.objects.all():
            self.language_by_name[language.name.lower()] = language

    @staticmethod
    def _get_configuration_value(configuration_type):
        """Return the non-empty value of the first configuration item of the given type, or None."""
        items = ConfigurationItem.objects.filter(type=configuration_type)
        if len(items) > 0 and items[0].value:
            return items[0].value
        return None

    def find_missing(self):
        """Find subjects that exist on only one side (REDCap or the local database).

        :return: list of objects built by ``MissingSubject.create``; entries for
            subjects known only to REDCap carry a link to the REDCap record
        """
        pid = self.get_project_id()
        redcap_version = self.get_redcap_version()
        red_cap_subjects = self.get_red_cap_subjects()
        red_cap_nd_numbers = set(subject.nd_number for subject in red_cap_subjects)

        smash_subjects = StudySubject.objects.exclude(nd_number='')
        smash_nd_numbers = set(subject.nd_number for subject in smash_subjects)

        result = []
        for red_cap_subject in red_cap_subjects:
            if red_cap_subject.nd_number not in smash_nd_numbers:
                url = self.create_redcap_link(pid, redcap_version, red_cap_subject)
                result.append(MissingSubject.create(red_cap_subject=red_cap_subject, smash_subject=None, url=url))
        for smash_subject in smash_subjects:
            if smash_subject.nd_number not in red_cap_nd_numbers:
                result.append(MissingSubject.create(red_cap_subject=None, smash_subject=smash_subject))
        return result

    @staticmethod
    def add_missing(missing_subjects):
        """Replace the stored missing-subject entries with ``missing_subjects``.

        Entries flagged with ``ignore`` are kept, and no new entry is created for
        a subject that already has such an entry.

        :param missing_subjects: iterable of objects exposing ``subject``,
            ``redcap_id`` and ``redcap_url``
        """
        MissingSubject.objects.filter(ignore=False).delete()
        # Everything that survived the delete above is an ignored entry.
        ignored_redcap_ids = set()
        ignored_smash_nd_numbers = set()
        for ignored_subject in MissingSubject.objects.all():
            if ignored_subject.redcap_id is not None:
                ignored_redcap_ids.add(ignored_subject.redcap_id)
            if ignored_subject.subject is not None:
                ignored_smash_nd_numbers.add(ignored_subject.subject.nd_number)

        for missing_subject in missing_subjects:
            ignored_in_redcap = (missing_subject.redcap_id is not None and
                                 missing_subject.redcap_id in ignored_redcap_ids)
            ignored_in_smash = (missing_subject.subject is not None and
                                missing_subject.subject.nd_number in ignored_smash_nd_numbers)
            if not (ignored_in_redcap or ignored_in_smash):
                MissingSubject.objects.create(subject=missing_subject.subject,
                                              redcap_id=missing_subject.redcap_id,
                                              redcap_url=missing_subject.redcap_url)

    @staticmethod
    def add_inconsistent(inconsistent_subjects):
        """Replace all stored inconsistency entries with ``inconsistent_subjects``.

        :param inconsistent_subjects: iterable of objects exposing ``subject``,
            ``redcap_url`` and ``fields`` (each field exposing ``name``,
            ``smash_value`` and ``redcap_value``)
        """
        InconsistentField.objects.all().delete()
        InconsistentSubject.objects.all().delete()
        for inconsistent_subject in inconsistent_subjects:
            stored_subject = InconsistentSubject.objects.create(subject=inconsistent_subject.subject,
                                                                redcap_url=inconsistent_subject.redcap_url)
            for field in inconsistent_subject.fields:
                InconsistentField.objects.create(
                    name=field.name,
                    smash_value=field.smash_value,
                    redcap_value=field.redcap_value,
                    inconsistent_subject=stored_subject)

    def refresh_missing(self):
        """Recompute the missing subjects and store them."""
        self.add_missing(self.find_missing())

    def refresh_inconsistent(self):
        """Recompute the inconsistent subjects and store them."""
        self.add_inconsistent(self.find_inconsistent())

    def find_inconsistent(self):
        """Find subjects present on both sides whose data differ.

        :return: list of objects built by ``InconsistentSubject.create``, one per
            subject with at least one differing field
        """
        pid = self.get_project_id()
        redcap_version = self.get_redcap_version()
        red_cap_subject_by_nd = {}
        for red_cap_subject in self.get_red_cap_subjects():
            red_cap_subject_by_nd[red_cap_subject.nd_number] = red_cap_subject

        result = []
        for study_subject in StudySubject.objects.exclude(nd_number=''):
            red_cap_subject = red_cap_subject_by_nd.get(study_subject.nd_number)
            if red_cap_subject is None:
                continue
            url = self.create_redcap_link(pid, redcap_version, study_subject)
            inconsistency = self.create_inconsistency_subject(red_cap_subject, study_subject, url)
            if inconsistency is not None:
                result.append(inconsistency)
        return result

    @staticmethod
    def create_inconsistency_subject(red_cap_subject, study_subject, url):
        """Compare one study subject with its REDCap counterpart.

        Compared fields: sex, date of birth, dead flag, mPower id and languages.

        :param red_cap_subject: ``RedcapSubject`` with the REDCap values
        :param study_subject: local study subject; personal data are read from
            ``study_subject.subject``
        :param url: link to the REDCap record, stored with the result
        :return: object built by ``InconsistentSubject.create`` or ``None`` when
            every compared field matches
        """
        fields = []
        person = study_subject.subject

        if person.sex != red_cap_subject.sex:
            fields.append(InconsistentField.create("sex", person.sex, red_cap_subject.sex))

        subject_date_born = ""
        if person.date_born is not None:
            subject_date_born = person.date_born.strftime('%Y-%m-%d')
        redcap_subject_date_born = red_cap_subject.date_born
        if redcap_subject_date_born is None:
            redcap_subject_date_born = ""
        # Only the first 10 characters (the length of YYYY-MM-DD) are compared;
        # presumably REDCap may append a time part - TODO confirm.
        redcap_subject_date_born = redcap_subject_date_born[:10]
        if subject_date_born != redcap_subject_date_born:
            fields.append(InconsistentField.create("date of birth", subject_date_born, redcap_subject_date_born))

        if person.dead != red_cap_subject.dead:
            fields.append(InconsistentField.create("dead", str(person.dead), str(red_cap_subject.dead)))

        if different_string(study_subject.mpower_id, red_cap_subject.mpower_id):
            fields.append(InconsistentField.create("mpower id", study_subject.mpower_id, red_cap_subject.mpower_id))

        # Query the subject's languages once instead of once per comparison.
        subject_languages = list(person.languages.all())
        red_cap_languages = red_cap_subject.languages

        missing_language = False
        # When every REDCap language slot is used, extra local languages are
        # not reported (there would be no slot left to hold them).
        if len(red_cap_languages) < RedcapConnector.MAX_REDCAP_LANGUAGES:
            missing_language = any(language not in red_cap_languages for language in subject_languages)
        if not missing_language:
            missing_language = any(language not in subject_languages for language in red_cap_languages)

        if missing_language:
            # Every name is followed by ", " (including the last one), which is
            # the format of the values stored so far.
            subject_language_names = "".join(language.name + ", " for language in subject_languages)
            red_cap_language_names = "".join(language.name + ", " for language in red_cap_languages)
            fields.append(InconsistentField.create("languages", subject_language_names, red_cap_language_names))

        if len(fields) > 0:
            return InconsistentSubject.create(smash_subject=study_subject, url=url, fields=fields)
        return None

    def create_redcap_link(self, pid, redcap_version, subject):
        """Build the URL of the demographics data-entry page of ``subject`` in REDCap.

        :param pid: REDCap project id
        :param redcap_version: REDCap version string, used in the URL path
        :param subject: object exposing ``nd_number``
        :return: URL as a string
        """
        return self.base_url + "/redcap_v" + redcap_version + "/DataEntry/index.php?pid=" + str(pid) + "&id=" + \
            subject.nd_number + "&page=demographics"

    def get_red_cap_subjects(self):
        """Export the subjects of event ``visit_1_arm_1`` from REDCap.

        :return: list of ``RedcapSubject``; languages whose label is unknown
            locally are skipped
        """
        language_fields = (
            RED_CAP_LANGUAGE_1_FIELD,
            RED_CAP_LANGUAGE_2_FIELD,
            RED_CAP_LANGUAGE_3_FIELD,
            RED_CAP_LANGUAGE_4_FIELD,
        )
        query_data = {
            'token': self.token,
            'content': 'record',
            'format': 'json',
            'type': 'flat',
            'fields[0]': RED_CAP_DATE_BORN_FIELD,
            'fields[1]': RED_CAP_SEX_FIELD,
            'fields[2]': RED_CAP_ND_NUMBER_FIELD,
            'fields[3]': RED_CAP_DEAD_FIELD,
            'fields[4]': RED_CAP_LANGUAGE_1_FIELD,
            'fields[5]': RED_CAP_LANGUAGE_2_FIELD,
            'fields[6]': RED_CAP_LANGUAGE_3_FIELD,
            'fields[7]': RED_CAP_LANGUAGE_4_FIELD,
            'fields[8]': RED_CAP_MPOWER_ID_FIELD,
            'events[0]': 'visit_1_arm_1',
            'rawOrLabel': 'label',
            'rawOrLabelHeaders': 'raw',
            'exportCheckboxLabel': 'false',
            'exportSurveyFields': 'false',
            'exportDataAccessGroups': 'false',
            'returnFormat': 'json'
        }
        result = []
        for row in self.execute_query(query_data):
            redcap_subject = RedcapSubject()
            redcap_subject.nd_number = row[RED_CAP_ND_NUMBER_FIELD]
            redcap_subject.date_born = row[RED_CAP_DATE_BORN_FIELD]
            redcap_subject.sex = row[RED_CAP_SEX_FIELD]
            # Labels are requested (rawOrLabel=label), hence the comparison with "yes".
            redcap_subject.dead = (row[RED_CAP_DEAD_FIELD].lower() == "yes")
            redcap_subject.mpower_id = row[RED_CAP_MPOWER_ID_FIELD]
            for language_field in language_fields:
                label = row[language_field]
                if label:
                    redcap_subject.add_language(self.get_language(label))
            result.append(redcap_subject)
        return result

    def get_language(self, name):
        """Return the ``Language`` whose name matches ``name`` (case-insensitive).

        :param name: language label
        :return: matching language or ``None`` (a warning is logged) when unknown
        """
        language = self.language_by_name.get(name.lower())
        if language is None:
            logger.warning("Unknown language: %s", name)
        return language

    def execute_query(self, query_data, is_json=True):
        """POST ``query_data`` to ``<base_url>/api/`` and return the response.

        :param query_data: dict of form fields to send
        :param is_json: when true the response body is parsed as JSON, otherwise
            the raw body is returned
        :return: parsed JSON or the raw response body
        :raises pycurl.error: when the transfer fails
        """
        buf = cStringIO.StringIO()
        curl_connection = pycurl.Curl()
        try:
            curl_connection.setopt(pycurl.CAINFO, certifi.where())
            curl_connection.setopt(curl_connection.URL, self.base_url + "/api/")
            curl_connection.setopt(curl_connection.HTTPPOST, list(query_data.items()))
            curl_connection.setopt(curl_connection.WRITEFUNCTION, buf.write)
            curl_connection.perform()
            body = buf.getvalue()
        finally:
            # Release the handle and the buffer even when the transfer fails.
            curl_connection.close()
            buf.close()
        if is_json:
            return json.loads(body)
        return body

    def get_project_id(self):
        """Return the ``project_id`` reported by the REDCap project export."""
        query_data = {
            'token': self.token,
            'content': 'project',
            'format': 'json',
            'returnFormat': 'json'
        }
        return self.execute_query(query_data)['project_id']

    def get_redcap_version(self):
        """Return the raw (unparsed) response of the REDCap version request."""
        query_data = {
            'token': self.token,
            'content': 'version'
        }
        return self.execute_query(query_data, is_json=False)

    def is_valid(self):
        """Return ``True`` when both the API token and the base URL are configured."""
        return bool(self.token) and bool(self.base_url)
class RedCapRefreshJob(CronJobBase):
    """Cron job, scheduled every 60 minutes, that refreshes the REDCap comparison data."""

    RUN_EVERY_MINUTES = 60
    schedule = Schedule(run_every_mins=RUN_EVERY_MINUTES)
    code = 'web.red_cap_hourly_refresh'  # a unique code

    @timeout_decorator.timeout(CRON_JOB_TIMEOUT, use_signals=False)
    def do(self):
        """Refresh the inconsistent and the missing subjects.

        :return: ``"ok"`` after a refresh, ``"connector down"`` when the
            connector is not configured
        """
        redcap = RedcapConnector()
        if not redcap.is_valid():
            logger.info("Redcap connector is down")
            return "connector down"

        logger.info("Refreshing redcap data")
        redcap.refresh_inconsistent()
        redcap.refresh_missing()
        logger.info("Redcap data refreshed")
        return "ok"