Skip to content
Snippets Groups Projects
Commit c12ec6c9 authored by Piotr Gawron's avatar Piotr Gawron
Browse files

TNS importer merged with generic subject importer

parent 3e2d6436
No related branches found
No related tags found
1 merge request: !275 Resolve "update automatic visit/subject importer"
from .csv_subject_import_reader import CsvSubjectImportReader
from .csv_tns_subject_import_reader import TnsCsvSubjectImportReader
from .csv_tns_visit_import_reader import TnsCsvVisitImportReader
from .exporter import SubjectExporter, VisitExporter
from .exporter_cron_job import SubjectExporterCronJob, VisitExporterCronJob
......@@ -9,5 +8,5 @@ from .subject_import_reader import SubjectImportReader
from .warning_counter import MsgCounterHandler
__all__ = [Importer, SubjectImportReader, CsvSubjectImportReader, SubjectImporterCronJob, VisitImporterCronJob,
SubjectExporter, VisitExporter, SubjectExporterCronJob, VisitExporterCronJob, TnsCsvSubjectImportReader,
SubjectExporter, VisitExporter, SubjectExporterCronJob, VisitExporterCronJob,
TnsCsvVisitImportReader, MsgCounterHandler]
......@@ -8,7 +8,7 @@ from django.db import models
from django.db.models import Field
from web.models import StudySubject, Subject, SubjectImportData
from .subject_import_reader import SubjectImportReader
from .subject_import_reader import SubjectImportReader, EtlCommon
logger = logging.getLogger(__name__)
......@@ -23,7 +23,8 @@ class CsvSubjectImportReader(SubjectImportReader):
def load_data(self) -> List[StudySubject]:
study_subjects = []
with open(self.import_data.get_absolute_file_path()) as csv_file:
reader = csv.reader(csv_file, delimiter=self.import_data.csv_delimiter)
reader = csv.reader((EtlCommon.remove_bom(line) for line in csv_file),
delimiter=self.import_data.csv_delimiter)
headers = next(reader, None)
for row in reader:
subject = Subject()
......@@ -34,6 +35,8 @@ class CsvSubjectImportReader(SubjectImportReader):
self.add_data(study_subject, header, value)
if study_subject.nd_number is None or study_subject.nd_number == "":
study_subject.nd_number = study_subject.screening_number
if study_subject.screening_number is None or study_subject.screening_number == "":
study_subject.screening_number = study_subject.nd_number
study_subjects.append(study_subject)
return study_subjects
......@@ -42,7 +45,7 @@ class CsvSubjectImportReader(SubjectImportReader):
value = raw_data
if field is None:
logger.warning("Don't know how to handle column " + column_name + " with data " + value)
pass
return
if field.get_internal_type() == "DateField":
value = self.get_date(value)
......@@ -62,7 +65,8 @@ class CsvSubjectImportReader(SubjectImportReader):
def add_mappings(self, object_type: Type[models.Model]):
for field in object_type._meta.get_fields():
if field.get_internal_type() == "CharField" or \
field.get_internal_type() == "DateField":
field.get_internal_type() == "DateField" or \
field.get_internal_type() == "TextField":
found = False
for mapping in self.import_data.column_mappings.all():
if mapping.table_name == object_type._meta.db_table and field.name == mapping.column_name:
......
import csv
import logging
from typing import List
from web.models import StudySubject, Subject, SubjectImportData
from .subject_import_reader import SubjectImportReader, EtlCommon
logger = logging.getLogger(__name__)
class TnsCsvSubjectImportReader(SubjectImportReader):
    """Build unsaved ``StudySubject`` objects from a TNS-style CSV file.

    Column names are hard-coded (``firstname``, ``donor_id``, ``phonenr`` ...)
    and translated to ``Subject`` / ``StudySubject`` attributes in
    :meth:`add_data`.  Merging a cell with a value that was already set is
    delegated to ``get_new_value`` / ``get_new_date_value``, which this class
    does not define itself (presumably inherited -- confirm in
    ``subject_import_reader``).
    """

    # CSV column name -> attribute on ``study_subject.subject`` that receives
    # the plain text value.  Several CSV columns may feed the same attribute.
    _SUBJECT_TEXT_COLUMNS = {
        "firstname": "first_name",
        "sig_firstname": "first_name",
        "lastname": "last_name",
        "sig_lastname": "last_name",
        "email": "email",
        "address": "address",
        "postal_code": "postal_code",
        "city": "city",
        "phonenr": "phone_number",
        "representative": "next_of_keen_name",
    }

    def __init__(self, import_data: SubjectImportData):
        super().__init__(import_data)

    def load_data(self) -> List[StudySubject]:
        """Parse the configured CSV file and return one StudySubject per data row.

        The first row is consumed as the header row.  When a row leaves
        ``nd_number`` unset or empty, the screening number is copied into it.
        """
        loaded = []
        with open(self.import_data.get_absolute_file_path()) as csv_file:
            # Every line goes through EtlCommon.remove_bom before the csv parser sees it.
            cleaned_lines = (EtlCommon.remove_bom(line) for line in csv_file)
            rows = csv.reader(cleaned_lines, delimiter=self.import_data.csv_delimiter)
            headers = next(rows, None)
            for row in rows:
                subject = Subject()
                study_subject = StudySubject()
                study_subject.subject = subject
                study_subject.study = self.import_data.study
                for column_name, cell in zip(headers, row):
                    self.add_data(study_subject, column_name, cell)
                if study_subject.nd_number in (None, ""):
                    study_subject.nd_number = study_subject.screening_number
                loaded.append(study_subject)
        return loaded

    def add_data(self, study_subject: StudySubject, column_name: str, value: str):
        """Store a single CSV cell on ``study_subject`` (or its ``subject``).

        :param study_subject: object being populated for the current row
        :param column_name: header of the CSV column the cell belongs to
        :param value: raw cell content
        Unknown columns are reported with a warning and otherwise ignored.
        """
        subject_attr = self._SUBJECT_TEXT_COLUMNS.get(column_name)
        if subject_attr is not None:
            subject = study_subject.subject
            merged = self.get_new_value(getattr(subject, subject_attr), column_name, value)
            setattr(subject, subject_attr, merged)
            return

        if column_name == "donor_id":
            study_subject.screening_number = self.get_new_value(
                study_subject.screening_number, column_name, value)
            return

        if column_name == "treatingphysician":
            # Non-empty physician names are labelled before landing in the comments field.
            if value is not None and value != "":
                value = "Treating physician: " + value
            study_subject.comments = self.get_new_value(study_subject.comments, column_name, value)
            return

        if column_name == "dateofbirth":
            study_subject.subject.date_born = self.get_new_date_value(
                study_subject.subject.date_born, column_name, value)
            return

        logger.warning("Don't know how to handle column " + column_name + " with data " + value)
......@@ -53,7 +53,8 @@ class Importer(object):
except:
self.problematic_count += 1
traceback.print_exc(file=sys.stdout)
logger.error("Problem with importing study subject: " + study_subject.screening_number)
logger.error("Problem with importing study subject: " + str(study_subject.screening_number) + "," +
str(study_subject.nd_number))
if "WARNING" in warning_counter.level2count:
self.warning_count = warning_counter.level2count["WARNING"]
logging.getLogger('').removeHandler(warning_counter)
......
......@@ -12,7 +12,7 @@ from django_cron import CronJobBase, Schedule
from web.models import ConfigurationItem, Study, VisitImportData, SubjectImportData
from web.models.constants import CRON_JOB_TIMEOUT, DEFAULT_FROM_EMAIL, GLOBAL_STUDY_ID
from web.smash_email import EmailSender
from .csv_tns_subject_import_reader import TnsCsvSubjectImportReader
from . import CsvSubjectImportReader
from .csv_tns_visit_import_reader import TnsCsvVisitImportReader
from .importer import Importer
......@@ -54,7 +54,7 @@ class SubjectImporterCronJob(CronJobBase):
return "import file not found"
# noinspection PyBroadException
try:
importer = Importer(TnsCsvSubjectImportReader(import_data))
importer = Importer(CsvSubjectImportReader(import_data))
importer.execute()
email_body = importer.get_summary()
EmailSender().send_email(email_title,
......
......@@ -39,6 +39,11 @@ class EtlCommon:
return old_value
if old_value == new_value:
return new_value
if type(new_value) == str and type(old_value) == str:
if new_value in old_value:
return old_value
if old_value in new_value:
return new_value
logger.warning(
"Contradicting entries in csv file for column: " + column_name + "(" + str(new_value) + "," + str(
old_value) + "). Latest value will be used")
......
......@@ -8,7 +8,8 @@ from django.utils.timezone import make_aware, is_aware
from web.models import Location, AppointmentType, StudySubject, Worker, Visit, Appointment, ConfigurationItem, \
Language, ContactAttempt, FlyingTeam, Availability, Subject, Study, StudyColumns, StudyNotificationParameters, \
VoucherType, VoucherTypePrice, Voucher, Room, Item, WorkerStudyRole, StudyRedCapColumns
VoucherType, VoucherTypePrice, Voucher, Room, Item, WorkerStudyRole, StudyRedCapColumns, EtlColumnMapping, \
SubjectImportData
from web.models.constants import REDCAP_TOKEN_CONFIGURATION_TYPE, REDCAP_BASE_URL_CONFIGURATION_TYPE, \
SEX_CHOICES_MALE, SUBJECT_TYPE_CHOICES_CONTROL, CONTACT_TYPES_PHONE, \
MONDAY_AS_DAY_OF_WEEK, COUNTRY_AFGHANISTAN_ID, VOUCHER_STATUS_NEW, GLOBAL_STUDY_ID, DEFAULT_LOCALE_NAME
......@@ -435,3 +436,42 @@ def datetimeify_date(date, timezone=datetime.timezone.utc):
actual_type = str(type(date))
raise TypeError(
"Date should be either a subclass of 'datetime.date', string or bytes! But is: {} instead".format(actual_type))
def create_tns_column_mapping(subject_import_data: SubjectImportData):
    """Create the EtlColumnMapping rows that describe the TNS CSV layout.

    Each row links one TNS CSV header to a model column of ``StudySubject``
    or ``Subject`` for the given import configuration.  Nothing is returned;
    the rows are created through ``EtlColumnMapping.objects.create``.

    :param subject_import_data: import configuration the mappings are attached to
    """
    study_subject_table = StudySubject._meta.db_table
    subject_table = Subject._meta.db_table

    # (table name, model column, CSV header) -- creation order is kept as listed.
    mappings = (
        (study_subject_table, "nd_number", "donor_id"),
        (study_subject_table, "comments", "treatingphysician"),
        (subject_table, "first_name", "firstname"),
        (subject_table, "first_name", "sig_firstname"),
        (subject_table, "last_name", "lastname"),
        (subject_table, "last_name", "sig_lastname"),
        (subject_table, "phone_number", "phonenr"),
        (subject_table, "date_born", "dateofbirth"),
        (subject_table, "next_of_keen_name", "representative"),
    )
    for table_name, column_name, csv_column_name in mappings:
        EtlColumnMapping.objects.create(etl_data=subject_import_data,
                                        column_name=column_name,
                                        csv_column_name=csv_column_name,
                                        table_name=table_name)
......@@ -4,24 +4,29 @@ import logging
from django.test import TestCase
from web.importer import CsvSubjectImportReader
from web.importer import CsvSubjectImportReader, MsgCounterHandler
from web.models import SubjectImportData, EtlColumnMapping, StudySubject
from web.tests.functions import get_resource_path, get_test_study
from web.tests.functions import get_resource_path, get_test_study, create_tns_column_mapping
logger = logging.getLogger(__name__)
class TestCsvReader(TestCase):
def setUp(self):
self.study_import_data = SubjectImportData.objects.create(study=get_test_study(), date_format="%d-%m-%Y")
EtlColumnMapping.objects.create(etl_data=self.study_import_data,
self.subject_import_data = SubjectImportData.objects.create(study=get_test_study(), date_format="%d-%m-%Y")
EtlColumnMapping.objects.create(etl_data=self.subject_import_data,
column_name="screening_number",
csv_column_name="participant_id",
table_name=StudySubject._meta.db_table)
self.warning_counter = MsgCounterHandler()
logging.getLogger('').addHandler(self.warning_counter)
def tearDown(self):
logging.getLogger('').removeHandler(self.warning_counter)
def test_load_data(self):
self.study_import_data.filename = get_resource_path('import.csv')
study_subjects = CsvSubjectImportReader(self.study_import_data).load_data()
self.subject_import_data.filename = get_resource_path('import.csv')
study_subjects = CsvSubjectImportReader(self.subject_import_data).load_data()
self.assertEqual(1, len(study_subjects))
study_subject = study_subjects[0]
......@@ -37,10 +42,45 @@ class TestCsvReader(TestCase):
self.assertIsNotNone(study_subject.study)
def test_load_problematic_dates(self):
self.study_import_data.filename = get_resource_path('import_date_of_birth.csv')
study_subjects = CsvSubjectImportReader(self.study_import_data).load_data()
self.subject_import_data.filename = get_resource_path('import_date_of_birth.csv')
study_subjects = CsvSubjectImportReader(self.subject_import_data).load_data()
self.assertEqual(3, len(study_subjects))
self.assertIsNone(study_subjects[0].subject.date_born)
self.assertIsNone(study_subjects[1].subject.date_born)
self.assertIsNone(study_subjects[2].subject.date_born)
def test_load_data_for_tns(self):
self.subject_import_data = SubjectImportData.objects.create(study=get_test_study(),
date_format="%d/%m/%Y",
csv_delimiter=";",
filename=get_resource_path(
'tns_subjects_import.csv'))
create_tns_column_mapping(self.subject_import_data)
study_subjects = CsvSubjectImportReader(self.subject_import_data).load_data()
self.assertEqual(3, len(study_subjects))
study_subject = study_subjects[1]
self.assertEqual("John2", study_subject.subject.first_name)
self.assertEqual("Doe2", study_subject.subject.last_name)
self.assertEqual("cov-222333", study_subject.screening_number)
self.assertEqual("cov-222333", study_subject.nd_number)
self.assertEqual("621000000", study_subject.subject.phone_number)
self.assertEqual("john.doe@neverland.lu", study_subject.subject.email)
self.assertEqual("5, avenue blabla", study_subject.subject.address)
self.assertEqual("9940", study_subject.subject.postal_code)
self.assertEqual("Belval", study_subject.subject.city)
self.assertTrue("Gregory House2" in study_subject.comments)
self.assertEqual(1, study_subject.subject.date_born.day)
self.assertEqual(2, study_subject.subject.date_born.month)
self.assertEqual(1977, study_subject.subject.date_born.year)
self.assertIsNotNone(study_subject.study)
self.assertEqual(0, self.get_warnings_count())
def get_warnings_count(self):
if "WARNING" in self.warning_counter.level2count:
return self.warning_counter.level2count["WARNING"]
else:
return 0
......@@ -12,7 +12,8 @@ from django_cron.models import CronJobLog
from web.importer import SubjectImporterCronJob, VisitImporterCronJob
from web.models import Visit, VisitImportData, SubjectImportData
from web.tests.functions import get_resource_path, get_test_study, create_appointment_type, create_worker
from web.tests.functions import get_resource_path, get_test_study, create_appointment_type, create_worker, \
create_tns_column_mapping
logger = logging.getLogger(__name__)
......@@ -30,10 +31,10 @@ class TestCronJobImporter(TestCase):
self.subject_import_data = SubjectImportData.objects.create(study=self.study,
import_worker=create_worker(),
csv_delimiter=';')
create_tns_column_mapping(self.subject_import_data)
CronJobLog.objects.all().delete()
def test_import_without_configuration(self):
job = SubjectImporterCronJob()
status = job.do()
......
# coding=utf-8
import logging
from django.test import TestCase
from web.importer import MsgCounterHandler
from web.importer import TnsCsvSubjectImportReader
from web.models import SubjectImportData
from web.tests.functions import get_resource_path, get_test_study
logger = logging.getLogger(__name__)
class TestTnsCsvSubjectReader(TestCase):
    """Checks that TnsCsvSubjectImportReader parses the sample TNS subject file."""

    def setUp(self):
        # Import configuration matching the fixture: ';' delimited, day/month/year dates.
        self.study_import_data = SubjectImportData.objects.create(
            study=get_test_study(),
            date_format="%d/%m/%Y",
            csv_delimiter=";",
        )
        # Count records reaching the root logger so the test can assert on warnings.
        self.warning_counter = MsgCounterHandler()
        root_logger = logging.getLogger('')
        root_logger.addHandler(self.warning_counter)

    def tearDown(self):
        root_logger = logging.getLogger('')
        root_logger.removeHandler(self.warning_counter)

    def test_load_data(self):
        """The second of three parsed rows carries the expected values, with no warnings."""
        self.study_import_data.filename = get_resource_path('tns_subjects_import.csv')
        reader = TnsCsvSubjectImportReader(self.study_import_data)
        study_subjects = reader.load_data()
        self.assertEqual(3, len(study_subjects))

        study_subject = study_subjects[1]
        self.assertEqual("John2", study_subject.subject.first_name)
        self.assertEqual("Doe2", study_subject.subject.last_name)
        self.assertEqual("cov-222333", study_subject.screening_number)
        self.assertEqual("cov-222333", study_subject.nd_number)
        self.assertEqual("621000000", study_subject.subject.phone_number)
        self.assertEqual("john.doe@neverland.lu", study_subject.subject.email)
        self.assertEqual("5, avenue blabla", study_subject.subject.address)
        self.assertEqual("9940", study_subject.subject.postal_code)
        self.assertEqual("Belval", study_subject.subject.city)
        self.assertIn("Gregory House2", study_subject.comments)
        self.assertEqual(1, study_subject.subject.date_born.day)
        self.assertEqual(2, study_subject.subject.date_born.month)
        self.assertEqual(1977, study_subject.subject.date_born.year)
        self.assertIsNotNone(study_subject.study)

        self.assertEqual(0, self.get_warnings_count())

    def get_warnings_count(self):
        """Return how many WARNING records the counter handler has seen (0 if none)."""
        counts = self.warning_counter.level2count
        return counts["WARNING"] if "WARNING" in counts else 0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment