diff --git a/smash/web/importer/__init__.py b/smash/web/importer/__init__.py index c1cb4fbf977323af3452024f54f80a4e5c557aef..03e2b4d5efc3a0ce6407b5f1584d13fb6e35e2d8 100644 --- a/smash/web/importer/__init__.py +++ b/smash/web/importer/__init__.py @@ -1,5 +1,4 @@ from .csv_subject_import_reader import CsvSubjectImportReader -from .csv_tns_subject_import_reader import TnsCsvSubjectImportReader from .csv_tns_visit_import_reader import TnsCsvVisitImportReader from .exporter import SubjectExporter, VisitExporter from .exporter_cron_job import SubjectExporterCronJob, VisitExporterCronJob @@ -9,5 +8,5 @@ from .subject_import_reader import SubjectImportReader from .warning_counter import MsgCounterHandler __all__ = [Importer, SubjectImportReader, CsvSubjectImportReader, SubjectImporterCronJob, VisitImporterCronJob, - SubjectExporter, VisitExporter, SubjectExporterCronJob, VisitExporterCronJob, TnsCsvSubjectImportReader, + SubjectExporter, VisitExporter, SubjectExporterCronJob, VisitExporterCronJob, TnsCsvVisitImportReader, MsgCounterHandler] diff --git a/smash/web/importer/csv_subject_import_reader.py b/smash/web/importer/csv_subject_import_reader.py index 2e1e49862c5b2925d15dfcd2cb05e0d1d8896cfb..e83325b40873bfaa774cb6aa08e0ac525e56c4b8 100644 --- a/smash/web/importer/csv_subject_import_reader.py +++ b/smash/web/importer/csv_subject_import_reader.py @@ -8,7 +8,7 @@ from django.db import models from django.db.models import Field from web.models import StudySubject, Subject, SubjectImportData -from .subject_import_reader import SubjectImportReader +from .subject_import_reader import SubjectImportReader, EtlCommon logger = logging.getLogger(__name__) @@ -23,7 +23,8 @@ class CsvSubjectImportReader(SubjectImportReader): def load_data(self) -> List[StudySubject]: study_subjects = [] with open(self.import_data.get_absolute_file_path()) as csv_file: - reader = csv.reader(csv_file, delimiter=self.import_data.csv_delimiter) + reader = csv.reader((EtlCommon.remove_bom(line) for line in csv_file), + delimiter=self.import_data.csv_delimiter) headers = next(reader, None) for row in reader: subject = Subject() @@ -34,6 +35,8 @@ class CsvSubjectImportReader(SubjectImportReader): self.add_data(study_subject, header, value) if study_subject.nd_number is None or study_subject.nd_number == "": study_subject.nd_number = study_subject.screening_number + if study_subject.screening_number is None or study_subject.screening_number == "": + study_subject.screening_number = study_subject.nd_number study_subjects.append(study_subject) return study_subjects @@ -42,7 +45,7 @@ class CsvSubjectImportReader(SubjectImportReader): value = raw_data if field is None: logger.warning("Don't know how to handle column " + column_name + " with data " + value) - pass + return if field.get_internal_type() == "DateField": value = self.get_date(value) @@ -62,7 +65,8 @@ class CsvSubjectImportReader(SubjectImportReader): def add_mappings(self, object_type: Type[models.Model]): for field in object_type._meta.get_fields(): if field.get_internal_type() == "CharField" or \ - field.get_internal_type() == "DateField": + field.get_internal_type() == "DateField" or \ + field.get_internal_type() == "TextField": found = False for mapping in self.import_data.column_mappings.all(): if mapping.table_name == object_type._meta.db_table and field.name == mapping.column_name: diff --git a/smash/web/importer/csv_tns_subject_import_reader.py b/smash/web/importer/csv_tns_subject_import_reader.py deleted file mode 100644 index 9c4f13dd7a0be3a7a823ae26fa07630677c2e42b..0000000000000000000000000000000000000000 --- a/smash/web/importer/csv_tns_subject_import_reader.py +++ /dev/null @@ -1,63 +0,0 @@ -import csv -import logging -from typing import List - -from web.models import StudySubject, Subject, SubjectImportData -from .subject_import_reader import SubjectImportReader, EtlCommon - -logger = logging.getLogger(__name__) - - -class TnsCsvSubjectImportReader(SubjectImportReader): - def __init__(self, import_data: SubjectImportData): - super().__init__(import_data) - - def load_data(self) -> List[StudySubject]: - study_subjects = [] - with open(self.import_data.get_absolute_file_path()) as csv_file: - reader = csv.reader((EtlCommon.remove_bom(line) for line in csv_file), - delimiter=self.import_data.csv_delimiter) - headers = next(reader, None) - for row in reader: - subject = Subject() - study_subject = StudySubject() - study_subject.subject = subject - study_subject.study = self.import_data.study - for header, value in zip(headers, row): - self.add_data(study_subject, header, value) - if study_subject.nd_number is None or study_subject.nd_number == "": - study_subject.nd_number = study_subject.screening_number - study_subjects.append(study_subject) - return study_subjects - - def add_data(self, study_subject: StudySubject, column_name: str, value: str): - if column_name == "firstname" or column_name == "sig_firstname": - study_subject.subject.first_name = self.get_new_value(study_subject.subject.first_name, column_name, value) - elif column_name == "lastname" or column_name == "sig_lastname": - study_subject.subject.last_name = self.get_new_value(study_subject.subject.last_name, column_name, value) - elif column_name == "donor_id": - study_subject.screening_number = self.get_new_value(study_subject.screening_number, column_name, value) - elif column_name == "email": - study_subject.subject.email = self.get_new_value(study_subject.subject.email, column_name, value) - elif column_name == "address": - study_subject.subject.address = self.get_new_value(study_subject.subject.address, column_name, value) - elif column_name == "postal_code": - study_subject.subject.postal_code = self.get_new_value(study_subject.subject.postal_code, column_name, - value) - elif column_name == "city": - study_subject.subject.city = self.get_new_value(study_subject.subject.city, column_name, value) - elif column_name == "phonenr": - study_subject.subject.phone_number = self.get_new_value(study_subject.subject.phone_number, column_name, - value) - elif column_name == "representative": - study_subject.subject.next_of_keen_name = self.get_new_value(study_subject.subject.next_of_keen_name, - column_name, value) - elif column_name == "treatingphysician": - if value is not None and value != "": - value = "Treating physician: " + value - study_subject.comments = self.get_new_value(study_subject.comments, column_name, value) - elif column_name == "dateofbirth": - study_subject.subject.date_born = self.get_new_date_value(study_subject.subject.date_born, column_name, - value) - else: - logger.warning("Don't know how to handle column " + column_name + " with data " + value) diff --git a/smash/web/importer/importer.py b/smash/web/importer/importer.py index abd6787a6fe55d11a94dbbc752bd0fe19d93e8c9..edc220ce61cfe9d5ef188eee2f2ff31bc70c7cd6 100644 --- a/smash/web/importer/importer.py +++ b/smash/web/importer/importer.py @@ -53,7 +53,8 @@ class Importer(object): except: self.problematic_count += 1 traceback.print_exc(file=sys.stdout) - logger.error("Problem with importing study subject: " + study_subject.screening_number) + logger.error("Problem with importing study subject: " + str(study_subject.screening_number) + "," + + str(study_subject.nd_number)) if "WARNING" in warning_counter.level2count: self.warning_count = warning_counter.level2count["WARNING"] logging.getLogger('').removeHandler(warning_counter) diff --git a/smash/web/importer/importer_cron_job.py b/smash/web/importer/importer_cron_job.py index 25a9253d32cbd6438f21dd07a687f21c6d37f342..ccba3ab948df4bb487ef6307bf5e86a394fc7ab7 100644 --- a/smash/web/importer/importer_cron_job.py +++ b/smash/web/importer/importer_cron_job.py @@ -12,7 +12,7 @@ from django_cron import CronJobBase, Schedule from web.models import ConfigurationItem, Study, VisitImportData, SubjectImportData from web.models.constants import CRON_JOB_TIMEOUT, DEFAULT_FROM_EMAIL, GLOBAL_STUDY_ID from web.smash_email import EmailSender -from .csv_tns_subject_import_reader import TnsCsvSubjectImportReader +from . import CsvSubjectImportReader from .csv_tns_visit_import_reader import TnsCsvVisitImportReader from .importer import Importer @@ -54,7 +54,7 @@ class SubjectImporterCronJob(CronJobBase): return "import file not found" # noinspection PyBroadException try: - importer = Importer(TnsCsvSubjectImportReader(import_data)) + importer = Importer(CsvSubjectImportReader(import_data)) importer.execute() email_body = importer.get_summary() EmailSender().send_email(email_title, diff --git a/smash/web/importer/subject_import_reader.py b/smash/web/importer/subject_import_reader.py index 9ff92dcbe7909aa8f56670539901383b45c1861c..31dc5088b5bfc1928f679e0d6156c461fb24544d 100644 --- a/smash/web/importer/subject_import_reader.py +++ b/smash/web/importer/subject_import_reader.py @@ -39,6 +39,11 @@ class EtlCommon: return old_value if old_value == new_value: return new_value + if type(new_value) == str and type(old_value) == str: + if new_value in old_value: + return old_value + if old_value in new_value: + return new_value logger.warning( "Contradicting entries in csv file for column: " + column_name + "(" + str(new_value) + "," + str( old_value) + "). Latest value will be used") diff --git a/smash/web/tests/functions.py b/smash/web/tests/functions.py index 52a926bc02a0d046f4757ea572af739e21876ec3..1f4e0221f1162e276a571817dae5e21ef1980140 100644 --- a/smash/web/tests/functions.py +++ b/smash/web/tests/functions.py @@ -8,7 +8,8 @@ from django.utils.timezone import make_aware, is_aware from web.models import Location, AppointmentType, StudySubject, Worker, Visit, Appointment, ConfigurationItem, \ Language, ContactAttempt, FlyingTeam, Availability, Subject, Study, StudyColumns, StudyNotificationParameters, \ - VoucherType, VoucherTypePrice, Voucher, Room, Item, WorkerStudyRole, StudyRedCapColumns + VoucherType, VoucherTypePrice, Voucher, Room, Item, WorkerStudyRole, StudyRedCapColumns, EtlColumnMapping, \ + SubjectImportData from web.models.constants import REDCAP_TOKEN_CONFIGURATION_TYPE, REDCAP_BASE_URL_CONFIGURATION_TYPE, \ SEX_CHOICES_MALE, SUBJECT_TYPE_CHOICES_CONTROL, CONTACT_TYPES_PHONE, \ MONDAY_AS_DAY_OF_WEEK, COUNTRY_AFGHANISTAN_ID, VOUCHER_STATUS_NEW, GLOBAL_STUDY_ID, DEFAULT_LOCALE_NAME @@ -435,3 +436,42 @@ def datetimeify_date(date, timezone=datetime.timezone.utc): actual_type = str(type(date)) raise TypeError( "Date should be either a subclass of 'datetime.date', string or bytes! But is: {} instead".format(actual_type)) + + +def create_tns_column_mapping(subject_import_data: SubjectImportData): + EtlColumnMapping.objects.create(etl_data=subject_import_data, + column_name="nd_number", + csv_column_name="donor_id", + table_name=StudySubject._meta.db_table) + EtlColumnMapping.objects.create(etl_data=subject_import_data, + column_name="comments", + csv_column_name="treatingphysician", + table_name=StudySubject._meta.db_table) + EtlColumnMapping.objects.create(etl_data=subject_import_data, + column_name="first_name", + csv_column_name="firstname", + table_name=Subject._meta.db_table) + EtlColumnMapping.objects.create(etl_data=subject_import_data, + column_name="first_name", + csv_column_name="sig_firstname", + table_name=Subject._meta.db_table) + EtlColumnMapping.objects.create(etl_data=subject_import_data, + column_name="last_name", + csv_column_name="lastname", + table_name=Subject._meta.db_table) + EtlColumnMapping.objects.create(etl_data=subject_import_data, + column_name="last_name", + csv_column_name="sig_lastname", + table_name=Subject._meta.db_table) + EtlColumnMapping.objects.create(etl_data=subject_import_data, + column_name="phone_number", + csv_column_name="phonenr", + table_name=Subject._meta.db_table) + EtlColumnMapping.objects.create(etl_data=subject_import_data, + column_name="date_born", + csv_column_name="dateofbirth", + table_name=Subject._meta.db_table) + EtlColumnMapping.objects.create(etl_data=subject_import_data, + column_name="next_of_keen_name", + csv_column_name="representative", + table_name=Subject._meta.db_table) diff --git a/smash/web/tests/importer/test_csv_subject_import_reader.py b/smash/web/tests/importer/test_csv_subject_import_reader.py index aeb932dc0bd06b484ecff530b4ddfcea2ff14c79..a304d33548aa0cca8b3a1cd91295094b8ed56431 100644 --- a/smash/web/tests/importer/test_csv_subject_import_reader.py +++ b/smash/web/tests/importer/test_csv_subject_import_reader.py @@ -4,24 +4,29 @@ import logging from django.test import TestCase -from web.importer import CsvSubjectImportReader +from web.importer import CsvSubjectImportReader, MsgCounterHandler from web.models import SubjectImportData, EtlColumnMapping, StudySubject -from web.tests.functions import get_resource_path, get_test_study +from web.tests.functions import get_resource_path, get_test_study, create_tns_column_mapping logger = logging.getLogger(__name__) class TestCsvReader(TestCase): def setUp(self): - self.study_import_data = SubjectImportData.objects.create(study=get_test_study(), date_format="%d-%m-%Y") - EtlColumnMapping.objects.create(etl_data=self.study_import_data, + self.subject_import_data = SubjectImportData.objects.create(study=get_test_study(), date_format="%d-%m-%Y") + EtlColumnMapping.objects.create(etl_data=self.subject_import_data, column_name="screening_number", csv_column_name="participant_id", table_name=StudySubject._meta.db_table) + self.warning_counter = MsgCounterHandler() + logging.getLogger('').addHandler(self.warning_counter) + + def tearDown(self): + logging.getLogger('').removeHandler(self.warning_counter) def test_load_data(self): - self.study_import_data.filename = get_resource_path('import.csv') - study_subjects = CsvSubjectImportReader(self.study_import_data).load_data() + self.subject_import_data.filename = get_resource_path('import.csv') + study_subjects = CsvSubjectImportReader(self.subject_import_data).load_data() self.assertEqual(1, len(study_subjects)) study_subject = study_subjects[0] @@ -37,10 +42,45 @@ class TestCsvReader(TestCase): self.assertIsNotNone(study_subject.study) def test_load_problematic_dates(self): - self.study_import_data.filename = get_resource_path('import_date_of_birth.csv') - study_subjects = CsvSubjectImportReader(self.study_import_data).load_data() + self.subject_import_data.filename = get_resource_path('import_date_of_birth.csv') + study_subjects = CsvSubjectImportReader(self.subject_import_data).load_data() self.assertEqual(3, len(study_subjects)) self.assertIsNone(study_subjects[0].subject.date_born) self.assertIsNone(study_subjects[1].subject.date_born) self.assertIsNone(study_subjects[2].subject.date_born) + + def test_load_data_for_tns(self): + self.subject_import_data = SubjectImportData.objects.create(study=get_test_study(), + date_format="%d/%m/%Y", + csv_delimiter=";", + filename=get_resource_path( + 'tns_subjects_import.csv')) + create_tns_column_mapping(self.subject_import_data) + study_subjects = CsvSubjectImportReader(self.subject_import_data).load_data() + self.assertEqual(3, len(study_subjects)) + study_subject = study_subjects[1] + self.assertEqual("John2", study_subject.subject.first_name) + self.assertEqual("Doe2", study_subject.subject.last_name) + self.assertEqual("cov-222333", study_subject.screening_number) + self.assertEqual("cov-222333", study_subject.nd_number) + self.assertEqual("621000000", study_subject.subject.phone_number) + self.assertEqual("john.doe@neverland.lu", study_subject.subject.email) + self.assertEqual("5, avenue blabla", study_subject.subject.address) + self.assertEqual("9940", study_subject.subject.postal_code) + self.assertEqual("Belval", study_subject.subject.city) + + self.assertTrue("Gregory House2" in study_subject.comments) + + self.assertEqual(1, study_subject.subject.date_born.day) + self.assertEqual(2, study_subject.subject.date_born.month) + self.assertEqual(1977, study_subject.subject.date_born.year) + + self.assertIsNotNone(study_subject.study) + self.assertEqual(0, self.get_warnings_count()) + + def get_warnings_count(self): + if "WARNING" in self.warning_counter.level2count: + return self.warning_counter.level2count["WARNING"] + else: + return 0 diff --git a/smash/web/tests/importer/test_importer_cron_job.py b/smash/web/tests/importer/test_importer_cron_job.py index 813a91e19a79df8257a82ab3a512390d3ab63f56..4a89fb2223a9f71f06ddc0c91394d55cd1bc5be2 100644 --- a/smash/web/tests/importer/test_importer_cron_job.py +++ b/smash/web/tests/importer/test_importer_cron_job.py @@ -12,7 +12,8 @@ from django_cron.models import CronJobLog from web.importer import SubjectImporterCronJob, VisitImporterCronJob from web.models import Visit, VisitImportData, SubjectImportData -from web.tests.functions import get_resource_path, get_test_study, create_appointment_type, create_worker +from web.tests.functions import get_resource_path, get_test_study, create_appointment_type, create_worker, \ + create_tns_column_mapping logger = logging.getLogger(__name__) @@ -30,10 +31,10 @@ class TestCronJobImporter(TestCase): self.subject_import_data = SubjectImportData.objects.create(study=self.study, import_worker=create_worker(), csv_delimiter=';') + create_tns_column_mapping(self.subject_import_data) CronJobLog.objects.all().delete() def test_import_without_configuration(self): - job = SubjectImporterCronJob() status = job.do() diff --git a/smash/web/tests/importer/test_tns_csv_subject_import_reader.py b/smash/web/tests/importer/test_tns_csv_subject_import_reader.py deleted file mode 100644 index d47d4adcd7a9c52aee3e9d876082699a0d1a12e1..0000000000000000000000000000000000000000 --- a/smash/web/tests/importer/test_tns_csv_subject_import_reader.py +++ /dev/null @@ -1,55 +0,0 @@ -# coding=utf-8 - -import logging - -from django.test import TestCase - -from web.importer import MsgCounterHandler -from web.importer import TnsCsvSubjectImportReader -from web.models import SubjectImportData -from web.tests.functions import get_resource_path, get_test_study - -logger = logging.getLogger(__name__) - - -class TestTnsCsvSubjectReader(TestCase): - - def setUp(self): - self.study_import_data = SubjectImportData.objects.create(study=get_test_study(), - date_format="%d/%m/%Y", - csv_delimiter=";") - self.warning_counter = MsgCounterHandler() - logging.getLogger('').addHandler(self.warning_counter) - - def tearDown(self): - logging.getLogger('').removeHandler(self.warning_counter) - - def test_load_data(self): - self.study_import_data.filename = get_resource_path('tns_subjects_import.csv') - study_subjects = TnsCsvSubjectImportReader(self.study_import_data).load_data() - self.assertEqual(3, len(study_subjects)) - study_subject = study_subjects[1] - self.assertEqual("John2", study_subject.subject.first_name) - self.assertEqual("Doe2", study_subject.subject.last_name) - self.assertEqual("cov-222333", study_subject.screening_number) - self.assertEqual("cov-222333", study_subject.nd_number) - self.assertEqual("621000000", study_subject.subject.phone_number) - self.assertEqual("john.doe@neverland.lu", study_subject.subject.email) - self.assertEqual("5, avenue blabla", study_subject.subject.address) - self.assertEqual("9940", study_subject.subject.postal_code) - self.assertEqual("Belval", study_subject.subject.city) - - self.assertTrue("Gregory House2" in study_subject.comments) - - self.assertEqual(1, study_subject.subject.date_born.day) - self.assertEqual(2, study_subject.subject.date_born.month) - self.assertEqual(1977, study_subject.subject.date_born.year) - - self.assertIsNotNone(study_subject.study) - self.assertEqual(0, self.get_warnings_count()) - - def get_warnings_count(self): - if "WARNING" in self.warning_counter.level2count: - return self.warning_counter.level2count["WARNING"] - else: - return 0