From 932fb16254a5f3025a23a46668b4341dcd75ffa6 Mon Sep 17 00:00:00 2001
From: Piotr Gawron <piotr.gawron@uni.lu>
Date: Sun, 5 Apr 2020 13:34:10 +0200
Subject: [PATCH] simple csv parser added

---
 smash/web/importer/__init__.py                |  3 +-
 .../web/importer/csv_subject_import_reader.py | 69 +++++++++++++++++++
 smash/web/tests/data/import.csv               |  2 +
 .../test_csv_subject_import_reader.py         | 29 ++++++++
 4 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 smash/web/importer/csv_subject_import_reader.py
 create mode 100644 smash/web/tests/data/import.csv
 create mode 100644 smash/web/tests/importer/test_csv_subject_import_reader.py

diff --git a/smash/web/importer/__init__.py b/smash/web/importer/__init__.py
index 4a1ef7ed..76e28e32 100644
--- a/smash/web/importer/__init__.py
+++ b/smash/web/importer/__init__.py
@@ -1,4 +1,5 @@
 from importer import Importer
 from subject_import_reader import SubjectImportReader
+from csv_subject_import_reader import CsvSubjectImportReader
 
-__all__ = [Importer, SubjectImportReader]
+__all__ = ["Importer", "SubjectImportReader", "CsvSubjectImportReader"]  # entries must be name strings
diff --git a/smash/web/importer/csv_subject_import_reader.py b/smash/web/importer/csv_subject_import_reader.py
new file mode 100644
index 00000000..54883161
--- /dev/null
+++ b/smash/web/importer/csv_subject_import_reader.py
@@ -0,0 +1,69 @@
+import csv
+import datetime
+import logging
+
+from subject_import_reader import SubjectImportReader
+from web.models import StudySubject, Subject, Study
+from web.models.constants import GLOBAL_STUDY_ID
+
+CSV_DATE_FORMAT = "%d-%m-%Y"  # strptime pattern for date cells: day-month-year, e.g. 01-02-2020
+
+logger = logging.getLogger(__name__)
+
+
+class CsvSubjectImportReader(SubjectImportReader):
+    """Reads study subjects from a comma separated file whose first row holds the column names."""
+
+    def __init__(self):
+        self.study = Study.objects.filter(id=GLOBAL_STUDY_ID)[0]
+
+    def load_data(self, filename):
+        """Return a list with one StudySubject (not saved here) per data row of the csv file `filename`."""
+        study_subjects = []
+        with open(filename) as csv_file:
+            reader = csv.reader(csv_file, delimiter=',')
+            headers = next(reader, None)
+            for row in reader:
+                study_subject = StudySubject()
+                study_subject.subject = Subject()
+                study_subject.study = self.study
+                for header, value in zip(headers, row):
+                    self.add_data(study_subject, header, value)
+                # nd_number falls back to the screening number when the row did not set one
+                if study_subject.nd_number is None or study_subject.nd_number == "":
+                    study_subject.nd_number = study_subject.screening_number
+                study_subjects.append(study_subject)
+        return study_subjects
+
+    def add_data(self, study_subject, column_name, value):
+        # type: (StudySubject, str, str) ->  None
+        """Store `value` in the field of `study_subject` selected by `column_name`; unknown columns are logged."""
+        if column_name == "first_name":
+            study_subject.subject.first_name = self.get_new_value(study_subject.subject.first_name, column_name, value)
+        elif column_name == "last_name":
+            study_subject.subject.last_name = self.get_new_value(study_subject.subject.last_name, column_name, value)
+        elif column_name == "participant_id":
+            study_subject.screening_number = self.get_new_value(study_subject.screening_number, column_name, value)
+        elif column_name == "date_born":
+            study_subject.subject.date_born = self.get_new_date_value(study_subject.subject.date_born, column_name,
+                                                                      value)
+        else:
+            logger.warning("Don't know how to handle column %s with data %s", column_name, value)
+
+    def get_new_value(self, old_value, column_name, new_value):
+        # type: (unicode,unicode,unicode) ->  unicode
+        """Merge two values: an empty side yields the other; if both are set, warn and return `new_value`."""
+        if old_value is None or old_value == "":
+            return new_value
+        if new_value is None or new_value == "":
+            return old_value
+        logger.warning("Contradicting entries in csv file for column: %s(%s,%s). Latest value will be used",
+                       column_name, new_value, old_value)
+        return new_value
+
+    def get_new_date_value(self, old_value, column_name, new_value):
+        # type: (datetime,unicode,unicode) ->  datetime
+        """Date variant of get_new_value: `new_value` is parsed with CSV_DATE_FORMAT (ValueError if malformed)."""
+        if new_value is None or new_value == "":
+            return old_value  # an empty cell must not reach strptime
+        return self.get_new_value(old_value, column_name, datetime.datetime.strptime(new_value, CSV_DATE_FORMAT))
diff --git a/smash/web/tests/data/import.csv b/smash/web/tests/data/import.csv
new file mode 100644
index 00000000..b13e437a
--- /dev/null
+++ b/smash/web/tests/data/import.csv
@@ -0,0 +1,2 @@
+first_name,last_name,participant_id,date_born
+Piotr,Gawron,Cov-000001,01-02-2020
\ No newline at end of file
diff --git a/smash/web/tests/importer/test_csv_subject_import_reader.py b/smash/web/tests/importer/test_csv_subject_import_reader.py
new file mode 100644
index 00000000..79846518
--- /dev/null
+++ b/smash/web/tests/importer/test_csv_subject_import_reader.py
@@ -0,0 +1,29 @@
+# coding=utf-8
+
+import logging
+
+from django.test import TestCase
+
+from web.importer import CsvSubjectImportReader
+from web.tests.functions import get_resource_path
+
+logger = logging.getLogger(__name__)
+
+
+class TestCsvReader(TestCase):
+    # Exercises CsvSubjectImportReader against the import.csv test resource.
+
+    def test_load_data(self):
+        # The single data row of import.csv must become exactly one populated study subject.
+        loaded = CsvSubjectImportReader().load_data(get_resource_path('import.csv'))
+        self.assertEqual(1, len(loaded))
+        imported = loaded[0]
+        self.assertEqual("Piotr", imported.subject.first_name)
+        self.assertEqual("Gawron", imported.subject.last_name)
+        self.assertEqual("Cov-000001", imported.screening_number)
+        # the file has no nd_number column, so the reader mirrors the screening number
+        self.assertEqual("Cov-000001", imported.nd_number)
+
+        born = imported.subject.date_born
+        self.assertEqual((1, 2, 2020), (born.day, born.month, born.year))
+        self.assertIsNotNone(imported.study)
-- 
GitLab