added vouchers

b13fe510 · Carlos Vega · f2c979ac · b13fe510
Commit b13fe510 authored 6 years ago by Carlos Vega
--- a/smash/import_file.py
+++ b/smash/import_file.py
@@ -7,41 +7,67 @@ import pandas as pd
 import numpy as np
 import logging
 import datetime
+import re
+from operator import itemgetter
+from collections import OrderedDict, defaultdict
+import sys
 import string
-from web.models.constants import SEX_CHOICES, SEX_CHOICES_MALE, SEX_CHOICES_FEMALE
+from web.models.constants import VOUCHER_STATUS_IN_USE, SUBJECT_TYPE_CHOICES_PATIENT, GLOBAL_STUDY_ID, SEX_CHOICES, SEX_CHOICES_MALE, SEX_CHOICES_FEMALE
 from web.algorithm import VerhoeffAlgorithm, LuhnAlgorithm
 from web.utils import is_valid_social_security_number
+from web.models import VoucherType, Voucher, Country, AppointmentTypeLink, AppointmentType, Study, Worker, Language, Subject, WorkerStudyRole, StudySubject, Location, FlyingTeam, Visit, Appointment, AppointmentType
+from web.models.worker_study_role import WORKER_STAFF, ROLE_CHOICES_SECRETARY, ROLE_CHOICES_HEALTH_PARTNER, \
+    WORKER_HEALTH_PARTNER, ROLE_CHOICES_VOUCHER_PARTNER, WORKER_VOUCHER_PARTNER
+# Name of the Location given to subjects that are not handled by a flying team.
+DEFAULT_LOCATION = 'CHL'
+# Matches dates written as D.M.YYYY or DD.MM.YYYY; used to pull the dates out of visit cells.
+date_regex = re.compile(r'\d{1,2}\.\d{1,2}\.\d{4}')
+def itembetter(items, lst):
+    '''
+    Return the elements of lst found at the positions listed in items, always as a list.
+
+    operator.itemgetter returns a bare element for one index, a tuple for several
+    and raises TypeError for none; indexing directly gives a list in every case,
+    including an empty list when items is empty.
+    '''
+    return [lst[i] for i in items]
+def indexof(element, l):
+    '''Return the list of every position in l whose value equals element.'''
+    positions = []
+    for position, candidate in enumerate(l):
+        if candidate == element:
+            positions.append(position)
+    return positions
 '''
-# Things that *could* aready be on the database:
+# Things that *could* already be on the database:
 - Language
 - Country
- A subject with the same SS number
- A subject with the same ND number
- Referals (Health Partner)
 - Location
 - Flying Team
+- Referrals (Health Partner)
+- A subject with the same SS number, first name and last name
+- A studySubject with the same ND number, subject
 # Columns to be transformed to a standard format
 - Gender
 - Language
 - Prefered writen language
+- Country
 - SS number
 - Date of birth
- Date added (V1)
+- Date added (V1) # visits might have comments like (Tel)
 - ... (V2)
 - ... (V3)
 - ... (V4)
+- Voucher activity (drop cells that contain "None" in any form, then split on line breaks)
+- Voucher reference (split on line breaks)
 Boolean
 - Deceased
 - Postponed
 - Resigned
 - Excluded
- PDP 1.0 
+- PDP 1.0
 '''
@@ -49,21 +75,55 @@ Boolean
 Column Converter Functions
 '''
-# in converters dict
+# converters
+# Boolean:
+# Deceased
+# Postponed
+# Resigned
+# Excluded
+# PDP 1.0
+# Flying Team (FT)
+def parse_voucher_reference(vr):
+    '''
+    Turn the content of a voucher reference cell into a list of references.
+    Surrounding whitespace is stripped; an empty cell gives an empty list,
+    anything else is split on line breaks.
+    '''
+    cleaned = vr.strip()
+    if cleaned == u'':
+        return []
+    return cleaned.split('\n')
-def parse_column_date_of_birth(date):
+def parse_voucher_type(vt):
-    return datetime.datetime.strptime(date, '%d.%m.%Y').strftime('%Y-%m-%d')
+    '''
+    Turn the content of a voucher activity cell into a list of voucher types.
+    A cell that contains "none" in any letter case, or nothing but whitespace,
+    gives an empty list; anything else is stripped and split on line breaks.
+    '''
+    if 'NONE' in vt.upper():
+        return []
+    entries = vt.strip()
+    if entries == u'':
+        return []
+    return entries.split('\n')
+def parse_boolean(boolean_Y_N):
+    '''
+    Return True if the value is 'y' or 'Y'.
+    Otherwise return False, even if the value cannot be interpreted
+    (NaN, or anything without an upper() method); such failures are logged.
+    '''
+    try:
+        if isinstance(boolean_Y_N, float) and np.isnan(boolean_Y_N):
+            return False
+        return boolean_Y_N.upper() == 'Y'
+    except Exception as e:
+        # Exception.message only exists on Python 2; formatting the exception
+        # itself works everywhere and keeps this handler from raising in turn.
+        logging.warning('parse_boolean failed for {}.'.format(boolean_Y_N))
+        logging.warning('{} {}'.format(e, e.args))
+        return False
-gender_table = {'m': SEX_CHOICES_MALE, 'f': SEX_CHOICES_FEMALE}
+# birth date
+def parse_column_date_of_birth(date):
+    '''Convert a date string written as DD.MM.YYYY into its ISO form YYYY-MM-DD.'''
+    parsed = datetime.datetime.strptime(date, '%d.%m.%Y')
+    return parsed.strftime('%Y-%m-%d')
+# gender
+gender_table = {'m': SEX_CHOICES_MALE, 'f': SEX_CHOICES_FEMALE}
 def parse_column_gender(gender):
+    '''
+    Map a gender cell ('m' or 'f', in any letter case) to the matching SEX_CHOICES_* constant.
+    Return None when the value is not in gender_table or the lookup fails for any other reason.
+    '''
    try:
        return gender_table[gender.lower()]
    except:
        return None
+# SS number
 def parse_column_ss_number(ss):
@@ -80,13 +140,11 @@ def parse_column_ss_number(ss):
    if len(ss) == 13:
        if not is_valid_social_security_number(ss):
            logging.debug('Invalid SS number: |{}|'.format(ss))
-            return None
    else:
        logging.debug('Invalid SS number: (Length not valid) |{}|'.format(ss))
-        return ss
    return ss
-# not in coverters dict
+# Language
 language_table = {
@@ -98,7 +156,8 @@ language_table = {
    'E': 'English',
    'P': 'Portuguese',
    'A': 'Arabic',
-    'SP': 'Spanish'
+    'SP': 'Spanish',
+    'FIN': 'Finnish'
 }
 language_translation_table = {
@@ -116,7 +175,8 @@ def apply_column_prefered_language(languages):
 def apply_column_languages(languages):
-    if type(languages) != float:
+    languages = languages.strip()
+    if type(languages) != float and len(languages) > 0:
        # replacements and transformations
        languages = unicode(languages).upper().translate(
            language_translation_table)
@@ -128,28 +188,306 @@ def apply_column_languages(languages):
        return np.array(new_list)
    else:
        logging.debug(
-            'Parse Languages: Empty or invalid Languages: |{}|'.format(languages))
+            'Parse Languages: Empty, NaN, or invalid Languages: |{}|'.format(languages))
        return np.array([])
-converters = {
+# Country
-    'Date of birth': parse_column_date_of_birth,
-    'Gender': parse_column_gender,
+country_table = {
-    'SS Number': parse_column_ss_number
+    'LUX': 'Luxembourg'
 }
-dtype = {
-    'Languages': object,
+def apply_column_country(country):
-    'Prefered writen language': object
+    '''
+    Translate a country code into the country name listed in country_table.
+    Values that are not in the table are logged and returned unchanged.
+    '''
+    try:
+        return country_table[country]
+    except (KeyError, TypeError):
+        # KeyError: code missing from the table; TypeError: unhashable cell value
+        logging.warning('Invalid Country: {}'.format(country))
+        return country
+'''
+Instead of using the converters parameter of the read_excel method,
+we apply the transformations afterwards, because read_excel does not allow
+converters that return a list.
+'''
+# Maps each upper-cased column name to the function that is applied to every cell of that column.
+converters = {
+    'DATE OF BIRTH': parse_column_date_of_birth,
+    'GENDER': parse_column_gender,
+    'SS NUMBER': parse_column_ss_number,
+    'COUNTRY': apply_column_country,
+    'LANGUAGES': apply_column_languages,
+    'PREFERED WRITEN LANGUAGE': apply_column_prefered_language,
+    # Y/N columns
+    'DECEASED': parse_boolean,
+    'POSTPONED': parse_boolean,
+    'RESIGNED': parse_boolean,
+    'EXCLUDED': parse_boolean,
+    'PDP 1.0': parse_boolean,
+    'FLYING TEAM (FT)': parse_boolean,
+    # multi-line cells, converted to lists
+    'VOUCHER ACTIVITY': parse_voucher_type,
+    'VOUCHER REFERENCE': parse_voucher_reference
 }
+# add voucher for subject
+# voucher partner code (as found in the voucher reference) -> name of the partner
+voucher_partners = {}
+voucher_partners['ZIT'] = 'Zitha'
+def add_subject_vouchers(voucher_reference, referral, voucher_types):
+    '''
+    Create (or update) the voucher described by a voucher reference and return it.
+
+    voucher_reference: string made of five '-' separated fields:
+        ND number, issue date (YYYYMMDD), voucher partner code, voucher type code, number.
+    referral: Worker stored as the issue worker of the voucher.
+    voucher_types: dict whose values are the VoucherType objects given to the voucher partner.
+
+    Raises ValueError if the reference does not have exactly five fields or its date is malformed.
+    The StudySubject and the VoucherType named in the reference are fetched with get(),
+    so both must already exist.
+    '''
+    nd_number, date, voucher_partner, voucher_type, _ = voucher_reference.split('-')
+    issue_date = datetime.datetime.strptime(date, '%Y%m%d')
+    # the voucher expires 365 days after it was issued
+    expiry_date = issue_date + datetime.timedelta(days=365)
+    # unknown partner codes are used as the partner name
+    usage_partner, created = Worker.objects.update_or_create(
+        name=voucher_partners.get(voucher_partner, voucher_partner))
+    usage_partner.roles.update(role=ROLE_CHOICES_VOUCHER_PARTNER)
+    # create workerStudyRole
+    WorkerStudyRole.objects.update_or_create(
+        worker=usage_partner, study_id=GLOBAL_STUDY_ID, role=ROLE_CHOICES_VOUCHER_PARTNER)
+    usage_partner.voucher_types.set(voucher_types.values())
+    usage_partner.save()
+    if created:
+        logging.warning('New Voucher Partner created: {}'.format(voucher_partner))
+    vt = VoucherType.objects.get(code=voucher_type)
+    study_subject = StudySubject.objects.get(nd_number=nd_number)
+    voucher, created = Voucher.objects.update_or_create(
+        number=voucher_reference, issue_date=issue_date,
+        expiry_date=expiry_date, voucher_type=vt, study_subject=study_subject,
+        status=VOUCHER_STATUS_IN_USE, usage_partner=usage_partner, issue_worker=referral)
+    # only announce vouchers that did not exist before
+    if created:
+        logging.warning('New Voucher added: {}'.format(voucher_reference))
+    return voucher
-def processFile(file, converters=converters, dtype=dtype):
-    return pd.read_excel(file, converters=converters, dtype=dtype)
+# create voucher types
+def create_voucher_types(voucher_types_dict, study):
+    '''
+    Make sure a VoucherType exists in the given study for every entry of
+    voucher_types_dict (description -> code) and return them keyed by description.
+    '''
+    created_types = {}
+    for description, type_code in voucher_types_dict.items():
+        created_types[description] = VoucherType.objects.update_or_create(
+            code=type_code, description=description, study=study)[0]
+    return created_types
+# create appointment types
+def create_appointment_types(assessments):
+    '''
+    Make sure an AppointmentType exists for every entry of assessments
+    (description -> default duration) and return them as a list, in the
+    iteration order of assessments.
+    '''
+    appointmentTypes = []
+    for name, duration in assessments.items():
+        # the code is made of the upper-case letters of the name ('Risk Factor' -> 'RF');
+        # joining explicitly yields a string for str and unicode names, on Python 2 and 3 alike
+        code = ''.join(letter for letter in name if letter.isupper())
+        # update_or_create already persists the object
+        appointmentType, _ = AppointmentType.objects.update_or_create(
+            code=code, default_duration=duration, description=name)
+        appointmentTypes.append(appointmentType)
+    return appointmentTypes
+def parse_row(index, row, visit_columns, appointmentTypes, voucher_types):
+    '''
+    Import one already-converted spreadsheet row into the database.
+
+    Creates or reuses, in this order: the languages, the country, the location and
+    (optionally) the flying team, the referring health partner, the Subject, the
+    StudySubject and its vouchers, then one Visit covering all visit dates with one
+    finished Appointment per distinct date.
+
+    index: position of the row in the data frame (not used).
+    row: row whose cells went through the converters.
+    visit_columns: names of the columns that contain the visit dates.
+    appointmentTypes: list of appointment types; the i-th date found in the visit
+        columns is paired with the i-th appointment type.
+    voucher_types: dict whose values are the VoucherType objects to allow.
+
+    Rows without any visit date are imported up to the vouchers; no Visit is created.
+    '''
+    # Languages
+    # len() is used on purpose: the converted cells may be numpy arrays,
+    # whose truth value is ambiguous
+    if len(row['LANGUAGES']) == 0 and len(row['PREFERED WRITEN LANGUAGE']) == 0:
+        logging.warning('No Languages available')
+    elif len(row['LANGUAGES']) == 0 and len(row['PREFERED WRITEN LANGUAGE']) > 0:
+        row['LANGUAGES'] = row['PREFERED WRITEN LANGUAGE']
+    elif len(row['LANGUAGES']) > 0 and len(row['PREFERED WRITEN LANGUAGE']) == 0:
+        row['PREFERED WRITEN LANGUAGE'] = row['LANGUAGES']
+    languages = []
+    for language in row['LANGUAGES']:
+        lang, created = Language.objects.get_or_create(
+            name=language)
+        languages.append(lang)
+        if created:
+            logging.warning('New Language added: {}'.format(language))
+        lang.save()
+    # stays None when the row has no language at all, so that the Subject defaults
+    # below never refer to an undefined name
+    pref_lang = None
+    # only the first preferred language is kept
+    for language in row['PREFERED WRITEN LANGUAGE'][:1]:
+        pref_lang, created = Language.objects.get_or_create(name=language)
+        if created:
+            logging.warning(
+                'New Language (from Prefered) added: {}'.format(language))
+        pref_lang.save()
+    # Country
+    country = row['COUNTRY']
+    country, created = Country.objects.get_or_create(name=country)
+    if created:
+        logging.warning('New Country added: {}'.format(row['COUNTRY']))
+    country.save()
+    # Location and Flying Team
+    # If no FT, then default location is CHL
+    ft = None
+    location = None
+    if not row['FLYING TEAM (FT)']:
+        location, created = Location.objects.get_or_create(
+            name=DEFAULT_LOCATION)
+        if created:
+            logging.warning('New location added: {}'.format(DEFAULT_LOCATION))
+        location.save()
+    else:
+        location, created = Location.objects.get_or_create(
+            name='Flying Team')
+        if created:
+            logging.warning('New location added: Flying Team')
+        location.save()
+        # Create Flying Team
+        ft, created = FlyingTeam.objects.get_or_create(
+            place=row['LOCATION OF FT'])
+        if created:
+            logging.warning('New Flying Team added: {}'.format(
+                row['LOCATION OF FT']))
+        ft.save()
+    # Health Partner
+    # create health partner (Referral)
+    health_partner, created = Worker.objects.get_or_create(name=row['REFERRAL'])
+    health_partner.roles.update(role=ROLE_CHOICES_HEALTH_PARTNER)
+    # create workerStudyRole
+    WorkerStudyRole.objects.update_or_create(
+        worker=health_partner, study_id=GLOBAL_STUDY_ID, role=ROLE_CHOICES_HEALTH_PARTNER)
+    health_partner.save()
+    if created:
+        logging.warning('New Health Partner added: {}'.format(row['REFERRAL']))
+    # Subject: looked up by SS number, first name and last name
+    subject, created = Subject.objects.get_or_create(social_security_number=row['SS NUMBER'],
+                                                     first_name=row['FIRST NAME'],
+                                                     last_name=row['LAST NAME'],
+                                                     defaults={
+        'social_security_number': row['SS NUMBER'],
+        'first_name': row['FIRST NAME'],
+        'last_name': row['LAST NAME'],
+        'sex': row['GENDER'],
+        'phone_number': row['PHONE NUMBER 1'],
+        'phone_number_2': row['PHONE NUMBER 2'],
+        'email': row['E-MAIL'],
+        'date_born': row['DATE OF BIRTH'],
+        'address': row['ADDRESS'],
+        'postal_code': row['POSTAL CODE'],
+        'city': row['CITY'],
+        'country': country,
+        'dead': row['DECEASED'],
+        'default_written_communication_language': pref_lang
+    })
+    subject.languages.set(languages)
+    subject.save()
+    if created:
+        logging.warning('New Subject added with SS number: {}'.format(row['SS NUMBER']))
+    # StudySubject: looked up by subject and ND number
+    study = Study.objects.filter(id=GLOBAL_STUDY_ID)[0]
+    studySubject, created = StudySubject.objects.get_or_create(subject=subject, nd_number=row['ND NUMBER'],
+                                                               defaults={
+        'subject': subject,
+        'study': study,
+        'postponed': row['POSTPONED'],
+        'nd_number': row['ND NUMBER'],
+        'resigned': row['RESIGNED'],
+        'resign_reason': row['REASON'],
+        'type': SUBJECT_TYPE_CHOICES_PATIENT,
+        'excluded': row['EXCLUDED'],
+        'exclude_reason': row['REASON.1'],
+        'previously_in_study': row['PDP 1.0'],
+        'comments': row['COMMENT'],
+        # NOTE(review): the cell is parsed as a bare DD.MM.YYYY date; a cell with an
+        # extra comment such as (Tel) would raise ValueError here - confirm the data
+        'date_added': parse_column_date_of_birth(row['DATE ADDED (V1)'])
+    })
+    #all study subjects can have all voucher types
+    studySubject.voucher_types.set(voucher_types.values())
+    studySubject.save()
+    if created:
+        logging.warning('New StudySubject added with ND number: {}'.format(row['ND NUMBER']))
+    #VOUCHERS
+    for voucher_reference in row['VOUCHER REFERENCE']:
+        add_subject_vouchers(voucher_reference, health_partner, voucher_types)
+    # Visits
+    # Consider all visits as part of the same visit with multiple appointments
+    # Collect every date found in the visit cells, ignoring comments such as (Tel);
+    # a real list is built because it is traversed several times below.
+    visit_dates = [datetime.datetime.strptime(found, '%d.%m.%Y')
+                   for cell in row[visit_columns].values
+                   for found in date_regex.findall(cell)]
+    if not visit_dates:
+        logging.warning('No visit dates found for ND number {}'.format(row['ND NUMBER']))
+        return
+    # the visit spans from the earliest to the latest date
+    datetime_begin = min(visit_dates).strftime('%Y-%m-%d')
+    datetime_end = max(visit_dates).strftime('%Y-%m-%d')
+    visit, created = Visit.objects.get_or_create(
+        subject=studySubject, datetime_begin=datetime_begin, datetime_end=datetime_end, defaults={
+            'is_finished': True})
+    if created:
+        logging.warning('New Visit added for ND number {} starting on {}'.format(
+            row['ND NUMBER'], datetime_begin))
+    distinct_dates = set(visit_dates)
+    #in this case appointment types are incremental
+    visit.appointment_types.set(appointmentTypes[:len(distinct_dates)])
+    visit.save()
+    # If there are two Vx with the same date we put together the appointment
+    # types in the same appointment
+    for visit_date in distinct_dates:
+        datetime_when = visit_date.strftime('%Y-%m-%d')
+        # get the indices of each occurrence of the date and use them to get
+        # the appointment types
+        appointment_types = itembetter(
+            indexof(visit_date, visit_dates), appointmentTypes)
+        # create appointment
+        appointment, _ = Appointment.objects.update_or_create(
+            visit=visit, length=sum(
+                [a.default_duration for a in appointment_types]),
+            flying_team=ft, location=location,
+            status=Appointment.APPOINTMENT_STATUS_FINISHED, datetime_when=datetime_when)
+        # the appointment types follow each other, starting at 09:00
+        date_when = visit_date.replace(
+            hour=9, minute=0, second=0, microsecond=0)
+        for appointment_type in appointment_types:
+            # persist the link; update_or_create keeps a second import run from duplicating it
+            AppointmentTypeLink.objects.update_or_create(
+                appointment=appointment, appointment_type=appointment_type,
+                defaults={'date_when': date_when})
+            date_when += datetime.timedelta(
+                minutes=appointment_type.default_duration)
 if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
-    df = processFile(
+    file = '/Users/carlos.vega/ownCloud/Documents/Projects/PDP/copy.xlsx'
-        '/Users/carlos.vega/ownCloud/Documents/Projects/PDP/copy.xlsx')
+    # read every column with dtype=object; the cells are converted further down
+    df = pd.read_excel(file, dtype=object)
-    df['Languages'] = df['Languages'].apply(apply_column_languages)
+    # empty cells become '' and every value becomes a unicode string
+    df = df.fillna('').astype(unicode)
-    df['Prefered writen language'] = df[
+    # column names are upper-cased so that they match the keys of converters
+    df.columns = [c.upper() for c in df.columns]
-        'Prefered writen language'].apply(apply_column_prefered_language)
+    # make transformations
+    for column, function in converters.items():
+        logging.warn(column)
+        df[column] = df[column].apply(function)
+    # get visits columns: those whose name contains (V<digit>)
+    regex = re.compile(r'\(V\d\)')
+    # the study that receives the imported data
+    study = Study.objects.filter(id=GLOBAL_STUDY_ID)[0]
+    #enable vouchers
+    study.columns.voucher_types = True
+    study.columns.vouchers = True
+    study.columns.save()
+    study.save()
+    # names of the visit columns, in the order of the data frame columns
+    visit_columns = filter(regex.search, df.columns)
+    # appointment type description -> default duration (used as minutes by parse_row)
+    assessments = OrderedDict([('Cognitive Test', 180), ('Risk Factor', 120),
+                               ('Voucher Distribution', 120), ('Follow Up', 90)])
+    appointmentTypes = create_appointment_types(assessments)
+    # voucher type description -> code
+    voucher_types_dict = OrderedDict([('Cognitive Activity', 'CA'), ('Neurofit', 'NF'), ('Mobilfit', 'MF'), ('Diet', 'D'),
+    ('Consulte ORL', 'CORL'), ('Physical Activity', 'PA'), ('Individual Cognitive Training', 'IT'), ('Social', 'S'), ('Test', 'T')])
+    voucher_types = create_voucher_types(voucher_types_dict, study)
+    # process each row
+    for index, row in df.iterrows():
+        parse_row(index, row, visit_columns, appointmentTypes, voucher_types)