Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
scheduling-system
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SMASCH
scheduling-system
Commits
b13fe510
Commit
b13fe510
authored
6 years ago
by
Carlos Vega
Browse files
Options
Downloads
Patches
Plain Diff
added vouchers
parent
f2c979ac
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!178
Feature/import data pdp
Pipeline
#7236
passed
6 years ago
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
smash/import_file.py
+369
-31
369 additions, 31 deletions
smash/import_file.py
with
369 additions
and
31 deletions
smash/import_file.py
+
369
−
31
View file @
b13fe510
...
...
@@ -7,41 +7,67 @@ import pandas as pd
import
numpy
as
np
import
logging
import
datetime
import
re
from
operator
import
itemgetter
from
collections
import
OrderedDict
,
defaultdict
import
sys
import
string
from
web.models.constants
import
SEX_CHOICES
,
SEX_CHOICES_MALE
,
SEX_CHOICES_FEMALE
from
web.models.constants
import
VOUCHER_STATUS_IN_USE
,
SUBJECT_TYPE_CHOICES_PATIENT
,
GLOBAL_STUDY_ID
,
SEX_CHOICES
,
SEX_CHOICES_MALE
,
SEX_CHOICES_FEMALE
from
web.algorithm
import
VerhoeffAlgorithm
,
LuhnAlgorithm
from
web.utils
import
is_valid_social_security_number
from
web.models
import
VoucherType
,
Voucher
,
Country
,
AppointmentTypeLink
,
AppointmentType
,
Study
,
Worker
,
Language
,
Subject
,
WorkerStudyRole
,
StudySubject
,
Location
,
FlyingTeam
,
Visit
,
Appointment
,
AppointmentType
from
web.models.worker_study_role
import
WORKER_STAFF
,
ROLE_CHOICES_SECRETARY
,
ROLE_CHOICES_HEALTH_PARTNER
,
\
WORKER_HEALTH_PARTNER
,
ROLE_CHOICES_VOUCHER_PARTNER
,
WORKER_VOUCHER_PARTNER
# Name of the Location given to rows whose 'FLYING TEAM (FT)' flag is not set.
DEFAULT_LOCATION = 'CHL'
# Matches dates written as D.M.YYYY / DD.MM.YYYY (1-2 digits, 1-2 digits, 4 digits).
date_regex = re.compile(r'\d{1,2}\.\d{1,2}\.\d{4}')
def itembetter(items, lst):
    '''
    Return the elements of lst found at the positions listed in items, always as a list.

    operator.itemgetter returns a bare element for one index, a tuple for
    several, and raises TypeError when given no index at all; indexing
    directly gives a list in every case, including an empty one.

    items: iterable of indices (or keys) valid for lst.
    lst: sequence (or mapping) the elements are taken from.
    Returns [lst[i] for each i in items], in the order the indices are given.
    Raises IndexError / KeyError when a position is not present in lst.
    '''
    return [lst[i] for i in items]
def indexof(element, l):
    '''
    Collect every position at which element occurs in l.

    element: value compared with == against each entry.
    l: iterable to scan.
    Returns the list of matching indices in ascending order (empty if none match).
    '''
    positions = []
    for position, candidate in enumerate(l):
        if candidate == element:
            positions.append(position)
    return positions
'''
# Things that *could* already be on the database:
- Language
- Country
- A subject with the same SS number
- A subject with the same ND number
- Referals (Health Partner)
- Location
- Flying Team
- Referals (Health Partner)
- A subject with the same SS number, first name and last name
- A studySubject with the same ND number, subject
# Columns to be transformed to a standard format
- Gender
- Language
- Prefered writen language
- Country
- SS number
- Date of birth
- Date added (V1)
- Date added (V1)
# visits might have comments like (Tel)
- ... (V2)
- ... (V3)
- ... (V4)
- Voucher activity (remove cells that include None in any form and split by breakline)
- Voucher reference (split)
Boolean
- Deceased
- Postponed
- Resigned
- Excluded
- PDP 1.0
- PDP 1.0
'''
...
...
@@ -49,21 +75,55 @@ Boolean
Column Converter Functions
'''
# in converters dict
# converters
# Boolean:
# Deceased
# Postponed
# Resigned
# Excluded
# PDP 1.0
# Flying Team (FT)
def parse_voucher_reference(vr):
    '''
    Split a voucher reference cell into the list of references it holds.

    The cell contains one reference per line. Each entry is stripped so that
    stray spaces or a trailing carriage return do not end up inside a
    reference, and blank lines are dropped instead of yielding empty entries.

    vr: cell content as a string.
    Returns a list of non-empty references; [] for an empty or blank cell.
    '''
    references = (piece.strip() for piece in vr.split('\n'))
    return [reference for reference in references if reference]
def parse_column_date_of_birth(date):
    '''
    Convert a date written as DD.MM.YYYY into the ISO form YYYY-MM-DD.

    Surrounding whitespace is removed first: strptime rejects a value with
    leading or trailing blanks.

    date: date string in day.month.year order.
    Returns the same date formatted as 'YYYY-MM-DD'.
    Raises ValueError when the text is not a valid DD.MM.YYYY date.
    '''
    parsed = datetime.datetime.strptime(date.strip(), '%d.%m.%Y')
    return parsed.strftime('%Y-%m-%d')
def parse_voucher_type(vt):
    '''
    Split a voucher activity cell into the list of voucher types it holds.

    A cell that mentions "none" in any letter case means no voucher activity
    and yields an empty list. Otherwise the cell holds one entry per line;
    each entry is stripped and blank lines are dropped.

    vt: cell content as a string.
    Returns a list of non-empty entries; [] for an empty, blank or "none" cell.
    '''
    if 'NONE' in vt.upper():
        return []
    entries = (piece.strip() for piece in vt.split('\n'))
    return [entry for entry in entries if entry]
def parse_boolean(boolean_Y_N):
    '''
    Interpret a yes/no cell: True only for 'y' / 'Y' (surrounding blanks ignored).

    boolean_Y_N: cell value; normally a string, possibly a float NaN for an
        empty cell.
    Returns True for a 'Y' answer and False for anything else. Values that
    cannot be interpreted (e.g. numbers or None) are logged and also give
    False; this function never raises.
    '''
    try:
        if isinstance(boolean_Y_N, float) and np.isnan(boolean_Y_N):
            return False
        return boolean_Y_N.strip().upper() == 'Y'
    except Exception as e:
        # Exceptions have no .message attribute on Python 3; format the
        # exception itself so the handler cannot fail while reporting.
        logging.warning('parse_boolean failed for {}.'.format(boolean_Y_N))
        logging.warning('{} {}'.format(e, e.args))
        return False
# Lower-cased gender letter -> SEX_CHOICES_* constant (looked up by parse_column_gender).
gender_table = {'m': SEX_CHOICES_MALE, 'f': SEX_CHOICES_FEMALE}
# birth date
def parse_column_date_of_birth(date):
    '''
    Convert a date written as DD.MM.YYYY into the ISO form YYYY-MM-DD.

    Surrounding whitespace is removed first: strptime rejects a value with
    leading or trailing blanks.

    date: date string in day.month.year order.
    Returns the same date formatted as 'YYYY-MM-DD'.
    Raises ValueError when the text is not a valid DD.MM.YYYY date.
    '''
    parsed = datetime.datetime.strptime(date.strip(), '%d.%m.%Y')
    return parsed.strftime('%Y-%m-%d')
# gender
# Lower-cased gender letter -> SEX_CHOICES_* constant (looked up by parse_column_gender).
gender_table = {'m': SEX_CHOICES_MALE, 'f': SEX_CHOICES_FEMALE}
def parse_column_gender(gender):
    '''
    Translate a gender cell ('m' / 'f', any letter case) into its SEX_CHOICES_* value.

    gender: cell value; expected to be a string.
    Returns the matching entry of gender_table, or None when the value is not
    a string or is not a known gender letter.
    '''
    try:
        key = gender.strip().lower()
    except AttributeError:
        # Not a string (e.g. None or NaN): no gender available.
        return None
    return gender_table.get(key)
# SS number
def
parse_column_ss_number
(
ss
):
...
...
@@ -80,13 +140,11 @@ def parse_column_ss_number(ss):
if
len
(
ss
)
==
13
:
if
not
is_valid_social_security_number
(
ss
):
logging
.
debug
(
'
Invalid SS number: |{}|
'
.
format
(
ss
))
return
None
else
:
logging
.
debug
(
'
Invalid SS number: (Length not valid) |{}|
'
.
format
(
ss
))
return
ss
return
ss
#
not in coverters dict
#
Language
language_table
=
{
...
...
@@ -98,7 +156,8 @@ language_table = {
'
E
'
:
'
English
'
,
'
P
'
:
'
Portuguese
'
,
'
A
'
:
'
Arabic
'
,
'
SP
'
:
'
Spanish
'
'
SP
'
:
'
Spanish
'
,
'
FIN
'
:
'
Finnish
'
}
language_translation_table
=
{
...
...
@@ -116,7 +175,8 @@ def apply_column_prefered_language(languages):
def
apply_column_languages
(
languages
):
if
type
(
languages
)
!=
float
:
languages
=
languages
.
strip
()
if
type
(
languages
)
!=
float
and
len
(
languages
)
>
0
:
# replacements and transformations
languages
=
unicode
(
languages
).
upper
().
translate
(
language_translation_table
)
...
...
@@ -128,28 +188,306 @@ def apply_column_languages(languages):
return
np
.
array
(
new_list
)
else
:
logging
.
debug
(
'
Parse Languages: Empty or invalid Languages: |{}|
'
.
format
(
languages
))
'
Parse Languages: Empty
, NaN,
or invalid Languages: |{}|
'
.
format
(
languages
))
return
np
.
array
([])
converters
=
{
'
Date of birth
'
:
parse_column_date_of_birth
,
'
Gender
'
:
parse_column_gender
,
'
SS Number
'
:
parse_column_ss_number
# Country
# Country code -> full country name, used by apply_column_country; values that
# are not listed here are returned unchanged by that function.
country_table = {'LUX': 'Luxembourg'}
dtype
=
{
'
Languages
'
:
object
,
'
Prefered writen language
'
:
object
def apply_column_country(country):
    '''
    Replace a country code by its full name using country_table.

    country: cell value, normally a country code such as 'LUX'.
    Returns the full country name, or the value unchanged (after logging a
    warning) when it is not a known code.
    '''
    try:
        return country_table[country]
    except (KeyError, TypeError):
        # KeyError: unknown code; TypeError: unhashable cell value.
        logging.warning('Invalid Country: {}'.format(country))
        return country
'''
Instead of using the converters parameter of the read_excel method,
we apply the transformations afterwards, because read_excel does not allow
converters that return a list.
'''
# Upper-cased spreadsheet column name -> function applied to every cell of
# that column (see the loop over converters.items() in the __main__ section).
converters = {
    'DATE OF BIRTH': parse_column_date_of_birth,
    'GENDER': parse_column_gender,
    'SS NUMBER': parse_column_ss_number,
    'COUNTRY': apply_column_country,
    'LANGUAGES': apply_column_languages,
    'PREFERED WRITEN LANGUAGE': apply_column_prefered_language,
    # Yes/no columns
    'DECEASED': parse_boolean,
    'POSTPONED': parse_boolean,
    'RESIGNED': parse_boolean,
    'EXCLUDED': parse_boolean,
    'PDP 1.0': parse_boolean,
    'FLYING TEAM (FT)': parse_boolean,
    # Multi-line cells turned into lists
    'VOUCHER ACTIVITY': parse_voucher_type,
    'VOUCHER REFERENCE': parse_voucher_reference
}
# add voucher for subject
# Partner abbreviation used inside voucher references -> partner name.
voucher_partners = {'ZIT': 'Zitha'}
def add_subject_vouchers(voucher_reference, referral, voucher_types):
    '''
    Create (or update) the Voucher described by one voucher reference.

    The reference is made of five '-' separated fields:
    ND number, issue date (YYYYMMDD), voucher partner, voucher type code and
    a trailing number. The expiry date is set 365 days after the issue date.
    The voucher partner is stored as a Worker with the voucher-partner role
    in the global study and is allowed every voucher type in voucher_types.

    voucher_reference: reference string as described above.
    referral: Worker recorded as the issuer of the voucher.
    voucher_types: dict whose values are the VoucherType objects the partner accepts.
    Returns the Voucher object.
    Raises ValueError when the reference does not have five fields or its
    date field is not a valid YYYYMMDD date.
    '''
    fields = voucher_reference.split('-')
    if len(fields) != 5:
        raise ValueError('Invalid voucher reference: {}'.format(voucher_reference))
    nd_number, date, voucher_partner, voucher_type, _ = fields

    issue_date = datetime.datetime.strptime(date, '%Y%m%d')
    expiry_date = issue_date + datetime.timedelta(days=365)

    # Known abbreviations are expanded; unknown partners keep the raw code as name.
    usage_partner, partner_created = Worker.objects.update_or_create(
        name=voucher_partners.get(voucher_partner, voucher_partner))
    usage_partner.roles.update(role=ROLE_CHOICES_VOUCHER_PARTNER)
    # create workerStudyRole
    WorkerStudyRole.objects.update_or_create(
        worker=usage_partner,
        study_id=GLOBAL_STUDY_ID,
        role=ROLE_CHOICES_VOUCHER_PARTNER)
    usage_partner.voucher_types.set(voucher_types.values())
    usage_partner.save()
    if partner_created:
        logging.warning('New Voucher Partner created: {}'.format(voucher_partner))

    vt = VoucherType.objects.get(code=voucher_type)
    study_subject = StudySubject.objects.get(nd_number=nd_number)

    voucher, voucher_created = Voucher.objects.update_or_create(
        number=voucher_reference,
        issue_date=issue_date,
        expiry_date=expiry_date,
        voucher_type=vt,
        study_subject=study_subject,
        status=VOUCHER_STATUS_IN_USE,
        usage_partner=usage_partner,
        issue_worker=referral)
    # Only announce vouchers that did not exist before this import.
    if voucher_created:
        logging.warning('New Voucher added: {}'.format(voucher_reference))
    return voucher
def processFile(file, converters=converters, dtype=dtype):
    '''
    Read an Excel file into a DataFrame through pandas.read_excel.

    file: passed unchanged to read_excel as the file to load.
    converters, dtype: forwarded unchanged to read_excel; they default to the
        module-level objects of the same names as bound when this function
        is defined.
    Returns whatever read_excel returns.
    '''
    return pd.read_excel(file, converters=converters, dtype=dtype)
# create voucher types
def create_voucher_types(voucher_types_dict, study):
    '''
    Make sure a VoucherType exists for every entry of voucher_types_dict.

    voucher_types_dict: mapping of voucher type description -> code.
    study: study the voucher types are attached to.
    Returns a dict mapping each description to its VoucherType object.
    '''
    return {
        description: VoucherType.objects.update_or_create(
            code=type_code, description=description, study=study)[0]
        for description, type_code in voucher_types_dict.items()
    }
# create appointment types
def create_appointment_types(assessments):
    '''
    Make sure an AppointmentType exists for every assessment.

    The code of each type is built from the upper-case letters of its name
    (e.g. 'Cognitive Test' -> 'CT'). The letters are joined explicitly
    because filter() returns an iterator, not a string, on Python 3 and
    str.isupper rejects unicode names on Python 2.

    assessments: mapping of assessment name -> default duration.
    Returns the list of AppointmentType objects, in the iteration order of
    assessments.
    '''
    appointmentTypes = []
    for name, duration in assessments.items():
        code = ''.join(letter for letter in name if letter.isupper())
        appointmentType, _ = AppointmentType.objects.update_or_create(
            code=code, default_duration=duration, description=name)
        appointmentType.save()
        appointmentTypes.append(appointmentType)
    return appointmentTypes
def parse_row(index, row, visit_columns, appointmentTypes, voucher_types):
    '''
    Import one spreadsheet row into the database.

    Creates or reuses, in this order: the languages, the country, the
    location (and flying team), the referring health partner, the Subject,
    the StudySubject, its vouchers, one Visit covering all visit dates of the
    row and one Appointment per distinct visit date.

    index: position of the row in the DataFrame (not used).
    row: the row, with the columns already transformed by the converters.
    visit_columns: names of the columns that contain visit dates.
    appointmentTypes: list of AppointmentType objects; the n-th date found in
        the row is matched with the n-th appointment type.
    voucher_types: dict whose values are the VoucherType objects.
    Returns None.
    '''
    # Languages: when only one of the two language columns is filled in,
    # reuse its content for the other one.
    if len(row['LANGUAGES']) == 0 and len(row['PREFERED WRITEN LANGUAGE']) == 0:
        logging.warning('No Languages available')
    elif len(row['LANGUAGES']) == 0 and len(row['PREFERED WRITEN LANGUAGE']) > 0:
        row['LANGUAGES'] = row['PREFERED WRITEN LANGUAGE']
    elif len(row['LANGUAGES']) > 0 and len(row['PREFERED WRITEN LANGUAGE']) == 0:
        row['PREFERED WRITEN LANGUAGE'] = row['LANGUAGES']

    languages = []
    for language in row['LANGUAGES']:
        lang, created = Language.objects.get_or_create(name=language)
        languages.append(lang)
        if created:
            logging.warning('New Language added: {}'.format(language))
            lang.save()

    # Stays None when the row has no language at all, so the Subject
    # defaults below never read an unbound name.
    pref_lang = None
    for language in row['PREFERED WRITEN LANGUAGE'][:1]:
        pref_lang, created = Language.objects.get_or_create(name=language)
        if created:
            logging.warning('New Language (from Prefered) added: {}'.format(language))
            pref_lang.save()

    # Country
    country, created = Country.objects.get_or_create(name=row['COUNTRY'])
    if created:
        logging.warning('New Country added: {}'.format(row['COUNTRY']))
        country.save()

    # Location and Flying Team
    # If no FT, then default location is CHL
    ft = None
    if not row['FLYING TEAM (FT)']:
        location, created = Location.objects.get_or_create(name=DEFAULT_LOCATION)
        if created:
            logging.warning('New location added: {}'.format(DEFAULT_LOCATION))
            location.save()
    else:
        location, created = Location.objects.get_or_create(name='Flying Team')
        if created:
            logging.warning('New location added: Flying Team')
            location.save()
        # Create Flying Team
        ft, created = FlyingTeam.objects.get_or_create(place=row['LOCATION OF FT'])
        if created:
            logging.warning('New Flying Team added: {}'.format(row['LOCATION OF FT']))
            ft.save()

    # Health Partner
    # create health partner (Referral)
    health_partner, created = Worker.objects.get_or_create(name=row['REFERRAL'])
    health_partner.roles.update(role=ROLE_CHOICES_HEALTH_PARTNER)
    # create workerStudyRole
    WorkerStudyRole.objects.update_or_create(
        worker=health_partner,
        study_id=GLOBAL_STUDY_ID,
        role=ROLE_CHOICES_HEALTH_PARTNER)
    health_partner.save()
    if created:
        logging.warning('New Health Partner added: {}'.format(row['REFERRAL']))

    # Subject: identified by SS number, first name and last name
    subject, created = Subject.objects.get_or_create(
        social_security_number=row['SS NUMBER'],
        first_name=row['FIRST NAME'],
        last_name=row['LAST NAME'],
        defaults={
            'social_security_number': row['SS NUMBER'],
            'first_name': row['FIRST NAME'],
            'last_name': row['LAST NAME'],
            'sex': row['GENDER'],
            'phone_number': row['PHONE NUMBER 1'],
            'phone_number_2': row['PHONE NUMBER 2'],
            'email': row['E-MAIL'],
            'date_born': row['DATE OF BIRTH'],
            'address': row['ADDRESS'],
            'postal_code': row['POSTAL CODE'],
            'city': row['CITY'],
            'country': country,
            'dead': row['DECEASED'],
            'default_written_communication_language': pref_lang
        })
    subject.languages.set(languages)
    subject.save()
    if created:
        logging.warning('New Subject added with SS number: {}'.format(row['SS NUMBER']))

    # StudySubject: identified by subject and ND number
    study = Study.objects.filter(id=GLOBAL_STUDY_ID)[0]
    studySubject, created = StudySubject.objects.get_or_create(
        subject=subject,
        nd_number=row['ND NUMBER'],
        defaults={
            'subject': subject,
            'study': study,
            'postponed': row['POSTPONED'],
            'nd_number': row['ND NUMBER'],
            'resigned': row['RESIGNED'],
            'resign_reason': row['REASON'],
            'type': SUBJECT_TYPE_CHOICES_PATIENT,
            'excluded': row['EXCLUDED'],
            'exclude_reason': row['REASON.1'],
            'previously_in_study': row['PDP 1.0'],
            'comments': row['COMMENT'],
            'date_added': parse_column_date_of_birth(row['DATE ADDED (V1)'])
        })
    # all study subjects can have all voucher types
    studySubject.voucher_types.set(voucher_types.values())
    studySubject.save()
    if created:
        logging.warning('New StudySubject added with ND number: {}'.format(row['ND NUMBER']))

    # Vouchers
    for voucher_reference in row['VOUCHER REFERENCE']:
        add_subject_vouchers(voucher_reference, health_partner, voucher_types)

    # Visits
    # Consider all visits as part of the same visit with multiple appointments.
    # date_regex.findall picks the dates out of each cell and ignores comments
    # such as "(Tel)". The result is a real list because it is sorted,
    # de-duplicated and searched below.
    visit_dates = [
        datetime.datetime.strptime(found, '%d.%m.%Y')
        for cell in row[visit_columns].values
        for found in date_regex.findall(cell)
    ]
    if not visit_dates:
        # Nothing to schedule; the subject and vouchers above are kept.
        logging.warning('No visit dates found for ND number {}'.format(row['ND NUMBER']))
        return

    datetime_begin = min(visit_dates).strftime('%Y-%m-%d')
    datetime_end = max(visit_dates).strftime('%Y-%m-%d')

    visit, created = Visit.objects.get_or_create(
        subject=studySubject,
        datetime_begin=datetime_begin,
        datetime_end=datetime_end,
        defaults={'is_finished': True})
    if created:
        logging.warning('New Visit added for ND number {} starting on {}'.format(
            row['ND NUMBER'], datetime_begin))

    unique_dates = set(visit_dates)
    # in this case appointment types are incremental
    visit.appointment_types.set(appointmentTypes[:len(unique_dates)])
    visit.save()

    # If there are two Vx with the same date we put together the appointment
    # types in the same appointment.
    for visit_date in unique_dates:
        datetime_when = visit_date.strftime('%Y-%m-%d')
        # get the indices of each occurrence of the date and use them to get
        # the appointment types
        appointment_types = itembetter(indexof(visit_date, visit_dates), appointmentTypes)
        # create appointment
        appointment, _ = Appointment.objects.update_or_create(
            visit=visit,
            length=sum(a.default_duration for a in appointment_types),
            flying_team=ft,
            location=location,
            status=Appointment.APPOINTMENT_STATUS_FINISHED,
            datetime_when=datetime_when)

        # Appointment types are scheduled back to back starting at 09:00.
        date_when = visit_date.replace(hour=9, minute=0, second=0, microsecond=0)
        for appointment_type in appointment_types:
            # Persist the link; building the object alone stores nothing.
            # NOTE(review): assumes (appointment, appointment_type) identifies a link - confirm against the model.
            AppointmentTypeLink.objects.get_or_create(
                appointment=appointment,
                appointment_type=appointment_type,
                defaults={'date_when': date_when})
            date_when += datetime.timedelta(minutes=appointment_type.default_duration)
if __name__ == '__main__':
    # Import script: reads the spreadsheet, normalises its columns and imports
    # every row with parse_row.
    logging.basicConfig(level=logging.DEBUG)

    # The spreadsheet path may be given as first command line argument; the
    # original hard-coded location is kept as fallback.
    if len(sys.argv) > 1:
        excel_file = sys.argv[1]
    else:
        excel_file = '/Users/carlos.vega/ownCloud/Documents/Projects/PDP/copy.xlsx'

    # Everything is read as text; empty cells become empty strings.
    df = pd.read_excel(excel_file, dtype=object)
    df = df.fillna('').astype(unicode)
    df.columns = [c.upper() for c in df.columns]

    # make transformations
    for column, function in converters.items():
        logging.warning(column)
        df[column] = df[column].apply(function)

    # get visits columns: those whose name contains "(V<digit>)"
    regex = re.compile(r'\(V\d\)')
    # A list (not a lazy filter object) because it is used to index each row.
    visit_columns = [c for c in df.columns if regex.search(c)]

    study = Study.objects.filter(id=GLOBAL_STUDY_ID)[0]
    # enable vouchers
    study.columns.voucher_types = True
    study.columns.vouchers = True
    study.columns.save()
    study.save()

    assessments = OrderedDict([
        ('Cognitive Test', 180),
        ('Risk Factor', 120),
        ('Voucher Distribution', 120),
        ('Follow Up', 90)])
    appointmentTypes = create_appointment_types(assessments)

    voucher_types_dict = OrderedDict([
        ('Cognitive Activity', 'CA'),
        ('Neurofit', 'NF'),
        ('Mobilfit', 'MF'),
        ('Diet', 'D'),
        ('Consulte ORL', 'CORL'),
        ('Physical Activity', 'PA'),
        ('Individual Cognitive Training', 'IT'),
        ('Social', 'S'),
        ('Test', 'T')])
    voucher_types = create_voucher_types(voucher_types_dict, study)

    # process each row
    for index, row in df.iterrows():
        parse_row(index, row, visit_columns, appointmentTypes, voucher_types)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment