Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
scheduling-system
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Wiki
Requirements
External wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SMASCH
scheduling-system
Commits
c31c62da
Commit
c31c62da
authored
6 years ago
by
Carlos Vega
Browse files
Options
Downloads
Patches
Plain Diff
WIP: import file for pdp project
parent
db04d75d
No related branches found
No related tags found
1 merge request
!178
Feature/import data pdp
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
smash/import_file.py
+155
-0
155 additions, 0 deletions
smash/import_file.py
with
155 additions
and
0 deletions
smash/import_file.py
0 → 100644
+
155
−
0
View file @
c31c62da
import
os
import
django
from
django.conf
import
settings
os
.
environ
.
setdefault
(
"
DJANGO_SETTINGS_MODULE
"
,
"
smash.settings
"
)
django
.
setup
()
import
pandas
as
pd
import
numpy
as
np
import
logging
import
datetime
import
string
from
web.models.constants
import
SEX_CHOICES
,
SEX_CHOICES_MALE
,
SEX_CHOICES_FEMALE
from
web.algorithm
import
VerhoeffAlgorithm
,
LuhnAlgorithm
from
web.utils
import
is_valid_social_security_number
'''
# Things that *could* aready be on the database:
- Language
- Country
- A subject with the same SS number
- A subject with the same ND number
- Referals (Health Partner)
- Location
- Flying Team
# Columns to be transformed to a standard format
- Gender
- Language
- Prefered writen language
- SS number
- Date of birth
- Date added (V1)
- ... (V2)
- ... (V3)
- ... (V4)
Boolean
- Deceased
- Postponed
- Resigned
- Excluded
- PDP 1.0
'''
'''
Column Converter Functions
'''
# in converters dict
def
parse_column_date_of_birth
(
date
):
return
datetime
.
datetime
.
strptime
(
date
,
'
%d.%m.%Y
'
).
strftime
(
'
%Y-%m-%d
'
)
gender_table
=
{
'
m
'
:
SEX_CHOICES_MALE
,
'
f
'
:
SEX_CHOICES_FEMALE
}
def
parse_column_gender
(
gender
):
try
:
return
gender_table
[
gender
.
lower
()]
except
:
return
None
def
parse_column_ss_number
(
ss
):
ss
=
ss
.
replace
(
'
'
,
''
)
if
not
ss
.
isdigit
():
return
None
if
len
(
ss
)
==
11
:
logging
.
debug
(
'
Old SS number: |{}|
'
.
format
(
ss
))
eleventh_digit
=
VerhoeffAlgorithm
.
calculate_verhoeff_check_sum
(
ss
[
:
11
])
twelfth_digit
=
LuhnAlgorithm
.
calculate_luhn_checksum
(
ss
[:
11
])
ss
=
ss
+
eleventh_digit
+
twelfth_digit
# we revalidate the old ss too after we append the digits
if
len
(
ss
)
==
13
:
if
not
is_valid_social_security_number
(
ss
):
logging
.
debug
(
'
Invalid SS number: |{}|
'
.
format
(
ss
))
return
None
else
:
logging
.
debug
(
'
Invalid SS number: (Length not valid) |{}|
'
.
format
(
ss
))
return
ss
return
ss
# not in coverters dict
language_table
=
{
'
L
'
:
'
Luxembourgish
'
,
'
LT
'
:
'
Lithuanian
'
,
'
IT
'
:
'
Italian
'
,
'
F
'
:
'
French
'
,
'
D
'
:
'
German
'
,
'
E
'
:
'
English
'
,
'
P
'
:
'
Portuguese
'
,
'
A
'
:
'
Arabic
'
,
'
SP
'
:
'
Spanish
'
}
language_translation_table
=
{
# deletions
ord
(
u
'
)
'
):
None
,
ord
(
u
'
(
'
):
None
,
ord
(
u
'
'
):
None
,
# replacements
ord
(
u
'
,
'
):
u
'
;
'
}
def
apply_column_prefered_language
(
languages
):
return
apply_column_languages
(
languages
)[:
1
]
def
apply_column_languages
(
languages
):
if
type
(
languages
)
!=
float
:
# replacements and transformations
languages
=
unicode
(
languages
).
upper
().
translate
(
language_translation_table
)
new_list
=
[]
for
language
in
languages
.
split
(
'
;
'
):
if
language
in
language_table
:
language
=
language_table
[
language
]
new_list
.
append
(
language
)
return
np
.
array
(
new_list
)
else
:
logging
.
debug
(
'
Parse Languages: Empty or invalid Languages: |{}|
'
.
format
(
languages
))
return
np
.
array
([])
converters
=
{
'
Date of birth
'
:
parse_column_date_of_birth
,
'
Gender
'
:
parse_column_gender
,
'
SS Number
'
:
parse_column_ss_number
}
dtype
=
{
'
Languages
'
:
object
,
'
Prefered writen language
'
:
object
}
def
processFile
(
file
,
converters
=
converters
,
dtype
=
dtype
):
return
pd
.
read_excel
(
file
,
converters
=
converters
,
dtype
=
dtype
)
if
__name__
==
'
__main__
'
:
logging
.
basicConfig
(
level
=
logging
.
DEBUG
)
df
=
processFile
(
'
/Users/carlos.vega/ownCloud/Documents/Projects/PDP/copy.xlsx
'
)
df
[
'
Languages
'
]
=
df
[
'
Languages
'
].
apply
(
apply_column_languages
)
df
[
'
Prefered writen language
'
]
=
df
[
'
Prefered writen language
'
].
apply
(
apply_column_prefered_language
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment