Commits (13)
......@@ -35,12 +35,14 @@ class BaseImporter:
publish_on_import=False,
exit_on_error=False,
verbose=False,
validate=True
validate=True,
skip_on_exist=True
):
self.verbose = verbose
self.publish_on_import = publish_on_import
self.exit_on_error = exit_on_error
self.validate = validate
self.skip_on_exist = skip_on_exist
@property
def json_schema_validator(self):
......@@ -143,7 +145,7 @@ class BaseImporter:
if self.exit_on_error:
raise e
result = False
self.logger.debug(f'Successfully imported item: {item_name}')
return result
def process_json(self, import_dict):
......
......@@ -7,7 +7,7 @@ from core.models.data_declaration import ConsentStatus, DeidentificationMethod,
ShareCategory, SubjectCategory
from core.models.storage_location import StorageLocationCategory, DataLocation
from core.models.use_restriction import USE_RESTRICTION_CHOICES
from django.db.models import Count
class DatasetsImporter(BaseImporter):
"""
......@@ -33,6 +33,9 @@ class DatasetsImporter(BaseImporter):
try:
dataset = get_dataset(elu_accession = dataset_dict.get('external_id', None), title=title)
title_to_show = title.encode('utf8')
if self.skip_on_exist:
self.logger.warning(f"Dataset with title '{title_to_show}' already found. The update will be skipped")
return True
self.logger.warning(f"Dataset with title '{title_to_show}' already found. It will be updated.")
if dataset.is_published:
raise DatasetImportError(data=f'Updating published entity is not supported - dataset: "{dataset.title}".')
......@@ -424,48 +427,47 @@ class DatasetsImporter(BaseImporter):
"""
This should be called after data-declarations have been processed
(they rely on data-declaration's acronyms to be properly imported)
"""
legal_basis_obj = LegalBasis.objects.filter(
dataset=dataset_object,
remarks=legal_basis.get('legal_basis_notes', '')
) # Note: at this point we can have 0, 1 or more LegalBasis objects
data_declaration_titles = legal_basis.get('data_declarations', [])
data_declarations = [DataDeclaration.objects.get(title=title, dataset=dataset_object) for title in data_declaration_titles]
"""
datasets_legal_bases = LegalBasis.objects.filter(
dataset=dataset_object,
remarks=legal_basis.get('legal_basis_notes', '')
)
# get only those legal bases with matching data types and basis codes
legal_basis_types_titles = legal_basis.get('legal_basis_codes', [])
legal_basis_types = [LegalBasisType.objects.get(code=code) for code in legal_basis_types_titles]
personal_data_types_titles = legal_basis.get('personal_data_codes', [])
personal_data_types = [PersonalDataType.objects.get(code=code) for code in personal_data_types_titles]
if len(legal_basis_obj) == 0:
datasets_legal_bases = datasets_legal_bases.annotate(data_types_count=Count('personal_data_types'),
basis_types_count=Count('legal_basis_types')).filter(
data_types_count=len(personal_data_types),
basis_types_count=len(legal_basis_types)
)
# get only those with same data types
for personal_data_type in personal_data_types:
datasets_legal_bases = datasets_legal_bases.filter(personal_data_types=personal_data_type)
# get only those with same basis types
for legal_basis_type in legal_basis_types:
datasets_legal_bases = datasets_legal_bases.filter(legal_basis_types=legal_basis_type)
data_declaration_titles = legal_basis.get('data_declarations', [])
data_declarations = [DataDeclaration.objects.get(title=title, dataset=dataset_object) for title in data_declaration_titles]
if len(datasets_legal_bases) == 0:
legal_basis_obj = LegalBasis.objects.create(
dataset=dataset_object,
remarks=legal_basis.get('legal_basis_notes', '')
)
elif len(legal_basis_obj) == 1:
legal_basis_obj = legal_basis_obj[0]
legal_basis_obj.data_declarations.set(data_declarations)
legal_basis_obj.legal_basis_types.set(legal_basis_types)
legal_basis_obj.personal_data_types.set(personal_data_types)
legal_basis_obj.save()
else:
# Try looking for the correct LegalBasis
legal_basis_obj = LegalBasis.objects.filter(
dataset=dataset_object,
remarks=legal_basis.get('legal_basis_notes', ''),
data_declarations__in=data_declarations
)
if len(legal_basis_obj) == 1:
legal_basis_obj = legal_basis_obj[0]
else:
legal_basis_obj = LegalBasis.objects.create(
dataset=dataset_object,
remarks=legal_basis.get('legal_basis_notes', '')
)
legal_basis_obj.data_declarations.set(data_declarations)
legal_basis_obj.legal_basis_types.set(legal_basis_types)
legal_basis_obj.personal_data_types.set(personal_data_types)
legal_basis_obj.save()
# Do not add data declaration to global legal basis
legal_basis_obj = datasets_legal_bases[0]
if len(legal_basis_obj.data_declarations.all()) > 0:
for data_declaration in data_declarations:
legal_basis_obj.data_declarations.add(data_declaration)
return legal_basis_obj
......@@ -489,7 +491,7 @@ class DatasetsImporter(BaseImporter):
else:
cohort = Cohort.objects.create(title=name)
cohort.description = description
cohort.comments = description
cohort.ethics_confirmation = has_ethics_approval
cohort.ethics_notes = ethics_approval_notes
cohort.cohort_web_page = url
......
......@@ -28,6 +28,9 @@ class PartnersImporter(BaseImporter):
try:
partner = get_partner(elu_accession=partner_dict.get('external_id', None),
name=partner_dict.get('name'))
if self.skip_on_exist:
self.logger.warning(f'Partner with name \""{partner_dict.get("name")}"\" already found. It will be skipped.')
return True
self.logger.warning(
f'Partner with name \""{partner_dict.get("name")}"\" already found. It will be updated.')
if partner.is_published:
......@@ -46,6 +49,7 @@ class PartnersImporter(BaseImporter):
partner.updated = True
if self.publish_on_import:
self.publish_object(partner)
return True
@staticmethod
def process_sector_category(partner_dict):
......
......@@ -70,6 +70,9 @@ class ProjectsImporter(BaseImporter):
)
else:
acronym_to_show = acronym.encode('utf8')
if self.skip_on_exist:
self.logger.warning(f"Project with acronym '{acronym_to_show}' already found. It will be skipped.")
return True
self.logger.warning(f"Project with acronym '{acronym_to_show}' already found. It will be updated.")
project.title = name
project.description = description
......
......@@ -27,6 +27,12 @@ class ImportBaseCommand(BaseCommand):
help="Entities are published on import. The identifiers are taken from JSON file or generated automatically if missing.",
dest='publish_on_import'
)
parser.add_argument(
'--skip-on-exist',
action='store_true',
help="When a matching entity already exists in database it is updated by default (all attributes present in JSON are updated). Use this flag to skip entity import on match (update of sub-entities is also skipped)",
dest='skip_on_exist'
)
parser.add_argument(
'--verbose',
action='store_true',
......@@ -53,15 +59,14 @@ class ImportBaseCommand(BaseCommand):
path_to_json_file = options.get('file')
validate = not(options.get('no_validation'))
path_to_json_directory = options.get('directory')
skip_on_exist = options.get('skip_on_exist')
importer = self.get_importer(
publish_on_import = publish_on_import,
exit_on_error = exit_on_error,
verbose = verbose,
validate = validate
)
validate = validate,
skip_on_exist = skip_on_exist )
if not(path_to_json_directory or path_to_json_file):
raise CommandError('Either directory (--directory) or file (--file) argument must be specified!')
......@@ -86,9 +91,8 @@ class ImportBaseCommand(BaseCommand):
publish_on_import=False,
exit_on_error=False,
verbose=False,
validate=True
validate=True,
skip_on_exist=False
):
raise NotImplementedError("Abstract method: Implement this method in the child class.")
......
......@@ -10,6 +10,7 @@ class Command(ImportBaseCommand):
publish_on_import=False,
exit_on_error=False,
verbose=False,
validate=True
validate=True,
skip_on_exist=False
):
return DatasetsImporter(publish_on_import, exit_on_error, verbose, validate)
return DatasetsImporter(publish_on_import, exit_on_error, verbose, validate, skip_on_exist)
......@@ -10,6 +10,7 @@ class Command(ImportBaseCommand):
publish_on_import=False,
exit_on_error=False,
verbose=False,
validate=True
validate=True,
skip_on_exist=False
):
return PartnersImporter(publish_on_import, exit_on_error, verbose, validate)
return PartnersImporter(publish_on_import, exit_on_error, verbose, validate, skip_on_exist)
......@@ -10,6 +10,7 @@ class Command(ImportBaseCommand):
publish_on_import=False,
exit_on_error=False,
verbose=False,
validate=True
validate=True,
skip_on_exist=False
):
return ProjectsImporter(publish_on_import, exit_on_error, verbose, validate)
return ProjectsImporter(publish_on_import, exit_on_error, verbose, validate, skip_on_exist)