diff --git a/2021/2021-04-20_IT101-DM/slides/data-housekeeping.md b/2021/2021-04-20_IT101-DM/slides/data-housekeeping.md index ab2cca61ce38875fd70e9cd34ee325d0c4160b7a..8cb4d014ea2b7732e1aa2b6913379a74021cf527 100644 --- a/2021/2021-04-20_IT101-DM/slides/data-housekeeping.md +++ b/2021/2021-04-20_IT101-DM/slides/data-housekeeping.md @@ -73,13 +73,6 @@ From Jenny Bryan by CC-BY * Separate files you are actively working from the old ones * Orient newcomers to the group's conventions -<div style="position:absolute;left:43%;top:10%"> -<img src="slides/img/folder_structure.png" height="700px"> -</div> -<div style="position:absolute; width:45%; left:50%; top:28em; text-align:right"> -<a href="https://riojournal.com/article/56508/" style="color:grey; font-size:0.8em;">Foundational Practices of Research Data Management</a> -</div> - # Data housekeeping diff --git a/2021/2021-04-20_IT101-DM/slides/ingestion.md b/2021/2021-04-20_IT101-DM/slides/ingestion.md index 8e377655a556d2575809edaa63de35fe63d948ee..7d168db8c9e03e261196b436ccae1b460f49fc4e 100644 --- a/2021/2021-04-20_IT101-DM/slides/ingestion.md +++ b/2021/2021-04-20_IT101-DM/slides/ingestion.md @@ -4,34 +4,50 @@ <img src="slides/img/LCSB_storages_full.png" height="750px"> </div> -<div class='fragment' style="position:relative"> +<div class='fragment' style="position:absolute"> <img src="slides/img/LCSB_storages_personal-crossed.png" height="750px"> +</div> + +<div style="position:absolute; width:45%; left:50%; top:28em; text-align:right"> +<a href=" https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-02" style="color:grey; font-size:0.8em;">Data Storage and Backup Policy</a> +</div> + + + +# Data ingestion/transfer +## Receiving and sending data -<div style="position:absolute;left:65%;top:60%"> +<img height="400px" style="position:relative;left:10%" src="slides/img/banned_exchange_channels.png"><br> +<div style="position:absolute; left:10%;width:30%"> -* Unless consortium/project has formally agreed to use a secure 
commercial cloud +## E-mail is not for data transfer + +* Avoid transfer of any data by e-mail +* E-mail is a poor repository +* Data can be read on passage </div> +<div class="fragment" style="left:50%; width:30%; position:absolute"> +## Exchanging data +* Share on Atlas server +* OwnCloud share (LCSB - BioCore) +* DropIt service (SIU) +* LFT (IBM Aspera) share for sensitive data +</div> </div> <div style="position:absolute; width:45%; left:50%; top:28em; text-align:right"> -<a href=" https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-02" style="color:grey; font-size:0.8em;">Data Storage and Backup Policy</a> +<a href=" https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-05" style="color:grey; font-size:0.8em;">Research Human Data Sharing Policy</a> </div> - -# Data ingestion: Transfer and Integrity - * When sending data: <font color="red">Do not use emails, use secure platforms (Cloud, Aspera, Atlas share...)!</font> - -<div class="fragment"> +# Data ingestion/transfer Data can be corrupted: - * (non-)malicious modification * faulty file transfer * disk corruption -</div> <div class="fragment"> @@ -39,8 +55,8 @@ Data can be corrupted: * disable write access to the source data * generate checksums! 
- -<div style="position:absolute;left:40%;top:30%"> + +<div style="position:absolute;left:40%"> <img src="slides/img/checksum.png" width="500px"> </div> </div> @@ -61,33 +77,3 @@ Data can be corrupted: <div style="position:absolute; width:45%; left:50%; top:28em; text-align:right"> <a href=" https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-02" style="color:grey; font-size:0.8em;">Data Storage and Backup Policy</a> </div> - - - -# Data ingestion/Integrity -## Encryption -<div class='fragment' style="position:relative;left:25%;top:60%"> -<img align="middle" height="300px" src="slides/img/encryption.png"> -</div> -<div class='fragment'> - -* Sensitive data protected by encryption -</div> -<div class='fragment'> - -* Guaranted confidentiality -</div> -<div class='fragment'> - -* Encryption key which need to be kept safe -</div> -<div class='fragment'> - -* <font color= red>Loosing your encryption key means loosing your data!</font> -</div> -<div class='fragment'> - -* Make a off-site backup of your data -</div> - - diff --git a/2021/2021-04-20_IT101-DM/slides/introduction.md b/2021/2021-04-20_IT101-DM/slides/introduction.md index fed0b0971b7d1f868f2fd8213461c724834da16a..95e30815a03c5be0f1dd409701ec0edc15d2aa73 100644 --- a/2021/2021-04-20_IT101-DM/slides/introduction.md +++ b/2021/2021-04-20_IT101-DM/slides/introduction.md @@ -24,5 +24,3 @@ Prof. Dr. 
Rudi Balling, director * Technicians * Administrators </div> - - diff --git a/2021/2021-04-20_IT101-DM/slides/list.json b/2021/2021-04-20_IT101-DM/slides/list.json index 17136d973bc9a81e70687fa1934d0f5b4f9c5baf..5dbc2360d7b5b8d68580868e6431ce26e8746eb1 100644 --- a/2021/2021-04-20_IT101-DM/slides/list.json +++ b/2021/2021-04-20_IT101-DM/slides/list.json @@ -1,19 +1,17 @@ [ - { "filename": "index.md" }, - { "filename": "introduction.md" }, - { "filename": "access_management.md" }, - { "filename": "data-introduction.md" }, - { "filename": "data_flow.md" }, - { "filename": "ingestion.md" }, - { "filename": "storage_setup.md" }, - { "filename": "data-housekeeping.md" }, - { "filename": "howtos.md" }, - { "filename": "reproducibility.md" }, - { "filename": "code_versioning.md" }, - { "filename": "visualization.md" }, - { "filename": "data_life_cycle.md" }, - { "filename": "problem_solving.md" }, - { "filename": "fair-principles.md" }, - { "filename": "r3_group.md" }, - { "filename": "thanks.md" } -] \ No newline at end of file + {"filename": "index.md"}, + {"filename": "introduction.md"}, + {"filename": "data-introduction.md"}, + {"filename": "data_flow.md"}, + {"filename": "ingestion.md"}, + {"filename": "storage_setup.md"}, + {"filename": "data-housekeeping.md"}, + {"filename": "howtos.md"}, + {"filename": "reproducibility.md"}, + {"filename": "code_versioning.md"}, + {"filename": "visualization.md"}, + {"filename": "problem_solving.md"}, + {"filename": "fair-principles.md"}, + {"filename": "r3_group.md"}, + {"filename": "thanks.md"} +] diff --git a/2021/2021-04-20_IT101-DM/slides/reproducibility.md b/2021/2021-04-20_IT101-DM/slides/reproducibility.md index b93079f25ad16151f5e2db41b3d55f4c8dd6cb54..a73310ceaf1031a2e79e0f55083319e52680f5cf 100644 --- a/2021/2021-04-20_IT101-DM/slides/reproducibility.md +++ b/2021/2021-04-20_IT101-DM/slides/reproducibility.md @@ -94,4 +94,3 @@ <img src="slides/img/red-cross.png" width="700px"><br> </div> </div> - diff --git 
a/2021/2021-04-20_IT101-DM/slides/storage_setup.md b/2021/2021-04-20_IT101-DM/slides/storage_setup.md index d321b5ccd652709e969a2027cac2d7c3a7c69a6a..c4f5e9070cec562fe4ecf14589be1a07c99c647f 100644 --- a/2021/2021-04-20_IT101-DM/slides/storage_setup.md +++ b/2021/2021-04-20_IT101-DM/slides/storage_setup.md @@ -4,8 +4,12 @@ * Regularly update your SW/OS * Encrypt movable media +### Passwords -<div class="fragment" > +* Strong passwords +* Password manager +* Safe password exchange channels +* Expiration time on password share ### Backup * take care of your own backups! @@ -20,35 +24,6 @@ <a href=" https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-02" style="color:grey; font-size:0.8em;">Data Storage and Backup Policy</a> </div> -</div> - -<div class="fragment"> - -### Passwords - -* Strong passwords -* Password manager -* Safe password exchange channels -* Expiration time on password share -</div> - - - -# Storage set-up -## Password exchange channels -<div style="position:relative"> -<img src="slides/img/privateBin.png" height="350px"> -</div> -<div style="position:absolute;left:65%;top:85%"> - - -* Free service provided by LSCB at <a href="https://privatebin.lcsb.uni.lu" style="color:blue; font-size:0.8em;">privatebin.lcsb.uni.lu</a> -* **LUMS** account is required -* Set expiry period -* Can expire upon first access -* Password only accessible by sender and recipient -</div> - # Storage set-up diff --git a/2021/2021-07-27_IT101-DM/slides/code_versioning.md b/2021/2021-07-27_IT101-DM/slides/code_versioning.md deleted file mode 120000 index b5c7e06fec7c70db4c46438f42df5aada0c4e3b6..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/code_versioning.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/code_versioning.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/code_versioning.md b/2021/2021-07-27_IT101-DM/slides/code_versioning.md new file mode 100644 index 
0000000000000000000000000000000000000000..eccd4a189300591de6be79ffbee91fcc987c631c --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/code_versioning.md @@ -0,0 +1,33 @@ +# Code versioning +<div style="position:absolute; width:40%"> + +**git** + + * Current standard for code versioning + * Maintain versions of your code as it develops + * Local system, which does not require an online repository + * Repositories allow distributed development + +<img align="middle" height="300px" src="slides/img/Git-logo.png"> +</div> + +<div class="fragment" style="position:absolute; left:50%; width:40%"> + +**git@lcsb** +* Recommended, supported repository +* Allows tracking of issues +* Ready for continuous integration - code checked on commits to the repository. +* [https://git-r3lab.uni.lu](https://git-r3lab.uni.lu) + + + **Use at LCSB** + + * All analyses code should be in a repository + * Minimally at submission of a manuscript + * Better daily + * Even better "analyses chunkwise" +</div> + +<div style="position:absolute; width:45%; left:50%; top:28em; text-align:right"> +<a href="https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-07" style="color:grey; font-size:0.8em;">LCSB-POL-BIC-07 Source Code Management Policy</a> +</div> diff --git a/2021/2021-07-27_IT101-DM/slides/data-housekeeping.md b/2021/2021-07-27_IT101-DM/slides/data-housekeeping.md deleted file mode 120000 index 6146c0087b6fb77fe25b41811ba800db5a2f6f45..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/data-housekeeping.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/data-housekeeping.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/data-housekeeping.md b/2021/2021-07-27_IT101-DM/slides/data-housekeeping.md new file mode 100644 index 0000000000000000000000000000000000000000..ab2cca61ce38875fd70e9cd34ee325d0c4160b7a --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/data-housekeeping.md @@ -0,0 +1,143 @@ +# Data housekeeping +## File names 
+<div style="display:flex; position:static; width:100%"> +<div class="fragment" data-fragment-index="0" style="position:static; width:30%"> + +### General principles + * Machine readable + * Human readable + * Plays well with default ordering +</div> +<div class="fragment" data-fragment-index="1" style="position:absolute; left:33%; width:30%"> + +### Separators + * No spaces + * Underscore to separate + * Hyphen to combine + +</div> +<div class="fragment" data-fragment-index="2" style="position:absolute; left:66%; width:30%"> + +### Date format follows **ISO 8601**<br> + + 2018-12-03<br> + 2018-12-06_1700 + +</div> +</div> + + +<div class="fragment" data-fragment-index="3" style="width:100%; position:static"> +<div style="position:absolute;width:55%"> +<b>Bad</b> names + +```bash + PhD-project-Jan19 alldata_final.foo + Finacial detailes BIocore 19/11/12.xls + ATACseq1Londonmapped.bam + Hlad.jez.M-L-průtoky JÃObj.z OhÅ™e-od 10-2011.xlsx +``` +</div> +<div style="position:relative;width:55%; bottom:20%; left:50%"> +<b>Good</b> names + +```bash +Iris-setosa_samples_1927-05-12.csv +PI102_Mouse12_EEG_2018-11-03_1245.tsv +Bioinfiniti_FullProposal_2018-11-15_1655.do +``` +</div> +</div> +<br> +<br> +<div class="fragment" data-fragment-index="3" style="width:100%;"> +From Jenny Bryan by CC-BY +(https://speakerdeck.com/jennybc/how-to-name-files) +</div> + + + +# Data housekeeping +## File organization +* Have folder organization conventions for your **group** + * Per Paper + * Per Study/Project + * Per Collaborator +* Keep <b>readme files</b> for data + * Title + * Date of Creation/Receipt + * Instrument or software specific information + * People involved + * Relations between multiple files/folders + +* Separate files you are actively working from the old ones +* Orient newcomers to the group's conventions + +<div style="position:absolute;left:43%;top:10%"> +<img src="slides/img/folder_structure.png" height="700px"> +</div> +<div style="position:absolute; width:45%; 
left:50%; top:28em; text-align:right"> +<a href="https://riojournal.com/article/56508/" style="color:grey; font-size:0.8em;">Foundational Practices of Research Data Management</a> +</div> + + + +# Data housekeeping +<div style="position:absolute"> + +## When working + * Clarify and separate source and intermediate data + * Keep data copies to a **minimum** + * Cleanup post-analysis + * Cleanup copies created for presentations or for sharing +</div> +<div style="position:relative;left:50%; width:40%"> +<img src="slides/img/cleaning-table.jpg" height="450px"> +</div> + + + +# Data housekeeping +## End of project + * handover data to a new responsible when leaving + * data should be kept as a single copy on server-side storage + * no copies on desktops and external devices + * non-proprietary formats + * minimal metadata + * sensitive data (e.g. whole genome) **must** be encrypted + <br/> + <br/> + * If not specified otherwise, data must be kept for **10 years** following project end for reproducibility purposes +<aside class="notes"> +Note: sometimes it is hard to find/understand dataset 10 days old +</aside> + +## In doubt on data archival? +Contact R<sup>3</sup> for support on archival of datasets using tickets: + * https://service.uni.lu/sp + * Home > Catalog > LCSB > Biocore: Application services > Request for: Support + +<div style="position:absolute; width:45%; left:50%; top:28em; text-align:right"> +<a href=" https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-03" style="color:grey; font-size:0.8em;">Research Data Retention and Archival Policy</a> +</div> + + + +# Data housekeeping - Summary +## Server is your friend! 
+ * Allows a consistent backup policy for your datasets + * Keeps number of copies to minimum + * Specification of clear access rights + * High accessibility + * Data are discoverable + * Server can't be stolen + +## General guidelines + * Use institutional media for storage of **all** data + * Research data (particularly sensitive data) should be in a single source location + * Enable encryption for data stored on movable media + * Clarify and separate source and intermediate data + * Disable write access to relevant source data (read-only) + * Backup research data! + * Download Anti-virus software + * Generate checksums diff --git a/2021/2021-07-27_IT101-DM/slides/data-introduction.md b/2021/2021-07-27_IT101-DM/slides/data-introduction.md deleted file mode 120000 index a38b5e87a9dad6508aabfeb86be6ee262cfe2dfc..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/data-introduction.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/data-introduction.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/data-introduction.md b/2021/2021-07-27_IT101-DM/slides/data-introduction.md new file mode 100644 index 0000000000000000000000000000000000000000..c269d80286a4bb458eec2d5640664eeb1e718713 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/data-introduction.md @@ -0,0 +1,86 @@ +# Data and metadata +<div style="display:grid;grid-gap:100px;grid-template-columns: 40% 40%"> +<div > + +## Data + * "*information in digital form that can be transmitted or processed*" + <p align="right">-- Merriam-Webster dictionary</p> + * "*information in an electronic form that can be stored and processed by a computer*" + <p align="right">--Cambridge dictionary</p> + +</div> +<div> + +## Metadata + * data describing other data + * information that is given to describe or help you use other information + * metadata are data + * can be processed and analyzed +</div> +</div> + +<div class="fragment"> + +## Metadata examples: +<div 
style="position:absolute"> + <ul> + <li> LabBook </li> + <li> author/owner of the data</li> + <li> origin of the data</li> + <li> data type</li> + </ul> +</div> +<div style="position:absolute;left:25%"> + <ul> + <li> description of content </li> + <li> modification date </li> + <li> description of modification </li> + <li> location </li> + </ul> +</div> +<div style="position:relative;left:50%;top:0.7em"> + <ul> + <li> calibration readings</li> + <li> software/firmware version</li> + <li> data purpose</li> + <li> means of creation</li> + </ul> +</div> +</div> + +<div class="fragment"> +<br> + +<center style="color:red">!Insufficient metadata make the data useless!</center> +</div> +<aside class="notes"> +Sometimes metadata collection takes more time than data collection +</aside> + + + +# LCSB research data +three categories: + * **Primary data** + * scientific data + * measurements, images, observations, notes, surveys, ... + * models, software codes, libraries, ... + * metadata directly describing the data + * data dictionaries + * format, version, coverage descriptions, ... + + * **Research record** + * description of the research process, including experiment + * experiment set-up + * followed protocols + * ... 
+ + * **Project accompanying documentation** + * ethical approvals, information on the consent + * collaboration agreements + * intellectual property ownership + * other relevant documentation + +<div style="position:absolute; width:45%; left:50%; top:28em; text-align:right;"> +<a href="https://howto.lcsb.uni.lu/internal/policies/LCSB-POL-BIC-03/" style="color:grey; font-size:0.8em;">LCSB-POL-BIC-03 Research Data Retention and Archival Policy</a> +</div> \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/data_flow.md b/2021/2021-07-27_IT101-DM/slides/data_flow.md deleted file mode 120000 index b7a2b3166efee639efba7cbf54e7f69b9d69f53f..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/data_flow.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/data_flow.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/data_flow.md b/2021/2021-07-27_IT101-DM/slides/data_flow.md new file mode 100644 index 0000000000000000000000000000000000000000..396c1ec70825f3695e46344ac94dc20617c71c2f --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/data_flow.md @@ -0,0 +1,106 @@ +# Typical flow of data + +<div style="display:grid;grid-gap:10px;grid-template-columns: 30% 20% 30%; + grid-auto-flow:column;grid-template-rows: repeat(4,auto);position:relative;left:8%"> + +<div class="content-box fragment" data-fragment-index="1"> + <div class="box-title red">Source data</div> + <div class="content"> + + * Experimental results + * Large data sets + * Manually collected data + * External + + </div> +</div> + +<div class="content-box fragment" data-fragment-index="2"> + <div class="box-title yellow">Intermediate</div> + <div class="content"> + + * Derived data + * Tidy data + * Curated sets + + </div> +</div> + +<div class="content-box fragment" data-fragment-index="3"> + <div class="box-title blue">Analyses</div> + <div class="content"> + + * Exploratory + * Model building + * Hypothesis testing + + </div> 
+</div> + +<div class="content-box fragment" data-fragment-index="4"> + <div class="box-title green">Dissemination</div> + <div class="content"> + + * Manuscript, report, presentation, ... + + </div> +</div> +<center> +<img src="slides/img/data-flow_sources.png" height=60%> +</center> +<center> +<img src="slides/img/data-flow_transformation.png" height=60%> +</center> +<center> +<img src="slides/img/data-flow_chart.png" height=60%> +</center> +<center> +<img src="slides/img/data-flow_paper.png" height=60%> +</center> + +<div class="content-box fragment" data-fragment-index="5"> +<div class="box-title red">Preserve</div> + <div class="content"> + + * Version data sets + * Backup + * Protect + + </div> +</div> + +<div class="content-box fragment" data-fragment-index="6"> + <div class="box-title yellow">Reproduce</div> + <div class="content"> + + * Automate your builds + * Use workflow tools (e.g. Snakemake) + + </div> +</div> + + +<div class="content-box fragment" data-fragment-index="7"> +<div class="box-title blue">Trace</div> + <div class="content"> + + * Multiple iterations. 
+ * Code versioning (Git) + + </div> +</div> + +<div class="content-box fragment" data-fragment-index="8"> +<div class="box-title green">Track</div> + <div class="content"> + + * Through multiple versions + + </div> +</div> + +</div> +<aside class="notes"> +flow of the data is downstream (mostly), but you are going back and forth +applies to all data (financial report, lab safety assessment) +</aside> diff --git a/2021/2021-07-27_IT101-DM/slides/data_life_cycle.md b/2021/2021-07-27_IT101-DM/slides/data_life_cycle.md index a1f1d84bd80a57b481a8c63d2aa70507731335e3..1b538d2e29fe326527e7b41394ff0258b0df2ce0 100644 --- a/2021/2021-07-27_IT101-DM/slides/data_life_cycle.md +++ b/2021/2021-07-27_IT101-DM/slides/data_life_cycle.md @@ -1,14 +1,14 @@ # Some practical recommandations -* Plan your data walking along the data life cycle +* Do your data processing according to the data life cycle steps <div class='fragment' style="position:relative;left:25%;top:60%"> <img align="middle" height="300px" src="slides/img/rdm-cycle.png"> </div> <div class="fragment"> -* Use data management tools: +* Use data management planning tools: * DMPonline <a href="https://dmponline.elixir-luxembourg.org/" style="color:blue; font-size:0.8em;">https://dmponline.elixir-luxembourg.org/</a> <img src="slides/img/dmponline_logo.png" height="50px"> * DS Wizard <a href="https://learning.ds-wizard.org/" style="color:blue; font-size:0.8em;">https://learning.ds-wizard.org/</a> diff --git a/2021/2021-07-27_IT101-DM/slides/fair-principles.md b/2021/2021-07-27_IT101-DM/slides/fair-principles.md deleted file mode 120000 index cb5bc476d4cec8f2137074d0b3f71da0653d415a..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/fair-principles.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/fair-principles.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/fair-principles.md b/2021/2021-07-27_IT101-DM/slides/fair-principles.md new file mode 100644 
index 0000000000000000000000000000000000000000..be70f11a6f0b941a0e7f7b86b990a1c098096282 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/fair-principles.md @@ -0,0 +1,12 @@ +# FAIR (meta)data principles + * dates back to 2014 + * well accepted by scientific community + * necessity in data driven science + * officially embraced by EU and G20 + * required by funding agencies and journal publishers + +<center> +<img src="slides/img/fair-principles.png" height="400px"> +</center> +<br> +<br> diff --git a/2021/2021-07-27_IT101-DM/slides/howtos.md b/2021/2021-07-27_IT101-DM/slides/howtos.md deleted file mode 120000 index 4ff818a5d3cb5e82adddc255ff18c2cf4fc89ae5..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/howtos.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/howtos.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/howtos.md b/2021/2021-07-27_IT101-DM/slides/howtos.md new file mode 100644 index 0000000000000000000000000000000000000000..34998e54e7604a3ee50213dfe9650785c9ced8ed --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/howtos.md @@ -0,0 +1,6 @@ +# LCSB How-Tos +<br> +https://howto.lcsb.uni.lu/ +<center> + <img src="slides/img/howtocard.png" width="50%"> +</center> diff --git a/2021/2021-07-27_IT101-DM/slides/img b/2021/2021-07-27_IT101-DM/slides/img deleted file mode 120000 index ad68bb48c4730adc2a31dfcacfca1ae5bebc8311..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/img +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/img \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/3pillars-full.png b/2021/2021-07-27_IT101-DM/slides/img/3pillars-full.png new file mode 120000 index 0000000000000000000000000000000000000000..749a0e275e9ae1d6c335d67989125317ee6a48d8 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/3pillars-full.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/3pillars-full.png \ No newline at 
end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/DinoSequentialSmaller.gif b/2021/2021-07-27_IT101-DM/slides/img/DinoSequentialSmaller.gif new file mode 120000 index 0000000000000000000000000000000000000000..1cec6b3fb31b7f7c3c1a3a04c71d3be0c343ddcc --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/DinoSequentialSmaller.gif @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/DinoSequentialSmaller.gif \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/Git-logo.png b/2021/2021-07-27_IT101-DM/slides/img/Git-logo.png new file mode 120000 index 0000000000000000000000000000000000000000..ee62be3fb0054eb618a48667c6ddca3fde5b49cc --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/Git-logo.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/Git-logo.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_backed-up.png b/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_backed-up.png new file mode 120000 index 0000000000000000000000000000000000000000..f7c417b69691c5a71f15b059b08cab8216d3db5f --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_backed-up.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/LCSB_storages_backed-up.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_backup.png b/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_backup.png new file mode 120000 index 0000000000000000000000000000000000000000..87637b1fcf17e6367c1cee4013682143bb2443aa --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_backup.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/LCSB_storages_backup.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_full.png b/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_full.png new file mode 120000 index 0000000000000000000000000000000000000000..260f14cef8f5023868ba5d1a869020107cd6a580 --- /dev/null +++ 
b/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_full.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/LCSB_storages_full.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_personal-crossed.png b/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_personal-crossed.png new file mode 120000 index 0000000000000000000000000000000000000000..4a418faa133976e0e582560fa7ca38ccc9650093 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/LCSB_storages_personal-crossed.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/LCSB_storages_personal-crossed.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/R3_profile_pictures b/2021/2021-07-27_IT101-DM/slides/img/R3_profile_pictures new file mode 120000 index 0000000000000000000000000000000000000000..4468ce0577c9ede76f3aad89ff95c422b3b43d03 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/R3_profile_pictures @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/R3_profile_pictures \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/access.png b/2021/2021-07-27_IT101-DM/slides/img/access.png new file mode 100644 index 0000000000000000000000000000000000000000..cd93e7f70a6d3cd4cdca2272e4d161215c5ba335 Binary files /dev/null and b/2021/2021-07-27_IT101-DM/slides/img/access.png differ diff --git a/2021/2021-07-27_IT101-DM/slides/img/banned_exchange_channels.png b/2021/2021-07-27_IT101-DM/slides/img/banned_exchange_channels.png new file mode 120000 index 0000000000000000000000000000000000000000..70d74b515464b7e517093eeb65394b1f46eaf531 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/banned_exchange_channels.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/banned_exchange_channels.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/checksum.png b/2021/2021-07-27_IT101-DM/slides/img/checksum.png new file mode 120000 index 
0000000000000000000000000000000000000000..3896df65916c1bd2be8867e3e105ad14771c3e75 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/checksum.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/checksum.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/cleaning-table.jpg b/2021/2021-07-27_IT101-DM/slides/img/cleaning-table.jpg new file mode 120000 index 0000000000000000000000000000000000000000..db0fa0e61145e281011cf2ed44b741818a8f40e2 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/cleaning-table.jpg @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/cleaning-table.jpg \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/code-example.png b/2021/2021-07-27_IT101-DM/slides/img/code-example.png new file mode 120000 index 0000000000000000000000000000000000000000..665cd8f1622b51147db401cfffff8ce5ceaae1c5 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/code-example.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/code-example.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/data-flow_chart.png b/2021/2021-07-27_IT101-DM/slides/img/data-flow_chart.png new file mode 120000 index 0000000000000000000000000000000000000000..1b4a4ca51216007724d1e41872c152457f271e18 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/data-flow_chart.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/data-flow_chart.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/data-flow_paper.png b/2021/2021-07-27_IT101-DM/slides/img/data-flow_paper.png new file mode 120000 index 0000000000000000000000000000000000000000..da12f4563fe3795416fd67208e0af536752b87bd --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/data-flow_paper.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/data-flow_paper.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/data-flow_sources.png 
b/2021/2021-07-27_IT101-DM/slides/img/data-flow_sources.png new file mode 120000 index 0000000000000000000000000000000000000000..8879a37ab94497a4210bbc08a480bbb091553291 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/data-flow_sources.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/data-flow_sources.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/data-flow_transformation.png b/2021/2021-07-27_IT101-DM/slides/img/data-flow_transformation.png new file mode 120000 index 0000000000000000000000000000000000000000..0c70946a15c6e66959826827a4fa3c01eb10848d --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/data-flow_transformation.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/data-flow_transformation.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/dmponline_logo.png b/2021/2021-07-27_IT101-DM/slides/img/dmponline_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..37ee4d6cc72a718e9dd1c99027c744d82ea3bb2d Binary files /dev/null and b/2021/2021-07-27_IT101-DM/slides/img/dmponline_logo.png differ diff --git a/2021/2021-07-27_IT101-DM/slides/img/dsw_logo.png b/2021/2021-07-27_IT101-DM/slides/img/dsw_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..353253f89e95e9bdd2cc5fe5fb3fc5dc95b1a82a Binary files /dev/null and b/2021/2021-07-27_IT101-DM/slides/img/dsw_logo.png differ diff --git a/2021/2021-07-27_IT101-DM/slides/img/encryption.png b/2021/2021-07-27_IT101-DM/slides/img/encryption.png new file mode 100644 index 0000000000000000000000000000000000000000..15e6b084f7bd8cd7e2e856e47509b32c280697af Binary files /dev/null and b/2021/2021-07-27_IT101-DM/slides/img/encryption.png differ diff --git a/2021/2021-07-27_IT101-DM/slides/img/excel_analyses-sheet.jpeg b/2021/2021-07-27_IT101-DM/slides/img/excel_analyses-sheet.jpeg new file mode 120000 index 
0000000000000000000000000000000000000000..d77fff5d9f1e3dd27edbe0bd2e03ff39953e667e --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/excel_analyses-sheet.jpeg @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/excel_analyses-sheet.jpeg \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/excel_data-sheet.png b/2021/2021-07-27_IT101-DM/slides/img/excel_data-sheet.png new file mode 120000 index 0000000000000000000000000000000000000000..69cebdf2b4855bef71da01c4a376a8d9cf5a575a --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/excel_data-sheet.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/excel_data-sheet.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/fair-principles.png b/2021/2021-07-27_IT101-DM/slides/img/fair-principles.png new file mode 120000 index 0000000000000000000000000000000000000000..002ee86186db99a66fc4e4710d73f1a75b938ab8 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/fair-principles.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/fair-principles.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/favicon.ico b/2021/2021-07-27_IT101-DM/slides/img/favicon.ico new file mode 120000 index 0000000000000000000000000000000000000000..ef58feaef2df4b5cc84ce54353556dcfd5863401 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/favicon.ico @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/favicon.ico \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/folder_structure.png b/2021/2021-07-27_IT101-DM/slides/img/folder_structure.png new file mode 100644 index 0000000000000000000000000000000000000000..812d582c32374d21f98159fdfedbfc981688b3a0 Binary files /dev/null and b/2021/2021-07-27_IT101-DM/slides/img/folder_structure.png differ diff --git a/2021/2021-07-27_IT101-DM/slides/img/howtocard.png b/2021/2021-07-27_IT101-DM/slides/img/howtocard.png new file mode 120000 index 
0000000000000000000000000000000000000000..12fc285da97bbb0aad85e8afbd464d074a53b96c --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/howtocard.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/howtocard.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/lums.png b/2021/2021-07-27_IT101-DM/slides/img/lums.png new file mode 100644 index 0000000000000000000000000000000000000000..3173510a11a0b6fff89d82e2e3c22fae92ff6816 Binary files /dev/null and b/2021/2021-07-27_IT101-DM/slides/img/lums.png differ diff --git a/2021/2021-07-27_IT101-DM/slides/img/plot-data.png b/2021/2021-07-27_IT101-DM/slides/img/plot-data.png new file mode 120000 index 0000000000000000000000000000000000000000..ee26c4a603e6a09cb5887d1bae05939ac4b9d4e6 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/plot-data.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/plot-data.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/privateBin.png b/2021/2021-07-27_IT101-DM/slides/img/privateBin.png new file mode 100644 index 0000000000000000000000000000000000000000..b79412bbe344a3b0352c61102426e4e6de7a7433 Binary files /dev/null and b/2021/2021-07-27_IT101-DM/slides/img/privateBin.png differ diff --git a/2021/2021-07-27_IT101-DM/slides/img/r3-training-logo.png b/2021/2021-07-27_IT101-DM/slides/img/r3-training-logo.png new file mode 120000 index 0000000000000000000000000000000000000000..cde5ac94110a9e1ef923289d8c4d7c5e4cbeb2b8 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/r3-training-logo.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/r3-training-logo.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/rdm-cycle.png b/2021/2021-07-27_IT101-DM/slides/img/rdm-cycle.png new file mode 100644 index 0000000000000000000000000000000000000000..7ed174c35f7a36c5a376725a8f2fa9b757327874 Binary files /dev/null and b/2021/2021-07-27_IT101-DM/slides/img/rdm-cycle.png differ diff --git 
a/2021/2021-07-27_IT101-DM/slides/img/red-cross.png b/2021/2021-07-27_IT101-DM/slides/img/red-cross.png new file mode 120000 index 0000000000000000000000000000000000000000..0ae82e444e06be1a7d513b52fe869a13f2441e3c --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/red-cross.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/red-cross.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/reproducibility_nature.png b/2021/2021-07-27_IT101-DM/slides/img/reproducibility_nature.png new file mode 120000 index 0000000000000000000000000000000000000000..b112ef9beb325d3e7a10173c0381889644b427e3 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/reproducibility_nature.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/reproducibility_nature.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/rudi_balling.jpg b/2021/2021-07-27_IT101-DM/slides/img/rudi_balling.jpg new file mode 120000 index 0000000000000000000000000000000000000000..e3f0571c956ef95bcdb91cce545604e800dee976 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/rudi_balling.jpg @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/rudi_balling.jpg \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/scripts b/2021/2021-07-27_IT101-DM/slides/img/scripts new file mode 120000 index 0000000000000000000000000000000000000000..bd2e3f8b0f4f6d58ffe3741284d1175bd5b8f6bb --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/scripts @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/scripts \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/undraw_secure_server_s9u8.png b/2021/2021-07-27_IT101-DM/slides/img/undraw_secure_server_s9u8.png new file mode 120000 index 0000000000000000000000000000000000000000..d70265f26d47f676feab2820d731082dde885bd1 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/undraw_secure_server_s9u8.png @@ -0,0 +1 @@ 
+../../../2021-04-20_IT101-DM/slides/img/undraw_secure_server_s9u8.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/img/wordcloud.png b/2021/2021-07-27_IT101-DM/slides/img/wordcloud.png new file mode 120000 index 0000000000000000000000000000000000000000..413644040e4b13e1f3f2cda1f62fdba2f224cec2 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/img/wordcloud.png @@ -0,0 +1 @@ +../../../2021-04-20_IT101-DM/slides/img/wordcloud.png \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/ingestion.md b/2021/2021-07-27_IT101-DM/slides/ingestion.md deleted file mode 120000 index 57dfd695c79678991180d8e7cbd8a899098989ec..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/ingestion.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/ingestion.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/ingestion.md b/2021/2021-07-27_IT101-DM/slides/ingestion.md new file mode 100644 index 0000000000000000000000000000000000000000..383467f0027ee1671425311aa48254ff9207ea4d --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/ingestion.md @@ -0,0 +1,90 @@ +# Data housekeeping +## Available data storage +<div class='fragment' style="position:absolute"> +<img src="slides/img/LCSB_storages_full.png" height="750px"> +</div> + +<div class='fragment' style="position:relative"> +<img src="slides/img/LCSB_storages_personal-crossed.png" height="750px"> + +<div style="position:absolute;left:65%;top:60%"> + +* Unless consortium/project has formally agreed to use a secure commercial cloud + +</div> + +</div> + +<div style="position:absolute; width:45%; left:50%; top:28em; text-align:right"> +<a href=" https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-02" style="color:grey; font-size:0.8em;">Data Storage and Backup Policy</a> +</div> + + + + +# Data ingestion: Transfer and Integrity + * When sending data: <font color="red">Do not use emails, use secure platforms (Cloud, Aspera, Atlas 
share...)!</font> + +<div class="fragment"> +Data can be corrupted: + + * (non-)malicious modification + * faulty file transfer + * disk corruption +</div> + +<div class="fragment"> + +### Solution + + * disable write access to the source data + * generate checksums! + +<div style="position:absolute;left:40%;top:30%"> +<img src="slides/img/checksum.png" width="500px"> +</div> +</div> + +<div class="fragment" style="position:relative; left:0%"> + + +## When to generate checksums? +* before data transfer + - new dataset from collaborator + - upload to remote repository + +* long term storage + - master version of dataset + - snapshot of data for publication +</div> + +<div style="position:absolute; width:45%; left:50%; top:28em; text-align:right"> +<a href=" https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-02" style="color:grey; font-size:0.8em;">Data Storage and Backup Policy</a> +</div> + + + +# Data ingestion/Integrity +## Encryption +<div class='fragment' style="position:relative;left:25%;top:60%"> +<img align="middle" height="300px" src="slides/img/encryption.png"> +</div> + +<div class='fragment'> + +* Guaranteed confidentiality +</div> +<div class='fragment'> + +* Encryption key needs to be kept safe +</div> +<div class='fragment'> + +* <font color= red>Losing your encryption key means losing your data!</font> +</div> +<div class='fragment'> + +* Make an off-site backup of your data +</div> + + diff --git a/2021/2021-07-27_IT101-DM/slides/introduction.md b/2021/2021-07-27_IT101-DM/slides/introduction.md deleted file mode 120000 index f4837a3cd8c480b8e28ff81a28ba9d69228b8524..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/introduction.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/introduction.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/introduction.md b/2021/2021-07-27_IT101-DM/slides/introduction.md new file mode 100644 index
0000000000000000000000000000000000000000..fed0b0971b7d1f868f2fd8213461c724834da16a --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/introduction.md @@ -0,0 +1,28 @@ +# Introduction +<div class="fragment" style="position:absolute"> +<img height="450px" src="slides/img/wordcloud.png"><br> + +## Learning objectives + + * How to manage your data + * How to look at and analyze your data + * Solving issues with computers + * Reproducibility in the research data life cycle + +</div> +<div class="fragment" style="position:relative;left:50%; width:40%"> +<div > +<center> +<img height="405px" src="slides/img/rudi_balling.jpg"><br> +Prof. Dr. Rudi Balling, director +</center> +</div> + +## Pertains to practically all people at LCSB + * Scientists + * PhD candidates + * Technicians + * Administrators +</div> + + diff --git a/2021/2021-07-27_IT101-DM/slides/list.json b/2021/2021-07-27_IT101-DM/slides/list.json deleted file mode 120000 index df07566dc8dc33d5b4f759a23c0abbac0d2ea3f8..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/list.json +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/list.json \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/list.json b/2021/2021-07-27_IT101-DM/slides/list.json new file mode 100644 index 0000000000000000000000000000000000000000..17136d973bc9a81e70687fa1934d0f5b4f9c5baf --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/list.json @@ -0,0 +1,19 @@ +[ + { "filename": "index.md" }, + { "filename": "introduction.md" }, + { "filename": "access_management.md" }, + { "filename": "data-introduction.md" }, + { "filename": "data_flow.md" }, + { "filename": "ingestion.md" }, + { "filename": "storage_setup.md" }, + { "filename": "data-housekeeping.md" }, + { "filename": "howtos.md" }, + { "filename": "reproducibility.md" }, + { "filename": "code_versioning.md" }, + { "filename": "visualization.md" }, + { "filename": "data_life_cycle.md" }, + { "filename": "problem_solving.md" }, + {
"filename": "fair-principles.md" }, + { "filename": "r3_group.md" }, + { "filename": "thanks.md" } +] \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/overview.md b/2021/2021-07-27_IT101-DM/slides/overview.md deleted file mode 120000 index 50f8000fb02fa74861ecd6e1f32132f206276317..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/overview.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/overview.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/overview.md b/2021/2021-07-27_IT101-DM/slides/overview.md new file mode 100644 index 0000000000000000000000000000000000000000..6f9143c7c64fa539eb44c3ea5ff9eec076701879 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/overview.md @@ -0,0 +1,25 @@ +## Overview + +0. Introduction - learning objectives + targeted audience +1. Data workflow +1. Ingestion: + * receiving/sending/sharing data + * file naming + * checksums + * backup + 1. making data tidy + * what is table + * + 1. Learning to code workflows and analyses - excel files, coding + 1. Code versioning and reproducibility + 1. Visualization + * see the data + 1. problem solving + * guide + * rubberducking + * google for help + * oracle + 1. R3 team + 1. Acknowledgment + + 1. 
data minimization diff --git a/2021/2021-07-27_IT101-DM/slides/reproducibility.md b/2021/2021-07-27_IT101-DM/slides/reproducibility.md deleted file mode 120000 index 8f6e4d1f386bc7a0ca5a6939191cc7a5b7b50f89..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/reproducibility.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/reproducibility.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/reproducibility.md b/2021/2021-07-27_IT101-DM/slides/reproducibility.md new file mode 100644 index 0000000000000000000000000000000000000000..a21df4faf8d6981a4911ac5060a78717c2656b53 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/reproducibility.md @@ -0,0 +1,97 @@ +# Reproducibility +* ensures credibility +* key requirement for follow-up and collaborative studies + +<div style="position:absolute"> +<img src="slides/img/reproducibility_nature.png" height="650px"> +</div> + +<div class="fragment" style="position:relative;left:50%"> + +## Why is our workflow not reproducible? + + Lack of provenance: + * Input data downloaded from “some website” + * Copy & paste operations + * Manual text entry + * Analysis not coded + +</div> + + + +# Reproducibility +## Learning to code workflows and analyses +<div style="display:inline-grid;grid-gap: 40px;grid-template-columns: auto auto;position:relative;left:12%"> +<div class="fragment"> +<div class="content-box"> +<div class="box-title red">Spreadsheets alone</div> +<div class="content"> + + * Is great for looking at data. + * Data entry is fast. + * Analysis flow is hidden and not in focus. + +</div> +</div> +<div style="text-align:center"> +<img src="slides/img/excel_data-sheet.png" height="280px"> +</div> +</div> + +<div class="fragment"> +<div class="content-box"> +<div class="box-title">Coding</div> +<div class="content"> + + * Is great for controlling analysis + * Data is hidden. + * Flow is visible.
+</div> +</div> +<img src="slides/img/code-example.png" height="280px"> +</div> +</div> + +<div class="content-box fragment" style="left:15%;width:60%;position:relative"> +<div class="box-title green">Develop data science skills</div> +<div class="content"> + + * Develop good data management and analysis habits. + * Start coding your analysis within spreadsheets. + * Make yourself familiar with a statistics environment such as R, Python or Matlab + * No need to learn a high level programming language such as C++ or Java. + +</div> +</div> + +</div> + + + +# Table +<div style="position:absolute"> +"Tabular format of data" + +### Header + + * one line! + * **good** names of columns + +### Rows + * represent observations/entities + +### Columns + * represent property of the observations + * one data type +</div> +<div style="left:50%; position:relative; top:-2em"> +<img src="slides/img/excel_data-sheet.png" width="700px"> +<div class="fragment" data-fragment-index="3" style="position:absolute"> +<img src="slides/img/excel_analyses-sheet.jpeg" width="700px"><br> +</div> +<div class="fragment" data-fragment-index="4" style="position:relative"> +<img src="slides/img/red-cross.png" width="700px"><br> +</div> +</div> + diff --git a/2021/2021-07-27_IT101-DM/slides/storage_setup.md b/2021/2021-07-27_IT101-DM/slides/storage_setup.md deleted file mode 120000 index 9fe2d27f633b272789cb2dd57b3d963895e2643d..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/storage_setup.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/storage_setup.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/storage_setup.md b/2021/2021-07-27_IT101-DM/slides/storage_setup.md new file mode 100644 index 0000000000000000000000000000000000000000..d321b5ccd652709e969a2027cac2d7c3a7c69a6a --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/storage_setup.md @@ -0,0 +1,79 @@ +# Storage set-up + +* Download Anti-virus software +* Regularly 
update your SW/OS +* Encrypt movable media + + +<div class="fragment" > + +### Backup + * take care of your own backups! + * don't work on your backup copy! + * minimum is <b>3-2-1 backup rule</b> + + +<div style="position:absolute;right:10%;top:10%"> +<img src="slides/img/undraw_secure_server_s9u8.png" height="750px"> +</div> +<div style="position:absolute; width:45%; left:50%; top:28em; text-align:right"> +<a href=" https://howto.lcsb.uni.lu/?policies:LCSB-POL-BIC-02" style="color:grey; font-size:0.8em;">Data Storage and Backup Policy</a> +</div> + +</div> + +<div class="fragment"> + +### Passwords + +* Strong passwords +* Password manager +* Safe password exchange channels +* Expiration time on password share +</div> + + + +# Storage set-up +## Password exchange channels +<div style="position:relative"> +<img src="slides/img/privateBin.png" height="350px"> +</div> +<div style="position:absolute;left:65%;top:85%"> + + +* Free service provided by LCSB at <a href="https://privatebin.lcsb.uni.lu" style="color:blue; font-size:0.8em;">privatebin.lcsb.uni.lu</a> +* **LUMS** account is required +* Set expiry period +* Can expire upon first access +* Password only accessible by sender and recipient +</div> + + + +# Storage set-up +## Backup - Central IT/LCSB +<div style="position:relative"> +<img src="slides/img/LCSB_storages_backed-up.png" height="750px"> +</div> +<div style="position:absolute;left:65%;top:60%"> + +Server administrators take care of: +* server backups +* LCSB OwnCloud backups +* group/application server backups (not always) + +</div> + + + +# Storage set-up +## Backup - personal research data +<div style="position:relative"> +<img src="slides/img/LCSB_storages_backup.png" height="750px"> +</div> +<div style="position:absolute;left:55%;top:70%"> + +<font color="red">One version should reside on Atlas!</font> + +</div> diff --git a/2021/2021-07-27_IT101-DM/slides/thanks.md b/2021/2021-07-27_IT101-DM/slides/thanks.md deleted file mode 120000 index
85dc36b89cdbfa538b1619e7b429499db2196fac..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/thanks.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/thanks.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/thanks.md b/2021/2021-07-27_IT101-DM/slides/thanks.md new file mode 100644 index 0000000000000000000000000000000000000000..0db75ff6af3e6c451f3171b3faf2c558d7392f61 --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/thanks.md @@ -0,0 +1,42 @@ +# Thank you.<sup> </sup> +<center><img src="slides/img/r3-training-logo.png" height="200px"></center> +<br> +<br> +<br> +<br> +<center> +Contact us if you need help: + +<a href="mailto:lcsb-r3@uni.lu">lcsb-r3@uni.lu</a> +</center> +<div style="position:absolute"> +Links: + +HowTo Cards / Policies: https://howto.lcsb.uni.lu/ + +Course Slides: https://courses.lcsb.uni.lu/ + +Internal Presentations: https://presentations.lcsb.uni.lu/ + +LCSB GitLab: https://gitlab.lcsb.uni.lu/ + +HPC: https://hpc.uni.lu/ + +Service Portal: https://service.uni.lu/sp + +LCSB intranet: https://intranet.uni.lux +</div> +<div style="position:relative;top:1.5em;left:55%;width:45%"> +Available SW and tools: +<div style="margin-left: 20px;"> +SIU managed: + +  - Service Portal > All Catalogs > IT > Softwares +</div> +<div style="margin-left: 20px;"> +LCSB managed: + +  - Service Portal > Knowledge > FAQ - Corporate Software\ +  - LCSB intranet > Science tab > Tools +</div> +</div> diff --git a/2021/2021-07-27_IT101-DM/slides/visualization.md b/2021/2021-07-27_IT101-DM/slides/visualization.md deleted file mode 120000 index d946af686d2d755178f317b134b6f102f7d52e0a..0000000000000000000000000000000000000000 --- a/2021/2021-07-27_IT101-DM/slides/visualization.md +++ /dev/null @@ -1 +0,0 @@ -../../2021-04-20_IT101-DM/slides/visualization.md \ No newline at end of file diff --git a/2021/2021-07-27_IT101-DM/slides/visualization.md b/2021/2021-07-27_IT101-DM/slides/visualization.md new file
mode 100644 index 0000000000000000000000000000000000000000..78847e73a297f77e3753623d7e546dcf499d4e2f --- /dev/null +++ b/2021/2021-07-27_IT101-DM/slides/visualization.md @@ -0,0 +1,21 @@ +# Visualization +<center> + +**Plot your data!** +<figure> + <img src="slides/img/DinoSequentialSmaller.gif" height="500px"> + <blockquote>"never trust summary statistics alone; always visualize your data"</blockquote> + <figcaption>--Alberto Cairo</figcaption> +</figure> +</center> + + + +# Visualization +<center> + +**Plot your data!** +<figure> + <img src="slides/img/plot-data.png" height="800px"> +</figure> +</center>