Skip to content
Snippets Groups Projects
Commit 57d2483d authored by Francois Ancien's avatar Francois Ancien
Browse files

Adding scripts to aggregate data and edc-client references

parent bb906a13
No related branches found
No related tags found
No related merge requests found
...@@ -28,8 +28,9 @@ inputs: ...@@ -28,8 +28,9 @@ inputs:
outputs: outputs:
- id: final - id: final
type: stdout type: File
doc: "The aggregated values from all partial data" doc: "The aggregated values from all partial data"
outputBinding:
glob: avg-ages.json
stdout: stdout.txt baseCommand: ["aggregate_central"]
baseCommand: ["python", "aggregate-central.py"]
...@@ -7,7 +7,7 @@ doc: "Task to pull data from EDC" ...@@ -7,7 +7,7 @@ doc: "Task to pull data from EDC"
hints: hints:
- class: DockerRequirement - class: DockerRequirement
dockerPull: gitlab.lcsb.uni.lu:4567/luca.bolzani/iderha-test-deployment/test-aggregate-remote dockerPull: gitlab.lcsb.uni.lu:4567/luca.bolzani/iderha-test-deployment/edc-client
inputs: inputs:
- id: input - id: input
...@@ -16,8 +16,9 @@ inputs: ...@@ -16,8 +16,9 @@ inputs:
position: 1 position: 1
outputs: outputs:
- id: datalink datalink:
type: stdout type: File
outputBinding:
glob: aggregated-ages.json
stdout: stdout.txt baseCommand: ["/app/edc_client.sh"]
baseCommand: ["aggregate_remote"]
FROM repomanager.lcsb.uni.lu:9999/python:3.9 FROM repomanager.lcsb.uni.lu:9999/python:3.9
LABEL authors="francois.ancien" LABEL authors="francois.ancien"
WORKDIR /
COPY aggregate-central.py /aggregate-central.py COPY aggregate-central.py /aggregate-central.py
COPY aggregate-central.sh /usr/local/bin/aggregate_central
RUN chmod +x /usr/local/bin/aggregate_central
ENTRYPOINT ["/usr/bin/bash"] ENTRYPOINT ["/usr/bin/bash"]
#!/usr/bin/env python3.9 #!/usr/bin/env python3.9
def main(): import argparse
print("Hello world! From central node!") import json
import sys
def main(args=None):
    """Aggregate partial sums and counts into one overall average.

    Every input file is read as a JSON object providing a "sum" and a
    "count" entry. The totals are accumulated over all files and the
    resulting average is written to ``avg-ages.json`` in the current
    working directory as ``{"avg": <value>}``.

    Args:
        args: Command-line arguments (one or more input file paths).
            Falls back to ``sys.argv[1:]`` when ``None``.

    Raises:
        ZeroDivisionError: If the counts of all input files add up to zero.
    """
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser()
    parser.add_argument("infiles", metavar="Input", type=str, nargs="+", help="PATH to an input file")
    args = parser.parse_args(args)

    total_sum = 0.0
    total_count = 0.0
    for file_path in args.infiles:
        with open(file_path, "r") as f:
            data = json.load(f)
        total_sum += data["sum"]
        total_count += data["count"]

    # Compute the average BEFORE opening the output file: opening in "w" mode
    # truncates/creates it, so a failing division would otherwise leave an
    # empty avg-ages.json behind.
    if not total_count:
        raise ZeroDivisionError(
            "Cannot compute an average: the total count over all input files is zero."
        )
    average = total_sum / total_count

    with open("avg-ages.json", "w") as f:
        json.dump({"avg": average}, f)
if __name__ == "__main__": if __name__ == "__main__":
main() sys.exit(main())
#!/bin/bash
# Forward ALL command-line arguments to the aggregation script.
# "$@" keeps every argument as its own correctly quoted word, so several
# input files (and paths containing spaces) reach the Python script intact.
python /aggregate-central.py "$@"
\ No newline at end of file
FROM repomanager.lcsb.uni.lu:9999/python:3.9 FROM repomanager.lcsb.uni.lu:9999/python:3.9
LABEL authors="francois.ancien" LABEL authors="francois.ancien"
RUN pip install requests --no-cache RUN pip install --no-cache requests
WORKDIR / WORKDIR /
COPY aggregate-remote.py /aggregate-remote.py COPY aggregate-remote.py /aggregate-remote.py
COPY aggregate-remote.sh /usr/local/bin/aggregate_remote COPY aggregate-remote.sh /usr/local/bin/aggregate_remote
......
#!/usr/bin/env python #!/usr/bin/env python
import argparse import argparse
import requests import json
from ftplib import FTP
import sys import sys
...@@ -9,20 +10,47 @@ def main(args=None): ...@@ -9,20 +10,47 @@ def main(args=None):
if args is None: if args is None:
args = sys.argv[1:] args = sys.argv[1:]
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("-i", "--input", help="URL to input file") parser.add_argument("-i", "--input", help="PATH to input file")
parsed = parser.parse_args(args) parsed = parser.parse_args(args)
res = requests.get(parsed.input) data = None
if not res.ok: ftp_path = None
issue = res.text with open(parsed.input, "r") as f:
raise requests.HTTPError(f"Error {res.status_code}: {issue}") try:
data = json.load(f)
try: ftp_path = data["FTP_DATA_ADDRESS"]
data = res.json() except json.JSONDecodeError as e:
print(data) raise json.JSONDecodeError(f"Impossible to parse data in {parsed.input}. Not a valid json.") from e
except requests.JSONDecodeError:
print(f"Issue with data in {res.text}. Not a valid json") # Getting data from ftp source
ftp_host = "http://ftp-upload-service/api/ftp/upload"
ftp = FTP(ftp_host)
ftp.login(user="ftp_iderha_user", passwd="ftp_iderha_pass")
tmp_path = "tmp_data.json"
with open(tmp_path, "wb") as fh:
ftp.retrbinary(ftp_path, fh.write)
# Extracting data from json
with open(tmp_path, "rb") as f:
try:
data = json.load(f)
except json.JSONDecodeError as e:
raise json.JSONDecodeError(f"Impossible to parse data in {tmp_path}. Not a valid json.") from e
# Calculating avg username lengths
sum_usrname = 0
for row in data:
try:
sum_usrname += len(row["username"])
except (AttributeError, KeyError):
continue
count_rows = len(data)
# Saving
with open("aggregated-ages.json", "w") as f:
json.dump({"sum": sum_usrname, "count": count_rows}, f)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -10,5 +10,6 @@ ...@@ -10,5 +10,6 @@
"EDC_PROVIDER_PROTOCOL_URL": "http://edc-provider:19194/protocol", "EDC_PROVIDER_PROTOCOL_URL": "http://edc-provider:19194/protocol",
"EDC_PROVIDER_PUBLIC_URL": "http://edc-provider:19291/public", "EDC_PROVIDER_PUBLIC_URL": "http://edc-provider:19291/public",
"EDR_ENDPOINT_URL": "http://edc-consumer:29191/api/edr/query", "EDR_ENDPOINT_URL": "http://edc-consumer:29191/api/edr/query",
"DATA_DESTINATION_URL": "http://ftp-upload-service/api/ftp/upload" "DATA_DESTINATION_URL": "http://ftp-upload-service/api/ftp/upload",
"FTP_DATA_ADDRESS": "/home/ftp_iderha_user/isst-data.json"
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment