From f74f08a02ca4a141845781a170496d02938c1f5a Mon Sep 17 00:00:00 2001 From: Piotr Gawron <piotr.gawron@uni.lu> Date: Mon, 6 Mar 2017 15:14:52 +0100 Subject: [PATCH] parser for LIH calendar data --- .../parse/AppointmentTypeCollection.java | 2 +- .../smash/appointment/parse/CellParser.java | 2 +- .../parse/RedcapCalendarParser.java | 148 ++++++++++++++++++ .../smash/appointment/parse/SubjectDao.java | 2 +- .../parse/RedcapCalendarParserTest.java | 76 +++++++++ .../testFiles/redcap_calendar.sql | 68 ++++++++ 6 files changed, 295 insertions(+), 3 deletions(-) create mode 100644 appointment-import/src/main/java/smash/appointment/parse/RedcapCalendarParser.java create mode 100644 appointment-import/src/test/java/smash/appointment/parse/RedcapCalendarParserTest.java create mode 100644 appointment-import/testFiles/redcap_calendar.sql diff --git a/appointment-import/src/main/java/smash/appointment/parse/AppointmentTypeCollection.java b/appointment-import/src/main/java/smash/appointment/parse/AppointmentTypeCollection.java index c4fdcc15..63797a4d 100644 --- a/appointment-import/src/main/java/smash/appointment/parse/AppointmentTypeCollection.java +++ b/appointment-import/src/main/java/smash/appointment/parse/AppointmentTypeCollection.java @@ -48,7 +48,7 @@ public enum AppointmentTypeCollection { new String[] { "evel B" }), // LEVEL_B_M_POWER(new AppointmentType[] { AppointmentType.LEVEL_B_M_POWER }, // new String[] { "mPower" }), // - OTHER(new AppointmentType[] {}, // + OTHER(new AppointmentType[] {AppointmentType.OTHER}, // new String[] {}), // ; private String[] queryStrings; diff --git a/appointment-import/src/main/java/smash/appointment/parse/CellParser.java b/appointment-import/src/main/java/smash/appointment/parse/CellParser.java index 3780a800..a3fcd104 100644 --- a/appointment-import/src/main/java/smash/appointment/parse/CellParser.java +++ b/appointment-import/src/main/java/smash/appointment/parse/CellParser.java @@ -54,7 +54,7 @@ public class CellParser { return result; } - private AppointmentTypeCollection extractType(String query) { + public AppointmentTypeCollection extractType(String query) { String simplifiedQuery = Utils.simplifyString(query); diff --git a/appointment-import/src/main/java/smash/appointment/parse/RedcapCalendarParser.java b/appointment-import/src/main/java/smash/appointment/parse/RedcapCalendarParser.java new file mode 100644 index 00000000..ac392d98 --- /dev/null +++ b/appointment-import/src/main/java/smash/appointment/parse/RedcapCalendarParser.java @@ -0,0 +1,148 @@ +package smash.appointment.parse; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; + +import org.apache.log4j.Logger; + +public class RedcapCalendarParser { + Logger logger = Logger.getLogger(RedcapCalendarParser.class); + + private SubjectDao subjectDao; + + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd"); + + public List<AppointmentEntry> parse(String filename, Calendar minDate) throws FileNotFoundException, IOException { + List<AppointmentEntry> result = new ArrayList<>(); + try (BufferedReader br = new BufferedReader(new FileReader(filename))) { + String line; + while ((line = br.readLine()) != null) { + if (!line.startsWith("INSERT INTO")) { + continue; + } + String tmp[] = line.substring(line.indexOf("(")).split("\\),\\(", -1); + for (String string : tmp) { + AppointmentEntry entry = processEntry(string, minDate); + if (entry != null) { + result.add(entry); + } + } + } + } + return result; + } + + private AppointmentEntry processEntry(String string, Calendar minDate) { + AppointmentEntry result = new AppointmentEntry(); + if (string.startsWith("(")) { + string = string.substring(1); + } + if (string.endsWith(")")) { + string = string.substring(0, string.length() - 1); + } + string = string.replaceAll("\\\\'", "__quota__"); + string = string.replaceAll("'", "\""); + string = string.replaceAll("__quota__", "'"); + String fields[] = string.split(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)", -1); + String ndNumber = fields[1].replaceAll("\"", ""); + String day = fields[6].replaceAll("\"", ""); + String time = fields[7].replaceAll("\"", ""); + + String query = fields[10].replaceAll("\"", ""); + + if (query.equals("") || query.equals("NULL")) { + return null; + } + + if (minDate != null) { + if (day.compareTo(dateFormatter.format(minDate.getTime())) < 0) { + return null; + } + } + if (ndNumber.equals("NDtest_internal") || ndNumber.equals("NDtest_external")) { + return null; + } + + Subject subject = null; + if (!ndNumber.equalsIgnoreCase("NULL")) { + subject = subjectDao.getByNdNumber(ndNumber); + if (subject == null) { + logger.warn("Cannot find subject with nd number: " + ndNumber); + } + } + if (subject == null) { + subject = findSubject(query); + } + if (subject != null && !subject.getToBeSeenAt().equalsIgnoreCase("LIH")) { + return null; + } + result.setDay(day); + result.setTime(time); + result.setSource("From redcap: " + query); + result.setSubject(subject); + result.addTypes(getTypes(query)); + if (result.getTypes().contains(AppointmentType.OTHER)) { + logger.warn("Cannot find types for: " + query); + } + return result; + } + + CellParser cellParser = new CellParser(); + + private List<AppointmentType> getTypes(String query) { + List<AppointmentType> result = new ArrayList<>(); + AppointmentTypeCollection collection = cellParser.extractType(query); + if (collection == null) { + int index = query.indexOf("_"); + if (index >= 0) { + query = query.substring(index); + if (query.startsWith("_lev a_")) { + collection = AppointmentTypeCollection.LEVEL_A; + } + } + if (collection == null) { + collection = AppointmentTypeCollection.OTHER; + } + } + for (AppointmentType appointmentType : collection.getTypes()) { + result.add(appointmentType); + } + + return result; + } + + private Subject findSubject(String query) { + String id = query.split("_")[0]; + id = "L-" + id; + Subject result = subjectDao.getByScreeningNumber(id); + if (result == null) { + logger.warn("Cannot find subject for query: " + query); + } + return result; + } + + /** + * @return the subjectDao + * @see #subjectDao + */ + public SubjectDao getSubjectDao() { + return subjectDao; + } + + /** + * @param subjectDao + * the subjectDao to set + * @see #subjectDao + */ + public void setSubjectDao(SubjectDao subjectDao) { + this.subjectDao = subjectDao; + } +} diff --git a/appointment-import/src/main/java/smash/appointment/parse/SubjectDao.java b/appointment-import/src/main/java/smash/appointment/parse/SubjectDao.java index efe0000b..3cbfc396 100644 --- a/appointment-import/src/main/java/smash/appointment/parse/SubjectDao.java +++ b/appointment-import/src/main/java/smash/appointment/parse/SubjectDao.java @@ -30,7 +30,7 @@ public class SubjectDao { } } - private Subject getByScreeningNumber(String screeningNumber) { + Subject getByScreeningNumber(String screeningNumber) { for (Subject s : subjects) { if (screeningNumber.equals(s.getScreeningNumber())) { return s; diff --git a/appointment-import/src/test/java/smash/appointment/parse/RedcapCalendarParserTest.java b/appointment-import/src/test/java/smash/appointment/parse/RedcapCalendarParserTest.java new file mode 100644 index 00000000..012ffcff --- /dev/null +++ b/appointment-import/src/test/java/smash/appointment/parse/RedcapCalendarParserTest.java @@ -0,0 +1,76 @@ +package smash.appointment.parse; + +import static org.junit.Assert.*; + +import java.util.ArrayList; +import java.util.Calendar; +import java.util.List; + +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.Test; + +public class RedcapCalendarParserTest extends TestBase{ + RedcapCalendarParser parser = new RedcapCalendarParser(); + + @AfterClass + public static void tearDownAfterClass() throws Exception { + } + + @Before + public void setUp() { + super.setUp(); + parser.setSubjectDao(subjectDao); + List<Subject> subjects = new ArrayList<>(); + for (int count =0 ;count<10000;count++) { + String nd = count+""; + while (nd.length()<4) { + nd = "0"+nd; + } + Subject subject = new Subject("name "+nd, "surname"+nd, "ND"+nd, count+""); + + String screening = nd; + if (screening.startsWith("0")) { + screening = screening.substring(1); + } + subject.setScreeningNumber("P-"+screening); + subject.setToBeSeenAt("PRC"); + if (count>=3000) { + screening=screening.substring(1); + subject.setScreeningNumber("L-"+screening); + subject.setToBeSeenAt("LIH"); + } + if (count>=7000) { + subject.setScreeningNumber("F-"+nd); + subject.setToBeSeenAt("FLYING TEAM"); + } + subjects.add(subject); + } + subjectDao.setSubjects(subjects); + } + + @After + public void tearDown() throws Exception { + } + + @Test + public void testParse() throws Exception{ + List<AppointmentEntry> result = parser.parse("testFiles/redcap_calendar.sql", null); + assertEquals(2, result.size()); + } + + @Test + public void test() throws Exception{ + List<AppointmentEntry> result = parser.parse("c:/Users/piotr.gawron/Desktop/tmp/prc/redcap_events_calendar.sql", Calendar.getInstance()); + } + + @Test + public void testParse2() throws Exception{ + Calendar future =Calendar.getInstance(); + future.set(Calendar.YEAR, 4000); + List<AppointmentEntry> result = parser.parse("testFiles/redcap_calendar.sql", future); + assertEquals(0, result.size()); + } + +} diff --git a/appointment-import/testFiles/redcap_calendar.sql b/appointment-import/testFiles/redcap_calendar.sql new file mode 100644 index 00000000..c6586541 --- /dev/null +++ b/appointment-import/testFiles/redcap_calendar.sql @@ -0,0 +1,68 @@ +-- MySQL dump 10.13 Distrib 5.5.54, for debian-linux-gnu (x86_64) +-- +-- Host: localhost Database: redcap +-- ------------------------------------------------------ +-- Server version 5.5.54-0ubuntu0.14.04.1 + +/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; +/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; +/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; +/*!40101 SET NAMES utf8 */; +/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */; +/*!40103 SET TIME_ZONE='+00:00' */; +/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */; +/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */; +/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */; +/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */; + +-- +-- Table structure for table `redcap_events_calendar` +-- + +DROP TABLE IF EXISTS `redcap_events_calendar`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `redcap_events_calendar` ( + `cal_id` int(10) NOT NULL AUTO_INCREMENT, + `record` varchar(50) COLLATE utf8_unicode_ci DEFAULT NULL, + `project_id` int(10) DEFAULT NULL, + `event_id` int(10) DEFAULT NULL, + `baseline_date` date DEFAULT NULL, + `group_id` int(10) DEFAULT NULL, + `event_date` date DEFAULT NULL, + `event_time` varchar(5) COLLATE utf8_unicode_ci DEFAULT NULL COMMENT 'HH:MM', + `event_status` int(2) DEFAULT NULL COMMENT 'NULL=Ad Hoc, 0=Due Date, 1=Scheduled, 2=Confirmed, 3=Cancelled, 4=No Show', + `note_type` int(2) DEFAULT NULL, + `notes` text COLLATE utf8_unicode_ci, + `extra_notes` text COLLATE utf8_unicode_ci, + PRIMARY KEY (`cal_id`), + KEY `event_id` (`event_id`), + KEY `group_id` (`group_id`), + KEY `project_date` (`project_id`,`event_date`), + KEY `project_record` (`project_id`,`record`), + CONSTRAINT `redcap_events_calendar_ibfk_1` FOREIGN KEY (`event_id`) REFERENCES `redcap_events_metadata` (`event_id`) ON DELETE CASCADE ON UPDATE CASCADE, + CONSTRAINT `redcap_events_calendar_ibfk_2` FOREIGN KEY (`group_id`) REFERENCES `redcap_data_access_groups` (`group_id`) ON DELETE SET NULL ON UPDATE CASCADE, + CONSTRAINT `redcap_events_calendar_ibfk_3` FOREIGN KEY (`project_id`) REFERENCES `redcap_projects` (`project_id`) ON DELETE CASCADE ON UPDATE CASCADE +) ENGINE=InnoDB AUTO_INCREMENT=2107 DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci COMMENT='Calendar Data'; +/*!40101 SET character_set_client = @saved_cs_client */; + +-- +-- Dumping data for table `redcap_events_calendar` +-- + +LOCK TABLES `redcap_events_calendar` WRITE; +/*!40000 ALTER TABLE `redcap_events_calendar` DISABLE KEYS */; +INSERT INTO `redcap_events_calendar` VALUES (7,NULL,12,NULL,NULL,NULL,'2015-05-06','08:00',NULL,NULL,'Patient, Level A',NULL),(8,NULL,12,NULL,NULL,NULL,'2015-05-06','12:00',NULL,NULL,'Patient Level A',NULL),(1336,'ND0333',12,41,'2016-10-27',NULL,'2018-09-07','',0,NULL,'',NULL); +/*!40000 ALTER TABLE `redcap_events_calendar` ENABLE KEYS */; +UNLOCK TABLES; +/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */; + +/*!40101 SET SQL_MODE=@OLD_SQL_MODE */; +/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */; +/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */; +/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */; +/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */; +/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */; +/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */; + +-- Dump completed on 2017-03-03 15:52:47 -- GitLab