Skip to content
Snippets Groups Projects
Commit d33e0a5b authored by Piotr Gawron's avatar Piotr Gawron
Browse files

functionality to remove duplicates added

parent 7d59df2f
No related branches found
No related tags found
1 merge request!12Duplicates removal
Showing
with 238 additions and 19 deletions
package smash.appointment.parse;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
public class DuplicateRemoveParser {
private SubjectDao subjectDao;
public void removeDuplicates(String filename) throws IOException {
try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
String line;
while ((line = br.readLine()) != null) {
String tmp[] = line.split("\t");
Subject subject = subjectDao.getByScreeningNumber(tmp[0]);
if (subject == null) {
throw new InvalidArgumentException("Cannot find subject with id: " + tmp[0]);
}
for (int i = 1; i < tmp.length; i++) {
Subject duplicate = subjectDao.getByScreeningNumber(tmp[i]);
if (duplicate == null) {
throw new InvalidArgumentException("Cannot find subject with id: " + tmp[i]);
}
subjectDao.removeDuplicate(subject, duplicate, "DUPLICATES: " + tmp[0] + ", " + tmp[i]);
}
}
}
}
/**
* @return the subjectDao
* @see #subjectDao
*/
public SubjectDao getSubjectDao() {
return subjectDao;
}
/**
* @param subjectDao
* the subjectDao to set
* @see #subjectDao
*/
public void setSubjectDao(SubjectDao subjectDao) {
this.subjectDao = subjectDao;
}
}
package smash.appointment.parse;
/**
 * Unchecked exception thrown by the parsers in this package when the input
 * data refers to something that cannot be processed (for instance an unknown
 * subject identifier).
 */
public class InvalidArgumentException extends RuntimeException {

  /**
   * Serialization version identifier.
   */
  private static final long serialVersionUID = 1L;

  /**
   * Creates the exception with a detail message.
   *
   * @param string
   *          detail message describing the invalid argument
   */
  public InvalidArgumentException(String string) {
    super(string);
  }
}
......@@ -101,6 +101,18 @@ public class LihControlParser extends SubjectParser {
/**
 * Derives the country name from the prefix of the zip code obtained via
 * {@code parseZipCode(row)}.
 *
 * @param row
 *          row to read the zip code from
 * @return "Luxembourg", "Germany", "France" or "Belgium" for zip codes
 *         starting with L, D, F or B respectively; an empty string when the
 *         zip code is {@code null} or starts with anything else
 */
@Override
protected String parseCountry(Row row) {
  final String zip = parseZipCode(row);
  if (zip == null) {
    return "";
  }
  // prefix -> country, checked in the listed order
  final String[][] countryByPrefix = { //
      { "L", "Luxembourg" }, //
      { "D", "Germany" }, //
      { "F", "France" }, //
      { "B", "Belgium" } };
  for (String[] mapping : countryByPrefix) {
    if (zip.startsWith(mapping[0])) {
      return mapping[1];
    }
  }
  return "";
}
......
package smash.appointment.parse;
import java.text.SimpleDateFormat;
import java.io.IOException;
import java.util.Calendar;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
......@@ -31,6 +29,7 @@ public class Main {
Option lihControls = Option.builder().required().argName("file").hasArg().desc("LIH controls").longOpt("lih-controls").build();
Option lihMappingControls = Option.builder().required().argName("file").hasArg().desc("LIH controls mapping").longOpt("lih-mapping").build();
Option redCap = Option.builder().required().argName("file").hasArg().desc("RedCap appointments").longOpt("red-cap").build();
Option duplicates = Option.builder().required().argName("file").hasArg().desc("duplicates").longOpt("duplicates").build();
options.addOption(agenda);
options.addOption(subjects);
options.addOption(controls);
......@@ -38,6 +37,7 @@ public class Main {
options.addOption(lihControls);
options.addOption(lihMappingControls);
options.addOption(redCap);
options.addOption(duplicates);
CommandLineParser parser = new DefaultParser();
try {
......@@ -70,6 +70,9 @@ public class Main {
.addSubject(subject, "[" + lihControlsFile + ";" + subject.getScreeningNumber() + ";" + subject.getName() + " " + subject.getSurname() + "]");
}
String duplicatesFile = line.getOptionValue("duplicates");
removeDuplicates(duplicatesFile);
subjectDao.addSubject(Visit.UNKNOWN, "");
String agendaFile = line.getOptionValue("agenda");
......@@ -86,7 +89,7 @@ public class Main {
System.out.println("delete from web_appointment;");
System.out.println("delete from web_visit;");
System.out.println("delete from web_subject;");
SubjectSqlExporter subjectSqlExporter = new SubjectSqlExporter();
// logger.debug("SUBJECTS: ");
for (Subject subject : subjectDao.getSubjects()) {
......@@ -107,7 +110,13 @@ public class Main {
}
}
private List<AppointmentEntry> processRedCapAppointments(String agendaFile) throws Exception{
/**
 * Merges the duplicated subjects listed in the given file, using a
 * {@link DuplicateRemoveParser} wired to this class' subject DAO.
 *
 * @param duplicatesFile
 *          path of the file listing duplicated subjects
 * @throws IOException
 *           thrown when the file cannot be read
 */
private void removeDuplicates(String duplicatesFile) throws IOException {
  DuplicateRemoveParser duplicateParser = new DuplicateRemoveParser();
  duplicateParser.setSubjectDao(subjectDao);
  duplicateParser.removeDuplicates(duplicatesFile);
}
private List<AppointmentEntry> processRedCapAppointments(String agendaFile) throws Exception {
RedcapParser parser = new RedcapParser();
parser.setSubjectDao(subjectDao);
return parser.parse(agendaFile);
......
......@@ -289,8 +289,8 @@ public class Subject {
public void setPhone1(String phone1) {
if (phone1 != null && phone1.length() > 20) {
logger.warn("Invalid phone. Ignoring: " + phone1);
} else {
this.phone1 = phone1;
} else if (phone1 != null) {
this.phone1 = phone1.replace(",", "");
}
}
......@@ -310,8 +310,8 @@ public class Subject {
public void setPhone2(String phone2) {
if (phone2 != null && phone2.length() > 20) {
logger.warn("Invalid phone. Ignoring: " + phone2);
} else {
this.phone2 = phone2;
} else if (phone2 != null) {
this.phone2 = phone2.replace(",", "");
}
}
......@@ -331,8 +331,8 @@ public class Subject {
public void setPhone3(String phone3) {
if (phone3 != null && phone3.length() > 20) {
logger.warn("Invalid phone. Ignoring: " + phone3);
} else {
this.phone3 = phone3;
} else if (phone3!=null){
this.phone3 = phone3.replace(",", "");
}
}
......@@ -477,11 +477,11 @@ public class Subject {
setAddDate(getMergedValue("addDate", this.getAddDate(), subject.getAddDate(), errorPrefix));
setmPowerId(getMergedValue("mPowerId", this.getmPowerId(), subject.getmPowerId(), errorPrefix));
setType(getMergedValue("type", this.getType(), subject.getType(), errorPrefix));
setResigned(this.isResigned()|| subject.isResigned());
setDead(this.isDead()|| subject.isDead());
setPostponed(this.isPostponed()|| subject.isPostponed());
setResigned(this.isResigned() || subject.isResigned());
setDead(this.isDead() || subject.isDead());
setPostponed(this.isPostponed() || subject.isPostponed());
// override only when to be seen by flying team
if (subject.getToBeSeenAt().equals("F")) {
if (subject.getToBeSeenAt()!=null && subject.getToBeSeenAt().equals("F")) {
setToBeSeenAt(subject.getToBeSeenAt());
}
addLanguages(subject.getLanguages());
......@@ -579,7 +579,8 @@ public class Subject {
}
/**
* @param postponed the postponed to set
* @param postponed
* the postponed to set
* @see #postponed
*/
public void setPostponed(boolean postponed) {
......
......@@ -84,4 +84,28 @@ public class SubjectDao {
}
}
/**
 * Merges {@code subject2} into {@code subject1} and removes {@code subject2}
 * from the list of subjects.
 *
 * The merged screening number is the one that contains the other
 * (case-insensitively), or both joined with "; " when neither contains the
 * other. Remarks of both subjects are concatenated, and the remaining fields
 * are merged by {@code subject1.update(subject2, errorPrefix)}.
 *
 * The nd numbers are validated before either subject is modified, so a
 * rejected merge leaves both subjects unchanged.
 *
 * @param subject1
 *          subject that is kept and receives the merged data
 * @param subject2
 *          duplicate that is merged into {@code subject1} and removed
 * @param errorPrefix
 *          prefix handed over to {@code Subject.update}
 * @throws InvalidArgumentException
 *           thrown when both subjects have non-empty, different nd numbers
 */
public void removeDuplicate(Subject subject1, Subject subject2, String errorPrefix) {
  String screeningNumber1 = subject1.getScreeningNumber().trim();
  String screeningNumber2 = subject2.getScreeningNumber().trim();
  // Locale.ROOT keeps the case-insensitive comparison independent of the
  // default locale of the JVM
  String lowerCase1 = screeningNumber1.toLowerCase(java.util.Locale.ROOT);
  String lowerCase2 = screeningNumber2.toLowerCase(java.util.Locale.ROOT);

  // validate first: nothing may be modified when the merge is rejected
  String ndNumber1 = subject1.getNdNumber().trim();
  String ndNumber2 = subject2.getNdNumber().trim();
  if (!ndNumber1.isEmpty() && !ndNumber2.isEmpty() && !ndNumber1.equals(ndNumber2)) {
    throw new InvalidArgumentException("Two different nd numbers: " + subject1.getNdNumber() + ", " + subject2.getNdNumber());
  }

  String screeningNumber;
  if (lowerCase1.contains(lowerCase2)) {
    screeningNumber = screeningNumber1;
  } else if (lowerCase2.contains(lowerCase1)) {
    screeningNumber = screeningNumber2;
  } else {
    screeningNumber = screeningNumber1 + "; " + screeningNumber2;
  }
  // both subjects get the same screening number before update() merges them
  subject1.setScreeningNumber(screeningNumber);
  subject2.setScreeningNumber(screeningNumber);

  String remarks = (subject1.getRemarks() + "\n" + subject2.getRemarks()).trim();
  subject1.setRemarks(remarks);

  subject1.update(subject2, errorPrefix);
  subjects.remove(subject2);
}
}
# Root logger configuration: the log level followed by the names of the appenders defined below.
# NOTE(review): log4j.rootLogger is assigned twice; when the file is loaded as java.util.Properties
# the later assignment replaces the earlier one - confirm which line is intended to stay.
# NOTE(review): the first token of the value is normally the level, so "CONSOLE, R" carries no
# explicit level and CONSOLE is presumably not attached as an appender; probably meant
# "DEBUG, CONSOLE, R" - confirm against the log4j 1.2 PropertyConfigurator documentation.
log4j.rootLogger=fatal, CONSOLE
log4j.rootLogger=CONSOLE, R
# Behavior of the CONSOLE appender: console output restricted to FATAL by its threshold
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
log4j.appender.CONSOLE.Threshold=FATAL
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=%d %5p [%t] (%F:%L) - %m%n
# Alternative console pattern printing the bare message only (disabled)
#log4j.appender.CONSOLE.layout.ConversionPattern=%m%n
# Behavior of the file appender (named R): writes to log.txt with a DEBUG threshold
log4j.appender.R=org.apache.log4j.FileAppender
log4j.appender.R.File=log.txt
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%d %5p [%t] (%F:%L) - %m%n
log4j.appender.R.Threshold=DEBUG
......@@ -7,6 +7,7 @@ import org.junit.runners.Suite.SuiteClasses;
@RunWith(Suite.class)
@SuiteClasses({ AppointmentDaoTest.class, //
CellParserTest.class, //
DuplicateRemoveParserTest.class, //
LihControlMappingParserTest.class, //
LihControlParserTest.class, //
PrcControlParserTest.class, //
......
package smash.appointment.parse;
import static org.junit.Assert.*;
import java.io.IOException;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.Test;
/**
 * Tests for {@link DuplicateRemoveParser}.
 */
public class DuplicateRemoveParserTest {

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
  }

  @Before
  public void setUp() throws Exception {
  }

  @After
  public void tearDown() throws Exception {
  }

  /**
   * Four subjects are registered and the duplicates file is processed;
   * afterwards only two subjects are expected to remain.
   */
  @Test
  public void test() throws IOException {
    SubjectDao dao = new SubjectDao();
    String[][] people = { //
        { "First", "Last", "P-113" }, //
        { "First", "Last", "P-114" }, //
        { "A", "B", "P-115" }, //
        { "C", "D", "P-116" } };
    for (String[] person : people) {
      dao.addSubject(new Subject(person[0], person[1], "", person[2]), null);
    }
    assertEquals(4, dao.getSubjects().size());

    DuplicateRemoveParser duplicateParser = new DuplicateRemoveParser();
    duplicateParser.setSubjectDao(dao);
    duplicateParser.removeDuplicates("testFiles/duplicates.txt");

    assertEquals(2, dao.getSubjects().size());
  }
}
......@@ -49,7 +49,7 @@ public class LihControlParserTest extends TestBase {
assertEquals("11, Rue blabla", subject.getAddress());
assertEquals("L-3322", subject.getZipCode());
assertEquals("Luxembourg", subject.getCity());
assertEquals("", subject.getCountry());
assertEquals("Luxembourg", subject.getCountry());
assertEquals("123456789", subject.getPhone1());
assertEquals("321654", subject.getPhone2());
assertEquals("", subject.getPhone3());
......
package smash.appointment.parse;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import org.junit.After;
import org.junit.AfterClass;
......@@ -29,4 +31,58 @@ public class SubjectDaoTest {
assertEquals("Piotr", subjectDao.getSubjects().get(0).getName());
}
/**
 * Merges a duplicate with an empty nd number into a subject that has one:
 * one subject remains, its screening number mentions both originals and its
 * nd number is preserved.
 */
@Test
public void testDuplicateRem() throws Exception {
  SubjectDao dao = new SubjectDao();

  Subject kept = new Subject("A", "B", "C", "D");
  kept.setToBeSeenAt("");
  dao.addSubject(kept, null);

  Subject duplicate = new Subject("A", "B", "", "E");
  duplicate.setToBeSeenAt("");
  dao.addSubject(duplicate, null);

  dao.removeDuplicate(kept, duplicate, null);

  assertEquals(1, dao.getSubjects().size());
  String mergedScreeningNumber = kept.getScreeningNumber();
  assertTrue(mergedScreeningNumber.contains("D"));
  assertTrue(mergedScreeningNumber.contains("E"));
  assertEquals("C", kept.getNdNumber());
}
/**
 * Same scenario as the previous test with the roles swapped: the subject
 * with the empty nd number is kept and must take over the nd number and the
 * screening number of the removed one.
 */
@Test
public void testDuplicateRem2() throws Exception {
  SubjectDao dao = new SubjectDao();

  Subject duplicate = new Subject("A", "B", "C", "D");
  duplicate.setToBeSeenAt("");
  dao.addSubject(duplicate, null);

  Subject kept = new Subject("A", "B", "", "E");
  kept.setToBeSeenAt("");
  dao.addSubject(kept, null);

  dao.removeDuplicate(kept, duplicate, null);

  assertEquals(1, dao.getSubjects().size());
  String mergedScreeningNumber = kept.getScreeningNumber();
  assertTrue(mergedScreeningNumber.contains("D"));
  assertTrue(mergedScreeningNumber.contains("E"));
  assertEquals("C", kept.getNdNumber());
}
/**
 * Merging two subjects that carry different, non-empty nd numbers must be
 * rejected with an {@link InvalidArgumentException}.
 */
@Test
public void testDuplicateRem3() throws Exception {
  // fixture setup stays outside the try block so that an exception raised
  // here cannot be mistaken for the expected one
  Subject subject1 = new Subject("A", "B", "C", "D");
  subject1.setToBeSeenAt("");
  Subject subject2 = new Subject("A", "B", "X", "E");
  subject2.setToBeSeenAt("");
  SubjectDao subjectDao = new SubjectDao();
  subjectDao.addSubject(subject1, null);
  subjectDao.addSubject(subject2, null);

  try {
    subjectDao.removeDuplicate(subject2, subject1, null);
    fail("Exception expected");
  } catch (InvalidArgumentException expected) {
    // expected: the nd numbers "X" and "C" differ
  }
}
}
P-113 P-114
P-115 P-116
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment