Skip to content
Snippets Groups Projects
Commit ab6b12b3 authored by Piotr Gawron's avatar Piotr Gawron
Browse files

simple java parser for agenda (for import purpose)

parent 29eec2e8
No related branches found
No related tags found
1 merge request!1Appointments dev
Showing
with 799 additions and 0 deletions
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
/target/
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>appointment-import</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.7
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the appointment-import module: a jar whose declared main class is smash.appointment.parse.Main. -->
<modelVersion>4.0.0</modelVersion>
<groupId>smash</groupId>
<artifactId>appointment-import</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<name>appointment-import</name>
<url>http://maven.apache.org</url>
<properties>
<!-- Sources are compiled as UTF-8. -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- JUnit is available to test code only. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
<scope>test</scope>
</dependency>
<!-- Logging API (log4j 1.x) used by the parser classes. -->
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.17</version>
</dependency>
<!-- Apache POI OOXML support, used to read the .xlsx agenda. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.15</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compile for Java 1.7 (source and bytecode level). -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.5.1</version>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
<!-- Assembles a jar-with-dependencies whose manifest points at the Main class. -->
<!-- NOTE(review): no <version> is declared for this plugin and no <executions> are bound, so the assembly is presumably invoked explicitly - confirm. -->
<!-- NOTE(review): <source>/<target> below look like compiler-plugin settings copied here; confirm the assembly plugin actually reads them. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<source>1.7</source>
<target>1.7</target>
<archive>
<manifest>
<addClasspath>true</addClasspath>
<mainClass>smash.appointment.parse.Main</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
</plugins>
</build>
</project>
package smash.appointment.parse;
public class AppointmentEntry {

  /** Day of the appointment, stored as text. */
  private String day;

  /** Time of the appointment, stored as text. */
  private String time;

  /** Subject the appointment was matched to; may be {@code null}. */
  private Subject subject;

  /** Kind of appointment. */
  private AppointmentType type;

  /** Text this entry was created from. */
  private String source;

  /**
   * @return the day
   * @see #day
   */
  public String getDay() {
    return day;
  }

  /**
   * @param day
   *          the day to set
   * @see #day
   */
  public void setDay(String day) {
    this.day = day;
  }

  /**
   * @return the time
   * @see #time
   */
  public String getTime() {
    return time;
  }

  /**
   * @param time
   *          the time to set
   * @see #time
   */
  public void setTime(String time) {
    this.time = time;
  }

  /**
   * @return the subject
   * @see #subject
   */
  public Subject getSubject() {
    return subject;
  }

  /**
   * @param subject
   *          the subject to set
   * @see #subject
   */
  public void setSubject(Subject subject) {
    this.subject = subject;
  }

  /**
   * @return the type
   * @see #type
   */
  public AppointmentType getType() {
    return type;
  }

  /**
   * @param type
   *          the type to set
   * @see #type
   */
  public void setType(AppointmentType type) {
    this.type = type;
  }

  /**
   * @return the source
   * @see #source
   */
  public String getSource() {
    return source;
  }

  /**
   * @param source
   *          the source to set
   * @see #source
   */
  public void setSource(String source) {
    this.source = source;
  }

  /**
   * Renders the entry on one line: day, time, subject and type separated by
   * single spaces, followed by two tabs and the bracketed source text. Unset
   * fields show up as {@code null}.
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(day).append(" ").append(time).append(" ");
    text.append(subject).append(" ").append(type);
    text.append("\t\t[source: ").append(source).append("]");
    return text.toString();
  }
}
package smash.appointment.parse;
/**
 * Kinds of appointment that can be recognised in an agenda cell. Each constant
 * carries the text fragments ("query strings") whose presence in a cell
 * indicates that kind; {@link #OTHER} has none.
 */
public enum AppointmentType {
  // most complex should be first
  LEVEL_BV_BG_SB("evel BV + BG + SB", "BV + BG + SB"), //
  LEVEL_BV_SB("evel BV + SB", "BV + SB"), //
  LEVEL_BV_BG("evel BV + BG", "BV + BG"), //
  LEVEL_BG_SB("evel BG + SB", "BG + SB"), //
  LEVEL_BV("evel BV", "BV"), //
  LEVEL_BG("evel BG", "BG"), //
  LEVEL_SB("evel SB", "SB"), //
  LEVEL_A("level A"), //
  OTHER(), //
  LEVEL_B("evel B"), //
  LEVEL_B_M_POWER("mPower"), //
  ;

  /** Text fragments identifying this type; never handed out directly. */
  private final String[] queryStrings;

  /**
   * @param queryStrings
   *          fragments identifying this type, in the order they should be
   *          tried
   */
  private AppointmentType(String... queryStrings) {
    this.queryStrings = queryStrings;
  }

  /**
   * Returns a copy of the fragments identifying this type. A copy is returned
   * because enum constants are shared by the whole JVM: handing out the
   * internal array would let any caller silently change the matching rules.
   *
   * @return the queryStrings (fresh array on every call)
   * @see #queryStrings
   */
  public String[] getQueryStrings() {
    return queryStrings.clone();
  }
}
package smash.appointment.parse;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
/**
 * Turns the text of a single agenda cell into an {@link AppointmentEntry}: an
 * optional leading time, the subject the text refers to and the appointment
 * type.
 */
public class CellParser {
  Logger logger = Logger.getLogger(CellParser.class);

  /** Source of the subjects a cell text is matched against. */
  private SubjectDao subjectDao;

  /** Two digits, a colon and two digits at the very start of the text. */
  Pattern timePattern = Pattern.compile("^[0-9][0-9]\\:[0-9][0-9]");

  /**
   * Extracts the time that starts the given text.
   *
   * @param content
   *          cell text
   * @return the leading time token, or {@code null} when the text does not
   *         start with one
   */
  public String extractTime(String content) {
    String result = null;
    Matcher matcher = timePattern.matcher(content);
    if (matcher.find()) {
      result = matcher.group();
    }
    return result;
  }

  /**
   * Strips the leading time token (if any) from the given text.
   *
   * @param content
   *          cell text
   * @return the trimmed remainder when a time was present, otherwise the text
   *         unchanged
   */
  public String removeTime(String content) {
    Matcher matcher = timePattern.matcher(content);
    if (matcher.find()) {
      content = matcher.replaceFirst("").trim();
    }
    return content;
  }

  /**
   * Parses one cell text into an appointment entry.
   *
   * @param query
   *          cell text
   * @param defaultTime
   *          time to use when the text does not start with a time
   * @return entry with time, subject (possibly {@code null}), type (falling
   *         back to {@link AppointmentType#OTHER}) and source filled in; the
   *         day is left unset
   */
  public AppointmentEntry parseAppointment(String query, String defaultTime) {
    AppointmentEntry result = new AppointmentEntry();
    String time = extractTime(query);
    if (time != null) {
      query = removeTime(query);
    } else {
      time = defaultTime;
    }
    result.setTime(time);

    Subject subject = extractSubject(query);
    result.setSubject(subject);

    AppointmentType type = extractType(query);
    if (type == null) {
      type = AppointmentType.OTHER;
    }
    result.setType(type);

    // NOTE(review): the source is the text with the leading time already
    // removed - confirm that this is intended.
    result.setSource(query);
    return result;
  }

  /**
   * Finds the appointment type whose query string occurs in the text. When
   * several types match, a type whose query string contains the one chosen so
   * far replaces it, a type whose query string is contained in it is ignored,
   * and for unrelated strings the longer one wins and a warning is logged.
   *
   * @return the chosen type, or {@code null} when nothing matches
   */
  private AppointmentType extractType(String query) {
    String simplifiedQuery = Utils.simplifyString(query);

    AppointmentType result = null;
    String usedString = null;
    for (AppointmentType type : AppointmentType.values()) {
      String string = firstMatchingQueryString(type, simplifiedQuery);
      if (string == null) {
        continue;
      }
      if (result == null || string.contains(usedString)) {
        result = type;
        usedString = string;
      } else if (!usedString.contains(string)) {
        // neither string contains the other, so we might have a problem
        AppointmentType previous = result;
        if (usedString.length() < string.length()) {
          result = type;
          usedString = string;
        }
        // report the two competing candidates and the one actually kept
        logger.warn("More than one type possible for query: " + query + ". Type 1: " + previous + ". Type 2: " + type + ". Choosing: " + result);
      }
      // otherwise the new string is a substring of the old one: keep the old
    }
    return result;
  }

  /**
   * Returns the first query string of the type that occurs (after
   * simplification) in the simplified query, or {@code null} if none does.
   */
  private String firstMatchingQueryString(AppointmentType type, String simplifiedQuery) {
    for (String string : type.getQueryStrings()) {
      if (simplifiedQuery.contains(Utils.simplifyString(string))) {
        return string;
      }
    }
    return null;
  }

  /**
   * Looks up the subject the text refers to: first by name+surname,
   * surname+name or ND number, and only if that finds nothing by surname
   * alone.
   *
   * @return the matched subject or {@code null}
   */
  private Subject extractSubject(String query) {
    String simplifiedQuery = Utils.simplifyString(query);

    SubjectIndexer[] mainIndices = new SubjectIndexer[] { //
        new NameSurnameIndexer(), //
        new SurnameNameIndexer(), //
        new NdNumberIndexer(), //
    };
    Subject result = getByIndices(query, simplifiedQuery, mainIndices);
    if (result == null) {
      SubjectIndexer[] secondaryIndices = new SubjectIndexer[] { //
          new SurnameIndexer(), //
      };
      result = getByIndices(query, simplifiedQuery, secondaryIndices);
    }
    return result;
  }

  /**
   * Scans all known subjects and returns the one matched by any of the given
   * indexers. Only the first matching indexer is considered per subject. When
   * several subjects match, that indexer decides which one is better and a
   * warning is logged.
   *
   * @param query
   *          original text, used for the warning only
   * @param simplifiedQuery
   *          simplified text the indexers are matched against
   * @param indexers
   *          indexers to try, in order
   * @return the matched subject or {@code null}
   */
  private Subject getByIndices(String query, String simplifiedQuery, SubjectIndexer[] indexers) {
    Subject result = null;
    for (Subject subject : subjectDao.getSubjects()) {
      for (SubjectIndexer indexer : indexers) {
        if (!indexer.match(subject, simplifiedQuery)) {
          continue;
        }
        if (result == null) {
          result = subject;
        } else {
          Subject newResult = result;
          if (indexer.isBetter(subject, result)) {
            newResult = subject;
          }
          logger.warn(
              "More than one subject possible for query: " + query + ". Subject 1: " + result + ". Subject 2: " + subject + ". Choosing: " + newResult);
          result = newResult;
        }
        // one match per subject is enough
        break;
      }
    }
    return result;
  }

  /**
   * @return the subjectDao
   * @see #subjectDao
   */
  public SubjectDao getSubjectDao() {
    return subjectDao;
  }

  /**
   * @param subjectDao
   *          the subjectDao to set
   * @see #subjectDao
   */
  public void setSubjectDao(SubjectDao subjectDao) {
    this.subjectDao = subjectDao;
  }
}
package smash.appointment.parse;
import java.util.List;
import org.apache.log4j.Logger;
/**
 * Command line entry point: reads the subjects file, parses the agenda
 * workbook and logs every appointment found at debug level.
 */
public class Main {
  private static Logger logger = Logger.getLogger(Main.class);

  /**
   * @param args
   *          {@code args[0]} is the agenda workbook, {@code args[1]} the
   *          subjects file; with fewer than two arguments only the usage line
   *          is printed
   * @throws Exception
   *           whatever reading or parsing either file throws
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.out.println("Usage: command <agenda.xlsx> <subjects.txt>");
      return;
    }
    String agendaFile = args[0];
    String subjectsFile = args[1];

    SubjectDao knownSubjects = new SubjectDao();
    knownSubjects.readFile(subjectsFile);

    XlsxCalendarProcessor calendarProcessor = new XlsxCalendarProcessor();
    calendarProcessor.setSubjectDao(knownSubjects);

    List<AppointmentEntry> appointments = calendarProcessor.processExcel(agendaFile);
    for (AppointmentEntry appointment : appointments) {
      logger.debug(appointment);
    }
  }
}
package smash.appointment.parse;
/** Indexes a subject by its name immediately followed by its surname. */
public class NameSurnameIndexer extends SubjectIndexer {

  /**
   * @return simplified concatenation of the subject's name and surname
   */
  public String getIndexedString(Subject subject) {
    String nameThenSurname = subject.getName() + subject.getSurname();
    return Utils.simplifyString(nameThenSurname);
  }
}
package smash.appointment.parse;
/** Indexes a subject by its ND number. */
public class NdNumberIndexer extends SubjectIndexer {

  /**
   * @return simplified ND number of the subject
   */
  public String getIndexedString(Subject subject) {
    String ndNumber = subject.getNdNumber();
    return Utils.simplifyString(ndNumber);
  }
}
package smash.appointment.parse;
/**
 * A study subject described by four text fields: name, surname, ND number and
 * screening number.
 */
public class Subject {

  private String name;
  private String surname;
  private String ndNumber;
  private String screeningNumber;

  /**
   * Creates a subject with all four fields set.
   *
   * @param name
   *          the name
   * @param surname
   *          the surname
   * @param ndNumber
   *          the ND number
   * @param screeningNumber
   *          the screening number
   */
  public Subject(String name, String surname, String ndNumber, String screeningNumber) {
    this.name = name;
    this.surname = surname;
    this.ndNumber = ndNumber;
    this.screeningNumber = screeningNumber;
  }

  /**
   * @return the name
   * @see #name
   */
  public String getName() {
    return name;
  }

  /**
   * @param name
   *          the name to set
   * @see #name
   */
  public void setName(String name) {
    this.name = name;
  }

  /**
   * @return the surname
   * @see #surname
   */
  public String getSurname() {
    return surname;
  }

  /**
   * @param surname
   *          the surname to set
   * @see #surname
   */
  public void setSurname(String surname) {
    this.surname = surname;
  }

  /**
   * @return the ndNumber
   * @see #ndNumber
   */
  public String getNdNumber() {
    return ndNumber;
  }

  /**
   * @param ndNumber
   *          the ndNumber to set
   * @see #ndNumber
   */
  public void setNdNumber(String ndNumber) {
    this.ndNumber = ndNumber;
  }

  /**
   * @return the screeningNumber
   * @see #screeningNumber
   */
  public String getScreeningNumber() {
    return screeningNumber;
  }

  /**
   * @param screeningNumber
   *          the screeningNumber to set
   * @see #screeningNumber
   */
  public void setScreeningNumber(String screeningNumber) {
    this.screeningNumber = screeningNumber;
  }

  /**
   * Renders the subject as {@code name surname (ndNumber; screeningNumber)},
   * reading every field through its getter.
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(this.getName()).append(" ").append(this.getSurname());
    text.append(" (").append(this.getNdNumber());
    text.append("; ").append(this.getScreeningNumber()).append(")");
    return text.toString();
  }
}
package smash.appointment.parse;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * In-memory store of the known subjects, optionally filled from a
 * tab-separated text file.
 */
public class SubjectDao {
  private List<Subject> subjects = new ArrayList<Subject>();

  /**
   * Appends a subject to the store.
   *
   * @param subject
   *          the subject to add
   */
  public void addSubject(Subject subject) {
    subjects.add(subject);
  }

  /**
   * Reads subjects from a text file and adds them to the store. Every
   * non-blank line must hold at least four tab-separated columns, passed to
   * the {@link Subject} constructor in order (name, surname, ND number,
   * screening number); further columns are ignored and blank lines are
   * skipped.
   * <p>
   * NOTE(review): the file is decoded by {@link FileReader}, i.e. with the
   * platform default charset on older JVMs - confirm this matches how the
   * subject files are produced.
   *
   * @param filename
   *          path of the file to read
   * @throws IOException
   *           when the file cannot be read or a non-blank line has fewer than
   *           four columns
   */
  public void readFile(String filename) throws IOException {
    try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
      String line;
      int lineNumber = 0;
      while ((line = br.readLine()) != null) {
        lineNumber++;
        if (line.trim().isEmpty()) {
          continue;
        }
        // limit -1 keeps trailing empty columns, which split("\t") would drop
        String[] tmp = line.split("\t", -1);
        if (tmp.length < 4) {
          throw new IOException(
              "Invalid subject entry in " + filename + " (line " + lineNumber + "): expected 4 tab-separated columns but found " + tmp.length);
        }
        addSubject(new Subject(tmp[0], tmp[1], tmp[2], tmp[3]));
      }
    }
  }

  /**
   * @return the subjects
   * @see #subjects
   */
  public List<Subject> getSubjects() {
    return subjects;
  }

  /**
   * @param subjects
   *          the subjects to set
   * @see #subjects
   */
  public void setSubjects(List<Subject> subjects) {
    this.subjects = subjects;
  }
}
package smash.appointment.parse;
import org.apache.log4j.Logger;
/**
 * Strategy for recognising a subject in a simplified cell text. Subclasses
 * decide which text represents a subject (its "indexed string"); this class
 * provides the matching and ranking built on top of it.
 */
public abstract class SubjectIndexer {
  Logger logger = Logger.getLogger(SubjectIndexer.class);

  /**
   * @param subject
   *          subject to index
   * @return the text that identifies the subject for this indexer
   */
  public abstract String getIndexedString(Subject subject);

  /**
   * Checks whether the query starts with the subject's indexed string.
   *
   * @param subject
   *          candidate subject
   * @param simplifiedQuery
   *          simplified cell text
   * @return {@code true} when the indexed string is non-empty and the query
   *         starts with it
   */
  public boolean match(Subject subject, String simplifiedQuery) {
    String indexedString = getIndexedString(subject);
    // every string starts with "", so an empty key would match any query
    if (indexedString.isEmpty()) {
      return false;
    }
    return simplifiedQuery.startsWith(indexedString);
  }

  /**
   * Ranks two matching subjects by the length of their indexed strings.
   *
   * @param subject
   *          newly matched subject
   * @param oldSubject
   *          subject matched so far
   * @return {@code true} when the new subject's indexed string is strictly
   *         longer than the old one's
   */
  public boolean isBetter(Subject subject, Subject oldSubject) {
    return getIndexedString(subject).length() > getIndexedString(oldSubject).length();
  }
}
package smash.appointment.parse;
/** Indexes a subject by its surname only. */
public class SurnameIndexer extends SubjectIndexer {

  /**
   * @return simplified surname of the subject
   */
  public String getIndexedString(Subject subject) {
    String surname = subject.getSurname();
    return Utils.simplifyString(surname);
  }
}
package smash.appointment.parse;
/** Indexes a subject by its surname immediately followed by its name. */
public class SurnameNameIndexer extends SubjectIndexer {

  /**
   * @return simplified concatenation of the subject's surname and name
   */
  public String getIndexedString(Subject subject) {
    String surnameThenName = subject.getSurname() + subject.getName();
    return Utils.simplifyString(surnameThenName);
  }
}
package smash.appointment.parse;
/** String helpers shared by the parser classes. */
public class Utils {

  /**
   * Characters dropped by {@link #simplifyString(String)}: whitespace, hyphen
   * and the copyright sign. Compiled once instead of on every call.
   */
  private static final java.util.regex.Pattern IGNORED_CHARACTERS = java.util.regex.Pattern.compile("[\\s\\-©]");

  /**
   * Normalises a text for comparison: removes every whitespace character,
   * hyphen and copyright sign, then lower-cases the rest. Lower-casing uses
   * {@link java.util.Locale#ROOT} so the result does not depend on the default
   * locale of the machine (e.g. Turkish dotless i).
   *
   * @param query
   *          text to normalise; must not be {@code null}
   * @return the normalised text
   */
  public static String simplifyString(String query) {
    return IGNORED_CHARACTERS.matcher(query).replaceAll("").toLowerCase(java.util.Locale.ROOT);
  }
}
package smash.appointment.parse;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.log4j.Logger;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
/**
 * Reads appointments from an agenda workbook. Every sheet whose name contains
 * "2017" is treated as one month laid out as a grid of weeks; the positions of
 * the grid are described by the offset fields below.
 */
public class XlsxCalendarProcessor {
  Logger logger = Logger.getLogger(XlsxCalendarProcessor.class);

  /** Source of the subjects handed to the cell parser. */
  private SubjectDao subjectDao;

  /**
   * Parses the whole workbook. The sheet name is expected to start with an
   * English month name followed by a space; sheets whose name does not contain
   * "2017" are skipped. The file and the workbook are closed before
   * returning, also when parsing fails.
   *
   * @param filename
   *          path of the workbook
   * @return all appointments found, in sheet order
   * @throws EncryptedDocumentException
   *           propagated from opening the workbook
   * @throws InvalidFormatException
   *           propagated from opening the workbook
   * @throws IOException
   *           when the file cannot be read or closed
   * @throws ParseException
   *           when the first word of a processed sheet name is not an English
   *           month name
   */
  public List<AppointmentEntry> processExcel(String filename) throws EncryptedDocumentException, InvalidFormatException, IOException, ParseException {
    List<AppointmentEntry> result = new ArrayList<AppointmentEntry>();
    try (InputStream inp = new FileInputStream(filename); Workbook workbook = WorkbookFactory.create(inp)) {
      Iterator<Sheet> sheetIter = workbook.sheetIterator();
      while (sheetIter.hasNext()) {
        Sheet sheet = sheetIter.next();
        String name = sheet.getSheetName().trim();
        if (name.contains("2017")) {
          String monthName = name.split(" ")[0];
          String monthNumber = parseMonth(monthName);
          result.addAll(processSheet(sheet, "2017-" + monthNumber));
        } else {
          logger.debug("Skipping sheet: " + name);
        }
      }
    }
    return result;
  }

  /** Column indices read as the days of a week. */
  int[] dayColumns = new int[] { 3, 4, 5, 6, 7 };
  /** Row index at which each week block starts. */
  int[] weekStartRows = new int[] { 5, 23, 41, 60, 78, 96 };
  /** Column holding the hour labels. */
  int hourColum = 0;
  /** Offset (from the week start) of the row holding the day-of-month numbers. */
  int dayOfMonthRowOffset = 0;
  /** Offset (from the week start) of the first appointment row, inclusive. */
  int calendarRowStartOffset = 3;
  /** Offset (from the week start) of the last appointment row, exclusive. */
  int calendarRowEndOffset = 18;

  /**
   * Extracts the appointments of one month sheet.
   *
   * @param sheet
   *          sheet to read
   * @param string
   *          prefix of the day string; "-" and the two-digit day of month are
   *          appended to it
   * @return appointments found on the sheet
   */
  private List<AppointmentEntry> processSheet(Sheet sheet, String string) {
    List<AppointmentEntry> result = new ArrayList<AppointmentEntry>();
    CellParser parser = new CellParser();
    parser.setSubjectDao(subjectDao);
    // created once per sheet instead of once per cell
    SimpleDateFormat formatTime = new SimpleDateFormat("HH:mm");

    for (int weekOffset : weekStartRows) {
      Row weekRow = sheet.getRow(weekOffset + dayOfMonthRowOffset);
      if (weekRow == null) {
        // the sheet has no such row, so there is no week block here
        continue;
      }
      for (int dayColumnOffset : dayColumns) {
        Cell dayCell = weekRow.getCell(dayColumnOffset);
        if (dayCell == null) {
          // undefined cell: no day in this slot
          continue;
        }
        String dayOfMonth = ((int) dayCell.getNumericCellValue()) + "";
        if (dayOfMonth.length() == 1) {
          dayOfMonth = "0" + dayOfMonth;
        }
        if (dayOfMonth.equals("00")) {
          // numeric value 0: no day in this slot
          continue;
        }
        String day = string + "-" + dayOfMonth;
        // time used until the hour column provides one
        String hour = "08:00";
        for (int hourOffset = calendarRowStartOffset; hourOffset < calendarRowEndOffset; hourOffset++) {
          Row hourRow = sheet.getRow(weekOffset + hourOffset);
          if (hourRow == null) {
            continue;
          }
          Cell hourCell = hourRow.getCell(hourColum);
          if (hourCell != null && hourCell.getCellTypeEnum().equals(CellType.NUMERIC)) {
            String hourString = formatTime.format(hourCell.getDateCellValue());
            if (isHour(hourString)) {
              hour = hourString;
            }
          }
          Cell appointmentCell = hourRow.getCell(dayColumnOffset);
          if (appointmentCell == null) {
            // undefined cell: nothing scheduled
            continue;
          }
          String query = appointmentCell.getStringCellValue();
          if (query != null && !query.isEmpty()) {
            AppointmentEntry entry = parser.parseAppointment(query, hour);
            entry.setDay(day);
            result.add(entry);
          }
        }
      }
    }
    return result;
  }

  /** Two digits, a colon and two digits at the very start of the text. */
  Pattern timePattern = Pattern.compile("^[0-9][0-9]\\:[0-9][0-9]");

  /**
   * @return {@code true} when the text starts with a time token
   */
  private boolean isHour(String hourString) {
    Matcher matcher = timePattern.matcher(hourString);
    return matcher.find();
  }

  /**
   * Converts an English month name into its two-digit, 1-based number.
   *
   * @param monthName
   *          English month name
   * @return "01" to "12"
   * @throws ParseException
   *           when the text is not an English month name
   */
  private String parseMonth(String monthName) throws ParseException {
    Date date = new SimpleDateFormat("MMMM", Locale.ENGLISH).parse(monthName);
    Calendar cal = Calendar.getInstance();
    cal.setTime(date);
    // Calendar months are 0-based
    String result = (cal.get(Calendar.MONTH) + 1) + "";
    if (result.length() == 1) {
      result = "0" + result;
    }
    return result;
  }

  /**
   * @return the subjectDao
   * @see #subjectDao
   */
  public SubjectDao getSubjectDao() {
    return subjectDao;
  }

  /**
   * @param subjectDao
   *          the subjectDao to set
   * @see #subjectDao
   */
  public void setSubjectDao(SubjectDao subjectDao) {
    this.subjectDao = subjectDao;
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment