Skip to content
Snippets Groups Projects
Commit f034cc45 authored by Winnie
Browse files

- Improve parsing. Now uses 100% Regex Matching

- Clean up functions. Created new ones in the process.
parent 492b47a0
No related branches found
No related tags found
No related merge requests found
......@@ -12,9 +12,8 @@ import java.util.regex.Pattern;
*
* Notes:
* - Processes one row at a time
* - Press enter for the next row.
* - Parsed data is in string format
* - Data will be converted to the appropriate type in a toADT() function
* - TOO MANY FUNCTION CALLS?
*/
public class FileProcessor {
private static String path = "src/occurrence.csv";
......@@ -36,8 +35,9 @@ public class FileProcessor {
}
/**
* Initialize Processing.
* Reads file at path.
* Calls parse() automatically
* Calls parse() for each line
*/
private static void initProcessing() {
FileReader fr;
......@@ -52,7 +52,7 @@ public class FileProcessor {
br.readLine(); // Reads Past Field Names
while ((currentLine = br.readLine()) != null) {
System.out.println(currentLine);
//System.out.println(currentLine); //Testing ONLY for checking one line at a time
parse(currentLine);
s.nextLine(); //Testing ONLY for checking one line at a time
}
......@@ -67,80 +67,87 @@ public class FileProcessor {
}
/**
* Parses out data from string
* Parses data from string
* Calls createObject on successful regex matching
*
* @param currentLine, a line/row of data
* @throws IOException
*/
// TODO: Fix spahgetti code where taxonId needs to be converted to a string when it is received from biotree.
private static void parse(String currentLine) throws IOException {
String[] splitLine = currentLine.split(",");
String eventId = null, occurId, taxonId = null, individualCount, latitude, longitude, year=null, month= null, day=null;
occurId = splitLine[3];
individualCount = splitLine[4];
Pattern patternEventId = Pattern.compile("OP_ID (\\d+)");
Matcher matchEventId = patternEventId.matcher(splitLine[7]);
/* Regex Pattern Grouping Guide
* Retrieve String Groups with: matchEventId.group(x):
* group 0: full matched string
* group 1: occurrenceId
* group 2: individualCount
* group 3: eventId
* group 4: year
* group 5: month
* group 6: date
* group 7: latitude
* group 8: longitude
* group 9: taxonId
* group 10: Scientific Name
*/
Pattern patternEventId = Pattern.compile("([^,]+)?,([^,]+)?,[^,]+,[^,]+,[^,]+OP_ID (\\d+)?,(\\d+)?-(\\d+)?-(\\d+)?,(\\d+.\\d+)?,(-\\d+.\\d+)?,[^,\\d]+(\\d+)?,([^,]+)?");
Matcher matchEventId = patternEventId.matcher(currentLine);
if(matchEventId.find()) {
eventId = matchEventId.group(1);
}
else {
// TODO: Throw Exception?
System.out.println("Could not parse eventId. String may be unique or missing.");
}
Pattern patternDate = Pattern.compile("(\\d+)-(\\d+)-(\\d+)");
Matcher matchDate = patternDate.matcher(splitLine[8]);
if(matchDate.find()) {
year = matchDate.group(1);
month = matchDate.group(2);
day = matchDate.group(3);
createObjects(matchEventId);
}
else {
// TODO: Throw Exception?
System.out.println("Could not parse eventId. String may be unique or missing.");
}
latitude = splitLine[9];
longitude = splitLine[10];
Pattern patternTaxId = Pattern.compile(":(\\d+)");
Matcher matchTaxId = patternTaxId.matcher(splitLine[11]);
if(matchTaxId.find()) {
taxonId = matchTaxId.group(1);
}
else {
// TODO: Throw Exception?
System.out.println("Could not parse TaxId. String may be unique or missing.");
}
System.out.println("Regex Matching Failed.");
}
// Testing ONLY Print lines
System.out.println("Full String:" + matchEventId.group(0));
System.out.println("Occurence Id:" + matchEventId.group(1));
System.out.println("Ind. Count:" + matchEventId.group(2));
System.out.println("event Id:" + matchEventId.group(3));
System.out.println("Year:" + matchEventId.group(4));
System.out.println("Month:" + matchEventId.group(5));
System.out.println("Day:" + matchEventId.group(6));
System.out.println("lat:" + matchEventId.group(7));
System.out.println("long:" + matchEventId.group(8));
System.out.println("tax Id:" + matchEventId.group(9));
System.out.println("Scientific Name:" + matchEventId.group(10));
}
/**
* Calls BioTree's processRecord and another method to create a Record
*
* @param matchEventId
*/
public static void createObjects(Matcher matchEventId) {
// Call BioTree
if(taxonId != null) {
BioTree.processRecord(Integer.parseInt(taxonId));
if(matchEventId.group(9) != null) {
BioTree.processRecord(Integer.parseInt(matchEventId.group(9)));
// Create Record Object
createRecord(Integer.parseInt(matchEventId.group(3)), matchEventId.group(1), Integer.parseInt(matchEventId.group(9)), Integer.parseInt(matchEventId.group(2)), Float.parseFloat(matchEventId.group(7)), Float.parseFloat(matchEventId.group(8)), Integer.parseInt(matchEventId.group(4)), Integer.parseInt(matchEventId.group(5)), Integer.parseInt(matchEventId.group(6)));
}
else if (splitLine[12] != "NA") {
else if (matchEventId.group(10) != "NA") {
try{
taxonId = Integer.toString(BioTree.processRecord(splitLine[12]));
createRecord(Integer.parseInt(matchEventId.group(3)), matchEventId.group(1), BioTree.processRecord(matchEventId.group(10)), Integer.parseInt(matchEventId.group(2)), Float.parseFloat(matchEventId.group(7)), Float.parseFloat(matchEventId.group(8)), Integer.parseInt(matchEventId.group(4)), Integer.parseInt(matchEventId.group(5)), Integer.parseInt(matchEventId.group(6)));
} catch(IOException e) {
System.out.println("No Taxon ID or Scientific Name");
System.out.println("No Taxon ID or Scientific Name. OR createRecord Error");
}
}
// Create Record Object
createRecord(Integer.parseInt(eventId), occurId, Integer.parseInt(taxonId), Integer.parseInt(individualCount), Float.parseFloat(latitude), Float.parseFloat(longitude), Integer.parseInt(year), Integer.parseInt(month), Integer.parseInt(day));
// Testing ONLY Print lines
// System.out.println("Occurence Id:" + occurId);
// System.out.println("Ind. Count:" + individualCount);
// System.out.println("event Id:" + eventId);
// System.out.println("Year:" + year);
// System.out.println("Month:" + month);
// System.out.println("Day:" + day);
// System.out.println("lat:" + latitude);
// System.out.println("long:" + longitude);
// System.out.println("tax Id:" + taxonId);
}
/**
 * Builds a Record from already-typed field values.
 * Each argument is passed to the Record constructor unchanged and in the same order.
 *
 * @param eventId value passed as the Record's eventId
 * @param occurId value passed as the Record's occurId
 * @param taxonId value passed as the Record's taxonId
 * @param individualCount value passed as the Record's individualCount
 * @param latitude value passed as the Record's latitude
 * @param longitude value passed as the Record's longitude
 * @param year value passed as the Record's year
 * @param month value passed as the Record's month
 * @param day value passed as the Record's day
 * @return the newly constructed Record
 */
public static Record createRecord(int eventId, String occurId, int taxonId, int individualCount, float latitude, float longitude, int year, int month, int day) {
    Record created = new Record(
            eventId,
            occurId,
            taxonId,
            individualCount,
            latitude,
            longitude,
            year,
            month,
            day);
    return created;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment