Something went wrong on our end
-
Christopher Schankula authored
Code owners
Assign users and groups as approvers for specific file changes. Learn more.
BioTree.java 11.67 KiB
package data;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import org.json.simple.parser.ParseException;
import search.BST;
import sort.KDT;
public class BioTree implements Serializable {
/**
 * Version identifier used by Java serialization for this class.
 */
private static final long serialVersionUID = 4291273291916906661L;
//FIXME: replace with a single kd-tree
// Lookup from a taxonId to its node.
private static BST<Integer, TaxonNode> idNodes = new BST<Integer, TaxonNode>();
// Lookup from a node's name (TaxonNode.getName()) to the node.
private static BST<String, TaxonNode> strNodes = new BST<String, TaxonNode>();
// Cache of names that were not found in strNodes: maps the name to the taxonId
// returned by WormsAPI, or to -1 when WormsAPI found nothing for that name.
private static BST<String, Integer> incorrectNames = new BST<String, Integer>();
/**
 * Manual smoke check: loads the tree stored under "data/biotree", prints the
 * node with taxonId 2 and then every taxonId reported by
 * getNonEmptyChildren for taxonId 159512.
 *
 * @param args unused
 */
public static void main(String[] args) throws IOException, ParseException {
    BioTree.init("data/biotree");
    System.out.println(idNodes.get(2));
    for (Integer childId : getNonEmptyChildren(159512)) {
        System.out.println(childId);
    }
}
/**
 * Resets the BioTree to an empty state by replacing all three search
 * structures with new, empty ones.
 */
public static void init() {
    incorrectNames = new BST<>();
    strNodes = new BST<>();
    idNodes = new BST<>();
}
/**
 * Reads the BioTree from the files written by {@link #write(String)}.
 *
 * Three serialized search structures are expected in the directory:
 * {@code idNodes.ser}, {@code strNodes.ser} and {@code incorNames.ser}.
 * A structure that cannot be loaded is reported on the console and replaced
 * by an empty one, so that later lookups return null instead of failing on a
 * null reference.
 *
 * @param fn
 *            Directory to read from
 */
@SuppressWarnings("unchecked")
public static void init(String fn) {
    // write() produces "idNodes.ser", but this method used to look for
    // "idnodes.ser", which only matches on case-insensitive file systems.
    // Prefer the name write() uses and fall back to the old spelling so that
    // data directories created with the lower-case name still load.
    String idPath = fn + "/idNodes.ser";
    if (!new File(idPath).exists() && new File(fn + "/idnodes.ser").exists()) {
        idPath = fn + "/idnodes.ser";
    }

    // The casts are unchecked: the files are trusted to contain what write() stored.
    BST<Integer, TaxonNode> loadedIds = (BST<Integer, TaxonNode>) readSerialized(idPath);
    BioTree.idNodes = (loadedIds != null) ? loadedIds : new BST<Integer, TaxonNode>();

    BST<String, TaxonNode> loadedNames =
            (BST<String, TaxonNode>) readSerialized(fn + "/strNodes.ser");
    BioTree.strNodes = (loadedNames != null) ? loadedNames : new BST<String, TaxonNode>();

    BST<String, Integer> loadedIncorrect =
            (BST<String, Integer>) readSerialized(fn + "/incorNames.ser");
    BioTree.incorrectNames =
            (loadedIncorrect != null) ? loadedIncorrect : new BST<String, Integer>();
}

/**
 * Deserializes a single object from the given file. Both streams are closed
 * by try-with-resources, including when reading fails.
 *
 * @param path
 *            File to read
 * @return the deserialized object, or null if the file could not be read
 */
private static Object readSerialized(String path) {
    try (FileInputStream fileIn = new FileInputStream(path);
            ObjectInputStream in = new ObjectInputStream(fileIn)) {
        return in.readObject();
    } catch (IOException | ClassNotFoundException e) {
        System.out.println("Could not load " + path);
        e.printStackTrace();
        return null;
    }
}
/**
 * Writes the BioTree search structures to a directory, one serialized file
 * per structure: {@code idNodes.ser}, {@code strNodes.ser} and
 * {@code incorNames.ser}. The directory is created if it does not exist.
 * Failures are reported on the console; the remaining files are still
 * attempted.
 *
 * @param dir
 *            Directory to write to
 */
public static void write(String dir) {
    //https://examples.javacodegeeks.com/core-java/io/file/check-if-directory-exists/
    File d = new File(dir);
    if (!d.exists() && !d.mkdirs()) {
        // Keep going: each write below reports its own failure.
        System.out.println("Could not create directory " + dir);
    }
    writeSerialized(BioTree.idNodes, dir + "/idNodes.ser");
    writeSerialized(BioTree.strNodes, dir + "/strNodes.ser");
    writeSerialized(BioTree.incorrectNames, dir + "/incorNames.ser");
}

/**
 * Serializes one object to the given file. Both streams are closed by
 * try-with-resources, including when writing fails.
 *
 * @param tree
 *            Object to serialize
 * @param path
 *            File to write to
 */
private static void writeSerialized(Object tree, String path) {
    try (FileOutputStream fileOut = new FileOutputStream(path);
            ObjectOutputStream out = new ObjectOutputStream(fileOut)) {
        out.writeObject(tree);
    } catch (IOException e) {
        e.printStackTrace();
        return;
    }
    // Report the real destination, and only after the streams closed cleanly.
    System.out.println("Serialized data is saved in " + path);
}
/**
 * Processes a record identified by its taxonId by handing the id to
 * processTaxonId, which adds the classification or increments its count.
 *
 * @param taxonId The taxonId of the possible new entry
 * @return the given taxonId, or null if processTaxonId reported a failure
 */
public static Integer processRecord(int taxonId) {
    boolean failed = processTaxonId(taxonId);
    if (failed) {
        return null;
    }
    return Integer.valueOf(taxonId);
}
/**
 * Processes a record identified by its scientific name. The name is resolved
 * to a taxonId with nameToTaxonId, the result is printed, and the id is then
 * handed to processTaxonId.
 *
 * @param scientificName The scientific name of the possible new entry
 * @return taxonId of the new / existing entry, or null if the name could not
 *         be resolved (null or -1) or processTaxonId reported a failure
 * @throws IOException
 * @throws ParseException
 */
public static Integer processRecord(String scientificName) throws IOException, ParseException {
    Integer resolvedId = nameToTaxonId(scientificName);
    System.out.println(scientificName + ": " + resolvedId);
    // The null test comes first so the comparison with -1 never unboxes null.
    boolean unresolved = resolvedId == null || resolvedId == -1;
    if (unresolved || processTaxonId(resolvedId)) {
        return null;
    }
    return resolvedId;
}
/**
 * Registers one record of the given taxon in the tree.
 *
 * If a node for taxonId is already stored in idNodes, only its count is
 * incremented. Otherwise the classification is requested from
 * WormsAPI.idToClassification; the count of the last returned node is
 * incremented and the returned nodes are inserted into idNodes and strNodes,
 * walking from the last array element towards the first and stopping at the
 * first node that is already stored.
 *
 * @param taxonId New / existing TaxonID to add / increment count thereof.
 * @return true if the classification could not be obtained (the API call
 *         threw or returned null), false otherwise
 */
private static boolean processTaxonId(int taxonId) {
TaxonNode[] newNodes = null; //possible eventual new nodes
TaxonNode tx = idNodes.get(taxonId); //search tree to see if the node exists already
System.out.println("tx" + tx);
if (tx != null) { //if it does exist, increment its count
tx.incCount();
}
else { //otherwise, perform API call to get tree
try {
newNodes = WormsAPI.idToClassification(taxonId);
} catch (IOException | ParseException e) {
// the failure is only printed; newNodes stays null and is reported below
e.printStackTrace();
}
if (newNodes == null) return true;
// count the record on the last returned node
// NOTE(review): assumes the array is non-empty and that its last element is
// the requested taxon - confirm against WormsAPI.idToClassification
newNodes[newNodes.length - 1].incCount();
for (int i = newNodes.length - 1; i >= 0; i--) { //iterate over all nodes, last (lowest child) first
tx = newNodes[i];
TaxonNode current = idNodes.get(tx.getTaxonId());
TaxonNode parent = null;
if (i > 0) { //if this is not the first (highest) element, find its parent
parent = idNodes.get(newNodes[i - 1].getTaxonId()); //the parent is either already stored
if (parent == null) parent = newNodes[i - 1]; //or it is the new node that will be added on the next iteration
}
if (current == null) { //if this node is not found, add it
System.out.println("Put: " + tx.getTaxonId());
idNodes.put(tx.getTaxonId(), tx); //put it in both search structures
strNodes.put(tx.getName(), tx);
tx.setParent(parent); //link the node to its parent (null for the first element)
if (parent != null) parent.addChild(tx); //if a parent exists, add it as a child to its parent
} else
//stop loop if this node already exists in the tree (all its parents must exist too!)
break;
}
}
return false;
}
/**
 * Looks up the node stored for a taxonId. Nothing is fetched or created
 * here, so this is best used once the BioTree has been built; the result is
 * whatever idNodes holds for that id.
 *
 * @param taxonId
 *            The taxonId of the taxon.
 * @return The node stored for that taxonId, as returned by the id lookup.
 */
public static TaxonNode getTaxonRecord(int taxonId) {
    TaxonNode record = idNodes.get(taxonId);
    return record;
}
/**
 * Looks up the node for a scientific name. The name is first resolved to a
 * taxonId with nameToTaxonId, and that id is then looked up in the tree.
 * As such, it is best to use this function once all the data has been
 * parsed and the BioTree has been built.
 *
 * @param scientificName
 *            The scientific name of the taxon.
 * @return The node stored for the resolved taxonId, or null if the name
 *         could not be resolved to a taxonId.
 * @throws ParseException
 * @throws IOException
 */
public static TaxonNode getTaxonRecord(String scientificName) throws IOException, ParseException {
    Integer taxonId = nameToTaxonId(scientificName);
    // nameToTaxonId returns null when the name cannot be resolved; answer null
    // here as documented instead of handing a null key to the search tree.
    if (taxonId == null) {
        return null;
    }
    return idNodes.get(taxonId);
}
/**
 * Resolves a scientific name to a taxonId.
 *
 * The lookup order is: the local name index (strNodes), then the cache of
 * previously looked-up names (incorrectNames), and finally WormsAPI. A
 * result obtained from WormsAPI is stored in the cache; a name WormsAPI does
 * not know is cached as -1.
 *
 * @param scientificName
 *            The scientific name of the taxon.
 * @return The taxonId for the name. When the name is unknown, the result is
 *         null on the call that asked WormsAPI and the cached -1 on later
 *         calls.
 * @throws ParseException
 * @throws IOException
 */
public static Integer nameToTaxonId(String scientificName) throws IOException, ParseException {
    // 1) exact name already present in the tree
    TaxonNode localNode = strNodes.get(scientificName);
    if (localNode != null) {
        return localNode.getTaxonId();
    }
    System.out.println(scientificName + " not in local db");

    // 2) name was looked up before: reuse the cached id
    Integer cachedId = incorrectNames.get(scientificName);
    if (cachedId != null) {
        TaxonNode knownNode = idNodes.get(cachedId);
        if (knownNode != null) {
            return knownNode.getTaxonId();
        }
        return cachedId;
    }

    // 3) ask Worms and remember the answer, using -1 for "does not exist"
    System.out.println(scientificName + " not in incor db");
    Integer remoteId = WormsAPI.nameToRecordID(scientificName);
    if (remoteId == null) {
        incorrectNames.put(scientificName, -1);
        return null;
    }
    System.out.println(scientificName + " found in Worms: " + remoteId);
    incorrectNames.put(scientificName, remoteId);
    return remoteId;
}
/**
 * Collects the taxonIds of the given node and of all nodes below it whose
 * count is greater than zero.
 *
 * @param taxonId taxonId of the node to start from
 * @return the collected taxonIds; empty if no node is stored for taxonId
 */
public static Iterable<Integer> getNonEmptyChildren(int taxonId){
    TaxonNode start = idNodes.get(taxonId);
    ArrayList<Integer> ids = new ArrayList<>();
    getAllChildren(start, ids, false);
    return ids;
}
/**
 * Collects the taxonIds of the given node and of all nodes below it,
 * regardless of their count.
 *
 * @param taxonId taxonId of the node to start from
 * @return the collected taxonIds; empty if no node is stored for taxonId
 */
public static Iterable<Integer> getAllChildren(int taxonId){
    TaxonNode start = idNodes.get(taxonId);
    ArrayList<Integer> ids = new ArrayList<>();
    getAllChildren(start, ids, true);
    return ids;
}
/**
 * Recursive worker for the public child queries: appends the id of txNode
 * (when it qualifies) and then descends into each of its children in the
 * order getChildren() yields them.
 *
 * @param txNode node to visit; null is ignored
 * @param result list the qualifying taxonIds are appended to
 * @param emptyAllowed if false, only nodes with a count greater than zero are added
 */
private static void getAllChildren(TaxonNode txNode, ArrayList<Integer> result, boolean emptyAllowed) {
    if (txNode != null) {
        boolean hasRecords = txNode.getCount() > 0;
        if (hasRecords || emptyAllowed) {
            result.add(txNode.getTaxonId());
        }
        for (TaxonNode child : txNode.getChildren()) {
            getAllChildren(child, result, emptyAllowed);
        }
    }
}
/**
 * Prints the tree starting at the node with taxonId 2. Prints nothing if no
 * such node is stored, matching the behaviour of the other printTree
 * overloads.
 */
public static void printTree() {
    // NOTE(review): 2 is hard-coded; presumably the top of the classification
    // (Animalia in WoRMS) - confirm.
    TaxonNode root = idNodes.get(2);
    if (root == null) return; // tree not built yet: nothing to print
    printTree(root, 0);
}
/**
 * Prints the tree starting at the node stored for the given taxonId.
 * Prints nothing if no such node is stored.
 *
 * @param taxonId taxonId of the node to start printing from
 */
public static void printTree(int taxonId) {
    TaxonNode start = idNodes.get(taxonId);
    if (start != null) {
        printTree(start, 0);
    }
}
/**
 * Prints the tree starting at the node with the given scientific name. If
 * the name is not in the local name index, WormsAPI is asked for its id and
 * that id is looked up instead. Prints nothing if no node can be found.
 *
 * @param scientificName scientific name of the node to start printing from
 */
public static void printTree(String scientificName) {
    TaxonNode txNode = strNodes.get(scientificName);
    if (txNode == null) {
        try {
            // Keep the id boxed: a name WormsAPI does not know yields null,
            // which must not be unboxed into an int.
            Integer taxonId = WormsAPI.nameToRecordID(scientificName);
            if (taxonId != null) {
                txNode = idNodes.get(taxonId);
            }
        } catch (IOException | ParseException e) {
            // Lookup failed; report it and fall through to "nothing to print".
            e.printStackTrace();
        }
    }
    if (txNode == null) return;
    printTree(txNode, 0);
}
/**
 * Prints a node and, recursively, all nodes below it. Each line shows the
 * node's name and count, indented by four spaces per level.
 *
 * @param tx node to print first
 * @param level depth of tx relative to the node the printout started at
 */
private static void printTree(TaxonNode tx, int level) {
    char[] blanks = new char[level * 4];
    for (int i = 0; i < blanks.length; i++) {
        blanks[i] = ' ';
    }
    String indent = new String(blanks);
    System.out.format(indent + "%s %d\n", tx.getName(), tx.getCount());
    for (TaxonNode child : tx.getChildren()) {
        printTree(child, level + 1);
    }
}
}