Functional/Stream programming for the graph problem "Reconstruct Itinerary" - algorithm

I am trying to solve the Reconstruct Itinerary problem (https://leetcode.com/problems/reconstruct-itinerary/) in Scala using a functional approach. The Java solution works, but the Scala one doesn't. One reason I found is that the Java HashMap is updated in place, so every iteration sees the latest map (even when popping back out of the recursion), which is weird.
Here is the solution in Java:
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
public class Solution1 {
private void dfg(Map<String, PriorityQueue<String>> adj, LinkedList<String> result, String vertex){
PriorityQueue<String> pq = adj.get(vertex);
while (pq!=null && !pq.isEmpty()){
System.out.println("Before :"+adj.get(vertex));
String v = pq.poll();
System.out.println("After :"+ adj.get(vertex));
dfg(adj,result,v);
}
result.addFirst(vertex);
}
public List<String> findItinerary(List<List<String>> tickets){
Map<String,PriorityQueue<String>> adj = new HashMap<>();
for(List<String> ticket: tickets){
adj.putIfAbsent(ticket.get(0),new PriorityQueue<>());
adj.get(ticket.get(0)).add(ticket.get(1));
}
LinkedList<String> result = new LinkedList<>();
dfg(adj,result,"JFK");
// already in order: addFirst prepends, so no reverse is needed
return result;
}
public static void main(String[] args){
List<List<String>> tickets = new ArrayList<>();
List<String> t1 = new ArrayList<>();
t1.add("JFK");
t1.add("SFO");
tickets.add(t1);
List<String> t2 = new ArrayList<>();
t2.add("JFK");
t2.add("ATL");
tickets.add(t2);
List<String> t3 = new ArrayList<>();
t3.add("SFO");
t3.add("ATL");
tickets.add(t3);
List<String> t4 = new ArrayList<>();
t4.add("ATL");
t4.add("JFK");
tickets.add(t4);
List<String> t5 = new ArrayList<>();
t5.add("ATL");
t5.add("SFO");
tickets.add(t5);
System.out.println();
Solution1 s1 = new Solution1();
List<String> finalRes = s1.findItinerary(tickets);
for(String model : finalRes) {
System.out.print(model + " ");
}
}
}
Here is my solution in Scala which is not working:
package graph
class Itinerary {
}
case class Step(g: Map[String,List[String]],sort: List[String]=List())
object Solution {
def main(arr: Array[String]) = {
val tickets = List(List("JFK","SFO"),List("JFK","ATL"),List("SFO","ATL"),List("ATL","JFK"),List("ATL","SFO"))
println(findItinerary(tickets))
}
def findItinerary(tickets: List[List[String]]): List[String] = {
val g = tickets.foldLeft(Map[String,List[String]]())((m,t)=>{
val key=t(0)
val value= t(1)
m + (key->(m.getOrElse(key,Nil) :+ value).sorted)
})
println(g)
// g.keys.foldLeft(Step())((s,n)=> dfs(n,g,s)).sort.toList
dfs("JFK",Step(g)).sort.toList
}
def dfs(vertex: String,step: Step): Step = {
println("Input vertex " + vertex)
println("Input map "+ step.g)
val updatedStep= step.g.getOrElse(vertex,Nil).foldLeft(step) ((s,n)=>{
//println("Processing "+n+" of vertex "+vertex)
//delete link
val newG = step.g + (vertex->step.g.getOrElse(vertex,Nil).filter(v=>v!=n))
// println(newG)
dfs(n,step.copy(g=newG))
})
println("adding vertex to result "+vertex)
updatedStep.copy(sort = updatedStep.sort:+vertex)
}
}
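For what it's worth, a likely culprit is inside the foldLeft in dfs: the body reads step.g instead of the fold accumulator s.g, so every sibling iteration starts again from the original map and forgets the edges consumed by the recursive calls (the very persistence the mutable Java HashMap gives you for free). A minimal sketch of the state-threading fix follows; note that, like the original, filter drops all duplicate copies of an edge at once, and there is still no backtracking:
def dfs(vertex: String, step: Step): Step = {
  val updatedStep = step.g.getOrElse(vertex, Nil).foldLeft(step) { (s, n) =>
    // thread the accumulated state s through the fold, not the original step
    if (!s.g.getOrElse(vertex, Nil).contains(n)) s // edge already consumed deeper in the recursion
    else {
      val newG = s.g + (vertex -> s.g.getOrElse(vertex, Nil).filter(_ != n))
      dfs(n, s.copy(g = newG))
    }
  }
  updatedStep.copy(sort = updatedStep.sort :+ vertex)
}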

Scala is sometimes approached as a "better" Java, but that's really very limiting. If you can get into the FP mindset, and study the Standard Library, you'll find that it's a whole new world.
def findItinerary(tickets: List[List[String]]): List[String] = {
def loop(from : String
,jump : Map[String,List[String]]
,acc : List[String]) : List[String] = jump.get(from) match {
case None => if (jump.isEmpty) from::acc else Nil
case Some(next::Nil) => loop(next, jump - from, from::acc)
case Some(nLst) =>
nLst.view.map{ next =>
loop(next, jump+(from->(nLst diff next::Nil)), from::acc)
}.find(_.lengthIs > 0).getOrElse(Nil)
}
loop("JFK"
,tickets.groupMap(_(0))(_(1)).map(kv => kv._1 -> kv._2.sorted)
,Nil).reverse
}
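Running this against the tickets from the question yields the expected lexicographically smallest itinerary:
findItinerary(List(
  List("JFK","SFO"), List("JFK","ATL"), List("SFO","ATL"),
  List("ATL","JFK"), List("ATL","SFO")))
// List(JFK, ATL, JFK, SFO, ATL, SFO)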

I am going to be honest: I didn't look through your code to see where the problem was. But I got caught by the problem myself and decided to give it a go; here is the code (I hope it helps you):
type Airport = String // Refined: three upper-case letters.
final case class AirlineTicket(from: Airport, to: Airport)
object ReconstructItinerary {
// I am using cats NonEmptyList to improve type safety, but you can easily remove it from the code.
private final case class State(
currentAirport: Airport,
availableDestinations: Map[Airport, NonEmptyList[Airport]],
solution: List[Airport]
)
def apply(tickets: List[AirlineTicket])(start: Airport): Option[List[Airport]] = {
@annotation.tailrec
def loop(currentState: State, checkpoints: List[State]): Option[List[Airport]] = {
if (currentState.availableDestinations.isEmpty) {
// We used all the tickets, so we can return this solution.
Some((currentState.currentAirport :: currentState.solution).reverse)
} else {
val State(currentAirport, availableDestinations, solution) = currentState
availableDestinations.get(currentAirport) match {
case None =>
// We got into nowhere, lets see if we can return to a previous state...
checkpoints match {
case checkpoint :: remaining =>
// If we can return from there
loop(currentState = checkpoint, checkpoints = remaining)
case Nil =>
// If we can't, then we can say that there is no solution.
None
}
case Some(NonEmptyList(destination, Nil)) =>
// If from the current airport we can only travel to one destination, we will just follow that.
loop(
currentState = State(
currentAirport = destination,
availableDestinations - currentAirport,
currentAirport :: solution
),
checkpoints
)
case Some(NonEmptyList(destination, destinations @ head :: tail)) =>
// If we can travel to more than one destination, we are going to try all in order.
val newCheckpoints = destinations.map { altDestination =>
val newDestinations = NonEmptyList(head = destination, tail = destinations.filterNot(_ == altDestination))
State(
currentAirport = altDestination,
availableDestinations.updated(key = currentAirport, value = newDestinations),
currentAirport :: solution
)
}
loop(
currentState = State(
currentAirport = destination,
availableDestinations.updated(key = currentAirport, value = NonEmptyList(head, tail)),
currentAirport :: solution
),
newCheckpoints ::: checkpoints
)
}
}
}
val availableDestinations = tickets.groupByNel(_.from).view.mapValues(_.map(_.to).sorted).toMap
loop(
currentState = State(
currentAirport = start,
availableDestinations,
solution = List.empty
),
checkpoints = List.empty
)
}
}
You can see the code running here.
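For reference, a quick usage sketch with the tickets from the question (assuming cats.data.NonEmptyList and cats' list syntax for groupByNel are in scope):
val tickets = List(
  AirlineTicket("JFK", "SFO"), AirlineTicket("JFK", "ATL"),
  AirlineTicket("SFO", "ATL"), AirlineTicket("ATL", "JFK"), AirlineTicket("ATL", "SFO")
)
ReconstructItinerary(tickets)("JFK") // Some(List(JFK, ATL, JFK, SFO, ATL, SFO))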

Related

Using the filter function in kotlin

For the past couple of hours I have been trying to understand how the filter function works in Kotlin and whether it has any correlation with the one in Java.
Basically, I have code that's written in Java and I would love to have it transcribed to Kotlin:
private List<Order> getFilteredOrders(Courier courier) {
String[] glovoBoxKeywords = glovoBoxWords.toLowerCase().split(",");
List<Vehicle> allowedVehicles = Arrays.asList(MOTORCYCLE, ELECTRIC_SCOOTER);
return orders.stream()
.filter(order -> {
String description = order.getDescription().toLowerCase();
if (!courier.getBox()) {
return Arrays.stream(glovoBoxKeywords).noneMatch(description::contains);
}
return true;
})
.filter(order -> {
Location pickupLocation = order.getPickup();
Location deliveryLocation = order.getDelivery();
Double distance = calculateDistance(pickupLocation, deliveryLocation);
if (distance > longDeliveryDistance) {
return allowedVehicles.contains(courier.getVehicle());
}
return true;
})
.collect(Collectors.toList());
}
I tried this, but I only got this far and was literally stuck :(
private fun findFilteredOrder(courier: Courier) : List<Order> {
val glovoBoxKeyWords = glovoBoxWords.toLowerCase().split(",")
val allowedVehicles = listOf(Vehicle.ELECTRIC_SCOOTER, Vehicle.MOTORCYCLE)
orderList.filter { order ->
val description = order.getDescription().toLowerCase()
if(!courier.getBox()) {
}
true
}.filter {
val pickupLocation = it.getPickup()
val deliveryLocation = it.getDelivery()
val distance = calculateDistance(deliveryLocation, pickupLocation)
if(distance > longDeliveryDistance) {
courier.getVehicle() in allowedVehicles
}
true
}
}
Please go easy on me, this is my first attempt at doing something with Kotlin. I'd also appreciate any pointers to material that explains these Kotlin functions better: let, apply, associateBy, etc. Thanks!
The filter function in Kotlin Collections follows the same principle as in other frameworks/libraries, including Java Streams: given a predicate (a function from the element type of the collection to Boolean), it returns a new collection with the elements matching the predicate. You can find more information and examples of other functions and operators in the official documentation.
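For example, a minimal sketch with throwaway values (not from your code), showing that only the elements for which the predicate returns true are kept:
val numbers = listOf(1, 2, 3, 4, 5)
val evens = numbers.filter { it % 2 == 0 } // predicate: keep even numbers
println(evens) // [2, 4]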
Your code was almost there. I translated the Java Stream operations to Kotlin List operations and rewrote the return statements to remove the redundant ifs:
private fun findFilteredOrder(courier: Courier): List<Order> {
    val glovoBoxKeywords = glovoBoxWords.toLowerCase().split(",")
    val allowedVehicles = listOf(Vehicle.ELECTRIC_SCOOTER, Vehicle.MOTORCYCLE)
    return orderList.filter { order ->
        val description = order.getDescription().toLowerCase()
        courier.getBox() || glovoBoxKeywords.none { it in description }
    }.filter { order ->
        val pickupLocation = order.getPickup()
        val deliveryLocation = order.getDelivery()
        val distance = calculateDistance(deliveryLocation, pickupLocation)
        distance <= longDeliveryDistance || courier.getVehicle() in allowedVehicles
    }
}
I don't know why no one mentioned the use of labels: https://kotlinlang.org/docs/returns.html#break-and-continue-labels.
Since this question has a nice google ranking, I'll add what I was originally searching for.
The OP probably was aware that filter needs a predicate that returns a Boolean, and that filter returns a list with the items that pass the predicate (the items for which the predicate returned true).
What he was not aware of is that we can "emulate" Java returns through Kotlin labels:
private fun findFilteredOrder(courier: Courier): List<Order> {
    val glovoBoxKeywords = glovoBoxWords.toLowerCase().split(",")
    val allowedVehicles = listOf(Vehicle.ELECTRIC_SCOOTER, Vehicle.MOTORCYCLE)
    return orderList.filter shouldSkip@{ order ->
        val description = order.getDescription().toLowerCase()
        if (courier.getBox()) {
            return@shouldSkip true
        }
        if (glovoBoxKeywords.none { it in description }) {
            return@shouldSkip true
        }
        return@shouldSkip false
    }.filter shouldSkip@{ order ->
        val pickupLocation = order.getPickup()
        val deliveryLocation = order.getDelivery()
        val distance = calculateDistance(deliveryLocation, pickupLocation)
        if (distance <= longDeliveryDistance) {
            return@shouldSkip true
        }
        if (courier.getVehicle() in allowedVehicles) {
            return@shouldSkip true
        }
        return@shouldSkip false
    }
}
Since Kotlin takes the last expression of a block as its value, and a bare return returns to the outer scope, it is pretty easy to end up with:
filter {
startPutting >= someMagic && andComplex ||
verificationsThat.is { hardToUnderstand }.because {
weNeedToReturnHere
}
}
The labels allow us to be more verbose but also more clear.

List as an object field - how to handle it via Stream (Java 8)?

There is a Waybill object that has a Set<Packing> field, and the Packing object has a PRICE field.
I get a List<Waybill>.
I need to calculate the total cost of all Packing objects from the entire List<Waybill>.
What is the proper way to do this with Streams?
Thank you.
class Waybill {
Set<Packing> setOfPacking;
}
class Packing {
int PRICE;
}
List<Waybill> allWaybills = ...
This worked for me:
double total = allWaybills.stream()
.flatMap(waybill -> waybill.setOfPacking.stream())
.mapToInt(packing -> packing.PRICE)
.sum();
I think it is easier to reason about because there aren't any multi-level stream operations.
I would be interested to see how flatMapToInt could replace the flatMap and mapToInt operations with a single operation without making it multi-level.
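For what it's worth, here is a sketch of that flatMapToInt variant; it collapses the outer flatMap and mapToInt into one operation, although the mapper still needs an inner mapToInt to produce the required IntStream:
int total = allWaybills.stream()
    .flatMapToInt(waybill -> waybill.setOfPacking.stream()
        .mapToInt(packing -> packing.PRICE))
    .sum();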
Here is a test program:
import java.util.Set;
import java.util.List;
import java.util.HashSet;
import java.util.ArrayList;
import java.util.stream.Collectors;
public class HelloWorld
{
public static class Packing
{
public int PRICE = 0;
}
public static class Waybill
{
public Set<Packing> setOfPacking = new HashSet<Packing>();
}
public static void main(String []args){
List<Waybill> allWaybills = new ArrayList<Waybill>();
Waybill w1 = new Waybill();
Packing p1 = new Packing(); p1.PRICE = 1; w1.setOfPacking.add(p1);
Packing p2 = new Packing(); p2.PRICE = 2; w1.setOfPacking.add(p2);
allWaybills.add(w1);
Waybill w2 = new Waybill();
Packing p3 = new Packing(); p3.PRICE = 3; w2.setOfPacking.add(p3);
Packing p4 = new Packing(); p4.PRICE = 4; w2.setOfPacking.add(p4);
allWaybills.add(w2);
double total = allWaybills.stream()
.flatMap(waybill -> waybill.setOfPacking.stream())
.mapToInt(packing -> packing.PRICE)
.sum();
System.out.println("total = "+total);
}
}
import java.util.stream.*;
List<Waybill> allWaybills = ...
int totalCost = allWaybills
.stream()
.mapToInt(w -> w.setOfPacking
.stream()
.mapToInt(p -> p.PRICE)
.sum()
)
.sum();

OrientDB edge creation latency

I am using TinkerPop Blueprints to create an OrientDB graph for a dataset with millions of nodes and 100M edges, on a laptop with 16 GB of memory, 64-bit Ubuntu, and a 64-bit JVM.
Here are the results from our benchmarking: the vertices get added fine, but each edge addition takes almost a second.
[Benchmark table: Edge Creation Latency]
Can you suggest what we are not doing right with respect to edge creation, and how we can get it to a more reasonable latency?
Here's the code associated with the above table:
package orientdbtest
import com.orientechnologies.orient.core.metadata.schema.OType
import com.tinkerpop.blueprints.{Direction, Edge, Vertex}
import com.tinkerpop.blueprints.impls.orient._
import com.orientechnologies.orient.core.intent.OIntentMassiveInsert
object Example {
def addRandomVertex(graph: OrientGraphNoTx, uuid: String) : Vertex = graph.addVertex("class:Random", "uuid", uuid)
def addRandomEdge(graph: OrientGraphNoTx, source: Vertex, target: Vertex, id: String): Edge = graph.addEdge(null, source, target, id)
def createRandomNetworkDatabase(graph: OrientGraphNoTx, numNodes: Int, numEdges: Int, useLightWeightEdges: Boolean):(Long, Long) = {
if (graph.getVertexType("Random") == null) {
println("Creating random type")
val random_vertex_type: OrientVertexType = graph.createVertexType("Random")
random_vertex_type.createProperty("id", OType.STRING)
} else {
println("Random type exists")
}
val timeStartNodeCreation = System.currentTimeMillis()
val nodeList: List[Vertex] = Range(0,numNodes).toList.map(x => addRandomVertex(graph, x.toString()))
val timeEndNodeCreation = System.currentTimeMillis()
println("Time to create " + numNodes + " is " + (timeEndNodeCreation-timeStartNodeCreation))
val nodeListFirstHalf = nodeList.slice(0,nodeList.length/2)
val nodeListSecondHalf = nodeList.slice(nodeList.length/2+1,nodeList.length)
var edgeID = 1
if(useLightWeightEdges) graph.setUseLightweightEdges(true)
val timeStartEdgeCreation = System.currentTimeMillis()
// createEdges from first half to the second half
nodeListFirstHalf.foreach(sourceVertex => {
nodeListSecondHalf.foreach(targetVertex => {
while(edgeID < numEdges)
{
addRandomEdge(graph, sourceVertex, targetVertex, edgeID.toString())
edgeID = edgeID +1
graph.commit()
}
})
})
val timeEndEdgeCreation = System.currentTimeMillis()
println("Time to create " + edgeID + " is " + (timeEndEdgeCreation-timeStartEdgeCreation))
(0L, 0L)
}
def main(args: Array[String]): Unit = {
val numNodes = 10
val numEdges = 25
val useLightWeightEdges = false
val uri: String = "plocal:target/database/random_sample_" + numNodes + "_" + numEdges + useLightWeightEdges.toString()
val graph: OrientGraphNoTx = new OrientGraphNoTx(uri)
graph.setKeepInMemoryReferences(false);
graph.getRawGraph().getLocalCache().setEnable(false)
graph.declareIntent(new OIntentMassiveInsert())
try {
createRandomNetworkDatabase(graph, numNodes, numEdges, useLightWeightEdges)
graph.declareIntent(null)
} finally {
graph.shutdown()
}
println("Adios")
}
}

How to add tags to a parsed tree that has no tag?

For example, the parsing tree from Stanford Sentiment Treebank
"(2 (2 (2 near) (2 (2 the) (2 end))) (3 (3 (2 takes) (2 (2 on) (2 (2 a) (2 (2 whole) (2 (2 other) (2 meaning)))))) (2 .)))",
where the number is the sentiment label of each node.
I want to add POS tagging information to each node. Such as:
"(NP (ADJP (IN near)) (DT the) (NN end)) "
I have tried to directly parse the sentence, but the resulting tree is different from the one in the Sentiment Treebank (maybe because of the parser version or parameters; I have tried to contact the authors but got no response).
How can I obtain the tagging information?
I think the code in edu.stanford.nlp.sentiment.BuildBinarizedDataset should be helpful. The main() method steps through how these binary trees can be created in Java code.
Some key lines to look out for in the code:
LexicalizedParser parser = LexicalizedParser.loadModel(parserModel);
TreeBinarizer binarizer = TreeBinarizer.simpleTreeBinarizer(parser.getTLPParams().headFinder(), parser.treebankLanguagePack());
...
Tree tree = parser.apply(tokens);
Tree binarized = binarizer.transformTree(tree);
You can access the node tag information from the Tree object. You should look at the javadoc for edu.stanford.nlp.trees.Tree to see how to access this information.
Also in this answer I have some code that shows accessing a Tree:
How to get NN and NNS from a text?
You want to look at the label() of each tree and subtree to get the tag for a node.
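As a small illustration (the variable names are mine; assume tree is the Tree returned by parser.apply(tokens)), a Tree is iterable over its subtrees, so you can walk it and read each non-leaf node's tag from its label:
for (Tree node : tree) { // iterates the subtrees in preorder
    if (!node.isLeaf()) {
        // preterminals carry POS tags (e.g. IN, DT, NN); phrases carry constituent labels (e.g. NP, ADJP)
        System.out.println(node.label().value());
    }
}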
Here is the reference on GitHub to BuildBinarizedDataset.java:
https://github.com/stanfordnlp/CoreNLP/blob/master/src/edu/stanford/nlp/sentiment/BuildBinarizedDataset.java
Please let me know if anything is unclear about this and I can provide further assistance!
First, you need to download the Stanford Parser
Set up
private LexicalizedParser parser;
private TreeBinarizer binarizer;
private CollapseUnaryTransformer transformer;
parser = LexicalizedParser.loadModel(PCFG_PATH);
binarizer = TreeBinarizer.simpleTreeBinarizer(
parser.getTLPParams().headFinder(), parser.treebankLanguagePack());
transformer = new CollapseUnaryTransformer();
Parse
Tree tree = parser.apply(tokens);
Access POSTAG
public String[] constTreePOSTAG(Tree tree) {
Tree binarized = binarizer.transformTree(tree);
Tree collapsedUnary = transformer.transformTree(binarized);
Trees.convertToCoreLabels(collapsedUnary);
collapsedUnary.indexSpans();
List<Tree> leaves = collapsedUnary.getLeaves();
int size = collapsedUnary.size() - leaves.size();
String[] tags = new String[size];
HashMap<Integer, Integer> index = new HashMap<Integer, Integer>();
int idx = leaves.size();
int leafIdx = 0;
for (Tree leaf : leaves) {
Tree cur = leaf.parent(collapsedUnary); // go to preterminal
int curIdx = leafIdx++;
boolean done = false;
while (!done) {
Tree parent = cur.parent(collapsedUnary);
if (parent == null) {
tags[curIdx] = cur.label().toString();
break;
}
int parentIdx;
int parentNumber = parent.nodeNumber(collapsedUnary);
if (!index.containsKey(parentNumber)) {
parentIdx = idx++;
index.put(parentNumber, parentIdx);
} else {
parentIdx = index.get(parentNumber);
done = true;
}
tags[curIdx] = parent.label().toString();
cur = parent;
curIdx = parentIdx;
}
}
return tags;
}
Here is the full source code of ConstituencyParse.java that you can run:
Usage:
java ConstituencyParse -tokpath outputtoken.toks -parentpath outputparent.txt -tagpath outputag.txt < input_sentence_in_text_file_one_sent_per_line.txt
(Note: the source code is adapted from the treelstm repo; you also need to modify preprocess-sst.py to call the ConstituencyParse.java file below.)
import edu.stanford.nlp.process.WordTokenFactory;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.process.PTBTokenizer;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.lexparser.TreeBinarizer;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Trees;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.StringReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.HashMap;
import java.util.Properties;
import java.util.Scanner;
public class ConstituencyParse {
private boolean tokenize;
private BufferedWriter tokWriter, parentWriter, tagWriter;
private LexicalizedParser parser;
private TreeBinarizer binarizer;
private CollapseUnaryTransformer transformer;
private GrammaticalStructureFactory gsf;
private static final String PCFG_PATH = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
public ConstituencyParse(String tokPath, String parentPath, String tagPath, boolean tokenize) throws IOException {
this.tokenize = tokenize;
if (tokPath != null) {
tokWriter = new BufferedWriter(new FileWriter(tokPath));
}
parentWriter = new BufferedWriter(new FileWriter(parentPath));
tagWriter = new BufferedWriter(new FileWriter(tagPath));
parser = LexicalizedParser.loadModel(PCFG_PATH);
binarizer = TreeBinarizer.simpleTreeBinarizer(
parser.getTLPParams().headFinder(), parser.treebankLanguagePack());
transformer = new CollapseUnaryTransformer();
// set up to produce dependency representations from constituency trees
TreebankLanguagePack tlp = new PennTreebankLanguagePack();
gsf = tlp.grammaticalStructureFactory();
}
public List<HasWord> sentenceToTokens(String line) {
List<HasWord> tokens = new ArrayList<>();
if (tokenize) {
PTBTokenizer<Word> tokenizer = new PTBTokenizer(new StringReader(line), new WordTokenFactory(), "");
for (Word label; tokenizer.hasNext(); ) {
tokens.add(tokenizer.next());
}
} else {
for (String word : line.split(" ")) {
tokens.add(new Word(word));
}
}
return tokens;
}
public Tree parse(List<HasWord> tokens) {
Tree tree = parser.apply(tokens);
return tree;
}
public String[] constTreePOSTAG(Tree tree) {
Tree binarized = binarizer.transformTree(tree);
Tree collapsedUnary = transformer.transformTree(binarized);
Trees.convertToCoreLabels(collapsedUnary);
collapsedUnary.indexSpans();
List<Tree> leaves = collapsedUnary.getLeaves();
int size = collapsedUnary.size() - leaves.size();
String[] tags = new String[size];
HashMap<Integer, Integer> index = new HashMap<Integer, Integer>();
int idx = leaves.size();
int leafIdx = 0;
for (Tree leaf : leaves) {
Tree cur = leaf.parent(collapsedUnary); // go to preterminal
int curIdx = leafIdx++;
boolean done = false;
while (!done) {
Tree parent = cur.parent(collapsedUnary);
if (parent == null) {
tags[curIdx] = cur.label().toString();
break;
}
int parentIdx;
int parentNumber = parent.nodeNumber(collapsedUnary);
if (!index.containsKey(parentNumber)) {
parentIdx = idx++;
index.put(parentNumber, parentIdx);
} else {
parentIdx = index.get(parentNumber);
done = true;
}
tags[curIdx] = parent.label().toString();
cur = parent;
curIdx = parentIdx;
}
}
return tags;
}
public int[] constTreeParents(Tree tree) {
Tree binarized = binarizer.transformTree(tree);
Tree collapsedUnary = transformer.transformTree(binarized);
Trees.convertToCoreLabels(collapsedUnary);
collapsedUnary.indexSpans();
List<Tree> leaves = collapsedUnary.getLeaves();
int size = collapsedUnary.size() - leaves.size();
int[] parents = new int[size];
HashMap<Integer, Integer> index = new HashMap<Integer, Integer>();
int idx = leaves.size();
int leafIdx = 0;
for (Tree leaf : leaves) {
Tree cur = leaf.parent(collapsedUnary); // go to preterminal
int curIdx = leafIdx++;
boolean done = false;
while (!done) {
Tree parent = cur.parent(collapsedUnary);
if (parent == null) {
parents[curIdx] = 0;
break;
}
int parentIdx;
int parentNumber = parent.nodeNumber(collapsedUnary);
if (!index.containsKey(parentNumber)) {
parentIdx = idx++;
index.put(parentNumber, parentIdx);
} else {
parentIdx = index.get(parentNumber);
done = true;
}
parents[curIdx] = parentIdx + 1;
cur = parent;
curIdx = parentIdx;
}
}
return parents;
}
// convert constituency parse to a dependency representation and return the
// parent pointer representation of the tree
public int[] depTreeParents(Tree tree, List<HasWord> tokens) {
GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
Collection<TypedDependency> tdl = gs.typedDependencies();
int len = tokens.size();
int[] parents = new int[len];
for (int i = 0; i < len; i++) {
// if a node has a parent of -1 at the end of parsing, then the node
// has no parent.
parents[i] = -1;
}
for (TypedDependency td : tdl) {
// let root have index 0
int child = td.dep().index();
int parent = td.gov().index();
parents[child - 1] = parent;
}
return parents;
}
public void printTokens(List<HasWord> tokens) throws IOException {
int len = tokens.size();
StringBuilder sb = new StringBuilder();
for (int i = 0; i < len - 1; i++) {
if (tokenize) {
sb.append(PTBTokenizer.ptbToken2Text(tokens.get(i).word()));
} else {
sb.append(tokens.get(i).word());
}
sb.append(' ');
}
if (tokenize) {
sb.append(PTBTokenizer.ptbToken2Text(tokens.get(len - 1).word()));
} else {
sb.append(tokens.get(len - 1).word());
}
sb.append('\n');
tokWriter.write(sb.toString());
}
public void printParents(int[] parents) throws IOException {
StringBuilder sb = new StringBuilder();
int size = parents.length;
for (int i = 0; i < size - 1; i++) {
sb.append(parents[i]);
sb.append(' ');
}
sb.append(parents[size - 1]);
sb.append('\n');
parentWriter.write(sb.toString());
}
public void printTags(String[] tags) throws IOException {
StringBuilder sb = new StringBuilder();
int size = tags.length;
for (int i = 0; i < size - 1; i++) {
sb.append(tags[i]);
sb.append(' ');
}
sb.append(tags[size - 1]);
sb.append('\n');
tagWriter.write(sb.toString().toLowerCase());
}
public void close() throws IOException {
if (tokWriter != null) tokWriter.close();
parentWriter.close();
tagWriter.close();
}
public static void main(String[] args) throws Exception {
String TAGGER_MODEL = "stanford-tagger/models/english-left3words-distsim.tagger";
Properties props = StringUtils.argsToProperties(args);
if (!props.containsKey("parentpath")) {
System.err.println(
"usage: java ConstituencyParse -deps - -tokenize - -tokpath <tokpath> -parentpath <parentpath>");
System.exit(1);
}
// whether to tokenize input sentences
boolean tokenize = false;
if (props.containsKey("tokenize")) {
tokenize = true;
}
// whether to produce dependency trees from the constituency parse
boolean deps = false;
if (props.containsKey("deps")) {
deps = true;
}
String tokPath = props.containsKey("tokpath") ? props.getProperty("tokpath") : null;
String parentPath = props.getProperty("parentpath");
String tagPath = props.getProperty("tagpath");
ConstituencyParse processor = new ConstituencyParse(tokPath, parentPath, tagPath, tokenize);
Scanner stdin = new Scanner(System.in);
int count = 0;
long start = System.currentTimeMillis();
while (stdin.hasNextLine()) {
String line = stdin.nextLine();
List<HasWord> tokens = processor.sentenceToTokens(line);
//end tagger
Tree parse = processor.parse(tokens);
// produce parent pointer representation
int[] parents = deps ? processor.depTreeParents(parse, tokens)
: processor.constTreeParents(parse);
String[] tags = processor.constTreePOSTAG(parse);
// print
if (tokPath != null) {
processor.printTokens(tokens);
}
processor.printParents(parents);
processor.printTags(tags);
// print tag
StringBuilder sb = new StringBuilder();
int size = tags.length;
for (int i = 0; i < size - 1; i++) {
sb.append(tags[i]);
sb.append(' ');
}
sb.append(tags[size - 1]);
sb.append('\n');
count++;
if (count % 100 == 0) {
double elapsed = (System.currentTimeMillis() - start) / 1000.0;
System.err.printf("Parsed %d lines (%.2fs)\n", count, elapsed);
}
}
long totalTimeMillis = System.currentTimeMillis() - start;
System.err.printf("Done: %d lines in %.2fs (%.1fms per line)\n",
count, totalTimeMillis / 100.0, totalTimeMillis / (double) count);
processor.close();
}
}

What is the Java 8 way to pull an object from a set?

I'd like to pull an item out of a set, and keep it, based on a predicate. It sure seems like this should be possible, but I can't find a way to avoid going through the collection twice. Such an operation could be used to 'pop' an object based on a dynamic priority.
Perhaps I should stick with an iterator.
Here's an example:
import org.junit.Test;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
public class RemoveAndUse {
class A {
int x;
A(int x) { this.x = x;}
}
class B {
int y;
B(int y) { this.y = y;}
}
@Test
public void removeHappyPath() {
Set<A> aList = new HashSet<>(Arrays.asList(new A(1), new A(2), new A(3)));
B b = new B(2);
// remove and keep an A that matches b
A found = aList.stream()
.filter( a -> a.x == b.y )
.findAny().get();
aList.removeIf( a -> a.x == b.y);
// or: aList.remove(found);
assert(!aList.contains(found));
assert(found.x == b.y);
}
}
Any other ideas?
A found = null;
for (Iterator<A> it = aList.iterator();it.hasNext();) {
A a = it.next();
if (a.x == b.y) {
found = a;
it.remove();
break;
}
}
O(n) is guaranteed.
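If you want that as a reusable Java 8 flavor, here is a minimal sketch of a generic helper (popFirst is my name, not a library method) that removes and returns the first match in a single pass:
static <T> Optional<T> popFirst(Collection<T> collection, Predicate<? super T> predicate) {
    for (Iterator<T> it = collection.iterator(); it.hasNext(); ) {
        T element = it.next();
        if (predicate.test(element)) {
            it.remove(); // removes the element from the backing collection
            return Optional.of(element);
        }
    }
    return Optional.empty();
}
Usage: A found = popFirst(aList, a -> a.x == b.y).orElse(null); (needs the java.util.Collection, java.util.Iterator, java.util.Optional, and java.util.function.Predicate imports).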
