How to parallelize merge sort using JavaRDD<Integer> Spark? - parallel-processing

public class MergeSort {
public static List<Integer> serialSort(List<Integer> array) {
if (array.size() > 1) {
int mid = array.size() / 2;
List<Integer> firstHalf = new ArrayList<>(mid);
List<Integer> secondHalf = new ArrayList<>(array.size() - mid);
for (int i = 0; i < array.size(); i++) {
if (i <= mid) {
firstHalf.add(array.get(i));
} else {
secondHalf.add(array.get(i));
}
}
serialSort(firstHalf);
serialSort(secondHalf);
merge(firstHalf, secondHalf, array);
}
return array;
}
//
private static void merge(List firstHalf, List secondHalf, List array) {
int currentIndexFirst = 0;
int currentIndexSecond = 0;
int currentIndexArray = 0;
while (currentIndexFirst < firstHalf.size() && currentIndexSecond < secondHalf.size()) {
if (firstHalf.get(currentIndexFirst) < secondHalf.get(currentIndexSecond)) {
array.set(currentIndexArray, firstHalf.get(currentIndexFirst));
currentIndexArray++;
currentIndexFirst++;
} else {
array.set(currentIndexArray, secondHalf.get(currentIndexSecond));
currentIndexArray++;
currentIndexSecond++;
}
}
while (currentIndexFirst < firstHalf.size()) {
array.set(currentIndexArray, firstHalf.get(currentIndexFirst));
currentIndexArray++;
currentIndexFirst++;
}
while (currentIndexSecond < secondHalf.size()) {
array.set(currentIndexArray, secondHalf.get(currentIndexSecond));
currentIndexArray++;
currentIndexSecond++;
}
}
public static JavaRDD<Integer> parallelSort(JavaRDD<Integer> data) {
SparkConf conf = new SparkConf().setAppName("surya").setMaster("local[6]");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<Integer> distData = (JavaRDD<Integer>) sc.parallelize(data);
return data.union(data);
}
}
public class Main {
public static void main(String[] args) {
List<Integer> unsorted = new ArrayList<>(10000);
for (int i = 0; i < 10000; i++) {
unsorted.add((int) Math.floor(Math.random() * 100000));
}
List<Integer> sorted = new ArrayList<>(unsorted);
sorted.sort(Integer::compareTo);
long startTime = System.currentTimeMillis();
List<Integer> serialSorted = MergeSort.serialSort(new ArrayList<>(unsorted));
long serialTime = System.currentTimeMillis() - startTime;
System.out.println("Unsorted serial list: " + unsorted +"\n");
System.out.println("Sorted serial list: " + sorted+"\n");
System.out.println("Serial implementation took " + serialTime + " milliseconds.");
assert serialSorted.equals(sorted);
// parallelization
// SparkConf conf = new SparkConf().setAppName("surya").setMaster("local[6]");
// JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<Integer> data = sc.parallelize(new ArrayList<>(unsorted));
startTime = System.currentTimeMillis();
data = MergeSort.parallelSort(data);
long parallelTime = System.currentTimeMillis() - startTime;
System.out.println("Parallel implementation took " + parallelTime + " milliseconds.");
List<Integer> parallelSorted = data.collect();
sc.stop();
assert parallelSorted.equals(sorted);
}
}
I tried to parallelize this but succeeded. Any help is appreciated.

Related

Given Parent Array and Values Array. Find Best Possible Sum in a tree

Given Parent Array Such that parent[i]=j where j is the parent and Value array . Need to Find Best possible sum.
Root node will have -1 as parent.
Best Possible sum is maximum sum in one of the tree paths.
Ex)
Integer[] parent = new Integer[] { -1, 0, 0, 2, 3 };
Integer[] values = new Integer[] { 0, 4, 6, -11, 3 };
(0/0)----(1/4)
|
|
(2/6)
|
|
(3/-11)
|
|
(4/3)
Maximum sum here would be 6+0+4=10 for path 2-->0-->1.
I have tried solving it the dfs way. But not sure if it works for all cases. Below is my code. It gives all possible sum. we can take out max from that.
package com.programs.algo;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
public class BestPossibleSum {
static class Node<T> {
T label;
T data;
List<Node<T>> nodes;
}
public static void main(String[] args) {
Integer[] parent = new Integer[] { -1, 0, 0, 1, 1, 3, 5 };
Integer[] values = new Integer[] { 0, 4, 6, -11, 3, 10, 11 };
List<Integer> list1 = new ArrayList<>(Arrays.asList(parent));
List<Integer> list2 = new ArrayList<>(Arrays.asList(values));
bestPossibleSum(list1, list2);
}
static List<Node<Integer>> tree = new ArrayList<>();
private static void bestPossibleSum(List<Integer> list1, List<Integer> list2) {
int adj[][] = new int[list1.size()][list1.size()];
createTree(list1, list2, adj);
List<Integer> traversedNodes = new ArrayList<>();
List<Integer> sumOfraversedNodes = new ArrayList<>();
for (int i = 0; i < adj.length; i++) {
dfs(tree.get(i), traversedNodes, sumOfraversedNodes);
traversedNodes.clear();
}
System.out.println(sumOfraversedNodes);
}
private static void dfs(Node<Integer> tree, List<Integer> traversedNodes, List<Integer> sums) {
if (!traversedNodes.contains(tree.label)) {
traversedNodes.add(tree.label);
sums.add(getSum(traversedNodes));
for (Node<Integer> child : tree.nodes) {
dfs(child, traversedNodes, sums);
}
}
}
private static Integer getSum(List<Integer> traversedNodes) {
System.out.println(traversedNodes);
return traversedNodes.stream().reduce(0, Integer::sum);
}
private static void createTree(List<Integer> parent, List<Integer> values, int[][] adj) {
for (int i = 0; i < parent.size(); i++) {
Node<Integer> node = new Node<>();
node.label = i;
node.data = values.get(i);
node.nodes = new ArrayList<>();
tree.add(i, node);
}
for (int i = 0; i < parent.size(); i++) {
if (parent.get(i) != -1) {
adj[parent.get(i)][i] = 1;
adj[i][parent.get(i)] = 1;
tree.get(parent.get(i)).nodes.add(tree.get(i));
}
}
tree.forEach(t -> {
System.out.println(t.label);
System.out.println(t.nodes.stream().map(m -> m.label).collect(Collectors.toList()));
});
// System.out.println(Arrays.deepToString(adj));
}
}
I would divide your question to 2 different issues:
Build tree from your data
Find the max sum
I wrote the code in PHP but you can convert it to any language you need (my JAVA skill are bit rusty...)
Build the Tree:
$parent = array( -1, 0, 0, 2, 3 );
$values = array(0, 4, 6, -11, 3 );
function getNode($id, $data) {
return array("id" => $id, "data" => $data, "childs" => array());
}
function addToTree($node, &$root, $parentsId) {
if ($parentsId == -1)
$root = $node;
else if ( $root["id"] == $parentsId)
$root["childs"][] = $node;
else
foreach($root["childs"] as &$child)
addToTree($node, $child, $parentsId);
}
$root = null;
for($i = 0; $i < count($parent); $i++) {
addToTree(getNode($i, $values[$i]), $root, $parent[$i]);
}
Now root if contain you "tree-like" data. Notice this code works only if the nodes given at the right order and it cannot support multi root (assume tree and not forest)
Find max path:
function maxPath($node) {
$sum = $node["data"];
foreach($node["childs"] as $child) {
$s = maxPath($child);
if ($s > 0) // if its not positive then don't take it
$sum += $s;
}
return $sum;
}
This recursive function will get your max-sum-path. Notice this will allow multi-child per node and also the path can have star-shape.
Posting Java code considering it as tree with left and right nodes.
https://www.geeksforgeeks.org/construct-a-binary-tree-from-parent-array-representation/
https://www.geeksforgeeks.org/find-maximum-path-sum-in-a-binary-tree/
private static int maxSum(Node<Integer> btree, Result result) {
if (btree == null)
return 0;
int l = maxSum(btree.left, result);
int r = maxSum(btree.right, result);
System.out.println(l + " " + r + " " + btree.data);
int maxSingle = Math.max(Math.max(l, r) + btree.label, btree.label);
int maxTop = Math.max(l + r + btree.label, maxSingle);
result.val = Math.max(maxTop, result.val);
return maxSingle;
}
private static Node<Integer> createBinaryTree(Integer[] parent, Node<Integer> root) {
Map<Integer, Node<Integer>> map = new HashMap<>();
for (int i = 0; i < parent.length; i++) {
map.put(i, new Node<>(i));
}
for (int i = 0; i < parent.length; i++) {
if (parent[i] == -1) {
root = map.get(i);
} else {
Node<Integer> par = map.get(parent[i]);
Node<Integer> child = map.get(i);
if (par.left == null) {
par.left = child;
} else {
par.right = child;
}
}
}
return root;
}
1 . convert the given parent array into graph with the following steps :
unordered_map<int,vector<pair<int,int>>> graph;
for(int i=0;i<n;i++){
if(parents[i]!=-1){
graph[parents[i]].push_back({i,values[i]});
graph[i].push_back({parents[i],values[parents[i]]});
}
}
2.apply DFS on each node and check the maximum Path Sum
vector<bool> vis(n,false);
int res=0;
for(int i=0;i<n;i++){
vis.clear();
dfs(i,vis,mp,values,res);
}
DFS function
void dfs(int src,vector&vis,unordered_map<int,
vector<pair<int,int>>>&graph,vector<int>&values,int res){
res+=values[src];
ans=max(ans,res);
vis[src]=true;
for(int i=0;i<graph[src].size();i++){
if(!vis[graph[src][i].first]){
dfs(graph[src][i].first,vis,graph,values,res);
}
}
vis[src]=false;
}
C++ code :
#include<bits/stdc++.h>
using namespace std;
int ans=INT_MIN;
void dfs(int src,vector<bool>&vis,unordered_map<int,
vector<pair<int,int>>>&graph,vector<int>&values,int res){
res+=values[src];
ans=max(ans,res);
vis[src]=true;
for(int i=0;i<graph[src].size();i++){
if(!vis[graph[src][i].first]){
dfs(graph[src][i].first,vis,graph,values,res);
}
}
vis[src]=false;
}
int maxPathSum(vector<int>&parents,vector<int>&values){
int n=parents.size();
unordered_map<int,vector<pair<int,int>>> mp;
for(int i=0;i<n;i++){
if(parents[i]!=-1){
mp[parents[i]].push_back({i,values[i]});
mp[i].push_back({parents[i],values[parents[i]]});
}
}
vector<bool> vis(n,false);
int res=0;
for(int i=0;i<n;i++){
vis.clear();
dfs(i,vis,mp,values,res);
}
return ans;
}
int main(){
vector<int> parent = {-1,0,0,2,3}; //{-1,0,1,2,0};
vector<int> values = {0,4,6,-11,3}; //{-2,10,10,-3,10};
cout<<maxPathSum(parent,values)<<endl;
return 0;
}
Today I got this problem in One of the company's hackerrank test.
Here is my solution. All test cases have been passed successfully
import java.io.*;
import java.math.*;
import java.security.*;
import java.text.*;
import java.util.*;
import java.util.concurrent.*;
import java.util.function.*;
import java.util.regex.*;
import java.util.stream.*;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toList;
class Result {
/*
* Complete the 'bestSumDownwardTreePath' function below.
*
* The function is expected to return an INTEGER.
* The function accepts following parameters:
* 1. INTEGER_ARRAY parent
* 2. INTEGER_ARRAY values
*/
static int bestPath = Integer.MIN_VALUE;
public static int bestSumDownwardTreePath(List<Integer> parent, List<Integer> values) {
if(parent.size() == 1) return values.get(0);
Map<Integer, List<Integer>> tree = new HashMap<>();
for(int i = 1; i < parent.size(); i++) {
List<Integer> temp = tree.getOrDefault(parent.get(i), null);
if(temp == null) {
temp = new ArrayList<>();
temp.add(i);
tree.put(parent.get(i), temp);
}
else {
temp.add(i);
}
}
findBestSum(parent, values, tree, 0, 0);
return bestPath;
}
public static void findBestSum(List<Integer> parent, List<Integer> values,
Map<Integer, List<Integer>> tree, int root, int sum) {
sum = sum + values.get(root);
bestPath = Math.max(bestPath, sum);
sum = Math.max(0, sum);
if(tree.get(root) == null) return;
for(Integer child: tree.get(root)) {
findBestSum(parent, values, tree, child, sum);
}
}
}
public class Solution {
public static void main(String[] args) throws IOException {
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(System.in));
BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(System.getenv("OUTPUT_PATH")));
int parentCount = Integer.parseInt(bufferedReader.readLine().trim());
List<Integer> parent = IntStream.range(0, parentCount).mapToObj(i -> {
try {
return bufferedReader.readLine().replaceAll("\\s+$", "");
} catch (IOException ex) {
throw new RuntimeException(ex);
}
})
.map(String::trim)
.map(Integer::parseInt)
.collect(toList());
int valuesCount = Integer.parseInt(bufferedReader.readLine().trim());
List<Integer> values = IntStream.range(0, valuesCount).mapToObj(i -> {
try {
return bufferedReader.readLine().replaceAll("\\s+$", "");
} catch (IOException ex) {
throw new RuntimeException(ex);
}
})
.map(String::trim)
.map(Integer::parseInt)
.collect(toList());
int result = Result.bestSumDownwardTreePath(parent, values);
bufferedWriter.write(String.valueOf(result));
bufferedWriter.newLine();
bufferedReader.close();
bufferedWriter.close();
}
}

combine two byte[] in SHA512Digest 's doFinal

in this methode there is only one instruction that not working:
private static byte[] encodePassword(String password,String salt) throws UnsupportedEncodingException
{
String mergedPasswordAndSalt =mergePasswordAndSalt(password, salt);
SHA512Digest digester =new SHA512Digest();
byte[] hash = new byte[digester.getDigestSize()];
digester.update(hash, 0, mergedPasswordAndSalt.length());
digester.doFinal(hash, 0);
System.out.println("init hash= "+Base64.encode(hash));
for (int i = 1; i < ITERATIONS; ++i) {
digester.update(hash, 0, mergedPasswordAndSalt.length());
digester.doFinal(Bytes.concat(hash, mergedPasswordAndSalt.getBytes("UTF-8")),0);
}
System.out.println("FINAL hash= "+Base64.encode(hash));
return hash;
}
that instructions: is the equivalent of this one in the java api that is way:
for (int i = 1; i < ITERATIONS; ++i) {
hash = digester.digest(Bytes.concat(hash, mergedPasswordAndSalt.getBytes("UTF-8")));
}
I have the solution:
To concat bytes i used this method:
public static byte[] concat(byte[]... arrays) {
int length = 0;
byte[][] arr$ = arrays;
int pos = arrays.length;
for(int i$ = 0; i$ < pos; ++i$) {
byte[] array = arr$[i$];
length += array.length;
}
byte[] result = new byte[length];
pos = 0;
byte[][] arr$$=arrays;
arr$=arr$$;
int len$ = arrays.length;
for(int i$ = 0; i$ < len$; ++i$) {
byte[] array = arr$[i$];
System.arraycopy(array, 0, result, pos, array.length);
pos += array.length;
}
return result;
}
To do 4999 iteration on the digest we need a method that takes the hash after every iteration and works in the current digest concatenated with the first diegest(generate out of loop):
private static byte[] encodePassword(String password,String salt) throws UnsupportedEncodingException
{
String mergedPasswordAndSalt =mergePasswordAndSalt(password, salt);
byte[] hash = new byte[88];
hash=digestt(mergedPasswordAndSalt.getBytes("UTF-8"));
for (int i = 1; i < ITERATIONS; ++i) {
hash=digestt(concat(hash,mergedPasswordAndSalt.getBytes("UTF-8")));
}
return hash;
}
public static byte[] digestt(byte[] bytes) {
Digest digest = new SHA512Digest();
byte[] resBuf = new byte[digest.getDigestSize()];
digest.update(bytes, 0, bytes.length);
digest.doFinal(resBuf, 0);
return resBuf;
}
thank you very much i asked many questions and you are always there for help.

Optimizing apriori algorithm code

I am writing code for apriori algorithm in data mining my code takes as long as 60 seconds for a pretty small dataset which is solved by other code i got from internet in just 2 seconds but i am not getting where am i doing wrong, can someone tell me why the other code is fast over mine.
My code:
import java.util.*;
import java.io.*;
public class Apriori_p {
double support;
ArrayList<String> trans;
Map<String, Integer> map;
long start;
void print(ArrayList<String> temp) {
for (int i = 0; i < temp.size(); i++) {
System.out.println(temp.get(i));
}
System.out.println("Count :" + temp.size());
}
void run() throws FileNotFoundException {
start = System.currentTimeMillis();
trans = new ArrayList<>();
ArrayList<String> temp = new ArrayList<>();
map = new HashMap<>();
Scanner sc = new Scanner(System.in);
System.out.println("Enter support %");
support = sc.nextDouble();
System.out.println("Enter file name");
String file = sc.next();
sc = new Scanner(new File(file));
int lines = 0;
while (sc.hasNextLine()) {
String s = sc.nextLine();
if (s.matches("\\s*")) {
continue;
}
lines++;
String[] spl = s.split("\\s+");
ArrayList<Integer> elem = new ArrayList<>();
for (int i = 0; i < spl.length; i++) {
String cand;
int n = Integer.parseInt(spl[i]);
cand = spl[i].trim();
if (!elem.contains(n)) {
elem.add(n);
}
if (map.containsKey(cand)) {
int count = map.get(cand);
map.put(cand, count + 1);
} else {
map.put(cand, 1);
}
}
Collections.sort(elem);
String con = " ";
for (int i = 0; i < elem.size(); i++) {
con = con + elem.get(i) + " ";
String s1 = String.valueOf(elem.get(i)).trim();
if(!temp.contains(s1))
temp.add(s1);
}
trans.add(con);
}
support = (support * lines) / 100;
System.out.println(System.currentTimeMillis() - start);
apriori(temp, 1);
}
public static void main(String[] args) throws FileNotFoundException {
new Apriori_p().run();
}
public void apriori(ArrayList<String> temp, int m) {
Set<String> diff = null;
if (m == 1) {
diff = new HashSet<>();
}
for (int i = 0; i < temp.size(); i++) {
if (map.get(temp.get(i)) < support) {
if (m == 1) {
diff.add(temp.get(i));
}
temp.remove(i);
i--;
}
}
for (int i = 0; i < trans.size() && m == 1; i++) {
for (String j : diff) {
String rep = " " + j + " ";
trans.get(i).replace(rep, " ");
}
}
if (temp.size() == 0) {
return;
}
System.out.println("Size " + m + " :");
print(temp);
ArrayList<String> ntemp = new ArrayList<>();
int n = temp.size();
for (int i = 0; i < n; i++) {
for (int j = i + 1; j < n; j++) {
StringTokenizer st1 = new StringTokenizer(temp.get(i), " ");
StringTokenizer st2 = new StringTokenizer(temp.get(j), " ");
String str1 = "", str2 = "";
for (int s = 0; s < m - 2; s++) {
str1 = str1 + " " + st1.nextToken();
str2 = str2 + " " + st2.nextToken();
}
if (str2.compareToIgnoreCase(str1) == 0) {
int s1 = Integer.parseInt(st1.nextToken()), s2 = Integer.parseInt(st2.nextToken());
String s3;
if (s1 <= s2) {
s3 = (str1 + " " + s1 + " " + s2).trim();
} else {
s3 = (str1 + " " + s2 + " " + s1).trim();
}
if(!ntemp.contains(s3)){
ntemp.add(s3);
}
}
}
}
temp.clear();
for (int j = 0; j < ntemp.size(); j++) {
int c = 0;
for (int i = 0; i < trans.size(); i++) {
int check = 0;
String tr = trans.get(i);
StringTokenizer st1 = new StringTokenizer(ntemp.get(j)," ");
while(st1.hasMoreElements()){
String str = st1.nextToken();
if(!tr.contains(" " + str + " ")){
check = 1;
break;
}
}
if(check == 0){
c= 1;
if (map.containsKey(ntemp.get(j))) {
int count = map.get(ntemp.get(j));
map.put(ntemp.get(j), count + 1);
} else {
map.put(ntemp.get(j), 1);
}
}
}
if (c == 0) {
ntemp.remove(j);
j--;
}
}
apriori(ntemp, m + 1);
}
}
Fast code:
import java.io.*;
import java.util.*;
public class Apriori3{
public static void main(String[] args) throws Exception {
Apriori3 ap = new Apriori3(args);
}
private List<int[]> itemsets;
private String transaFile;
private int numItems;
private int numTransactions;
private double minSup;
private boolean usedAsLibrary = false;
public Apriori3(String[] args) throws Exception {
configure(args);
go();
}
private void go() throws Exception {
long start = System.currentTimeMillis();
createItemsetsOfSize1();
int itemsetNumber = 1;
int nbFrequentSets = 0;
while (itemsets.size() > 0) {
calculateFrequentItemsets();
if (itemsets.size() != 0) {
nbFrequentSets += itemsets.size();
log("Found " + itemsets.size() + " frequent itemsets of size " + itemsetNumber + " (with support " + (minSup * 100) + "%)");;
createNewItemsetsFromPreviousOnes();
}
itemsetNumber++;
}
long end = System.currentTimeMillis();
log("Execution time is: " + ((double) (end - start) / 1000) + " seconds.");
log("Found " + nbFrequentSets + " frequents sets for support " + (minSup * 100) + "% (absolute " + Math.round(numTransactions * minSup) + ")");
log("Done");
}
private void foundFrequentItemSet(int[] itemset, int support) {
if (usedAsLibrary) {
} else {
System.out.println(Arrays.toString(itemset) + " (" + ((support / (double) numTransactions)) + " " + support + ")");
}
}
private void log(String message) {
if (!usedAsLibrary) {
System.err.println(message);
}
}
private void configure(String[] args) throws Exception {
if (args.length != 0) {
transaFile = args[0];
} else {
transaFile = "chess.dat"; // default
}
if (args.length >= 2) {
minSup = (Double.valueOf(args[1]).doubleValue());
} else {
minSup = .8;
}
if (minSup > 1 || minSup < 0) {
throw new Exception("minSup: bad value");
}
numItems = 0;
numTransactions = 0;
BufferedReader data_in = new BufferedReader(new FileReader(transaFile));
while (data_in.ready()) {
String line = data_in.readLine();
if (line.matches("\\s*")) {
continue;
}
numTransactions++;
StringTokenizer t = new StringTokenizer(line, " ");
while (t.hasMoreTokens()) {
int x = Integer.parseInt(t.nextToken());
if (x + 1 > numItems) {
numItems = x + 1;
}
}
}
outputConfig();
}
private void outputConfig() {
log("Input configuration: " + numItems + " items, " + numTransactions + " transactions, ");
log("minsup = " + minSup + "%");
}
private void createItemsetsOfSize1() {
itemsets = new ArrayList<int[]>();
for (int i = 0; i < numItems; i++) {
int[] cand = {i};
itemsets.add(cand);
}
}
private void createNewItemsetsFromPreviousOnes() {
int currentSizeOfItemsets = itemsets.get(0).length;
log("Creating itemsets of size " + (currentSizeOfItemsets + 1) + " based on " + itemsets.size() + " itemsets of size " + currentSizeOfItemsets);
HashMap<String, int[]> tempCandidates = new HashMap<String, int[]>(); //temporary candidates
for (int i = 0; i < itemsets.size(); i++) {
for (int j = i + 1; j < itemsets.size(); j++) {
int[] X = itemsets.get(i);
int[] Y = itemsets.get(j);
assert (X.length == Y.length);
int[] newCand = new int[currentSizeOfItemsets + 1];
for (int s = 0; s < newCand.length - 1; s++) {
newCand[s] = X[s];
}
int ndifferent = 0;
for (int s1 = 0; s1 < Y.length; s1++) {
boolean found = false;
for (int s2 = 0; s2 < X.length; s2++) {
if (X[s2] == Y[s1]) {
found = true;
break;
}
}
if (!found) {
ndifferent++;
newCand[newCand.length - 1] = Y[s1];
}
}
assert (ndifferent > 0);
if (ndifferent == 1) {
Arrays.sort(newCand);
tempCandidates.put(Arrays.toString(newCand), newCand);
}
}
}
itemsets = new ArrayList<int[]>(tempCandidates.values());
log("Created " + itemsets.size() + " unique itemsets of size " + (currentSizeOfItemsets + 1));
}
private void line2booleanArray(String line, boolean[] trans) {
Arrays.fill(trans, false);
StringTokenizer stFile = new StringTokenizer(line, " ");
while (stFile.hasMoreTokens()) {
int parsedVal = Integer.parseInt(stFile.nextToken());
trans[parsedVal] = true;
}
}
private void calculateFrequentItemsets() throws Exception {
log("Passing through the data to compute the frequency of " + itemsets.size() + " itemsets of size " + itemsets.get(0).length);
List<int[]> frequentCandidates = new ArrayList<int[]>();
boolean match;
int count[] = new int[itemsets.size()];
BufferedReader data_in = new BufferedReader(new InputStreamReader(new FileInputStream(transaFile)));
boolean[] trans = new boolean[numItems];
for (int i = 0; i < numTransactions; i++) {
String line = data_in.readLine();
line2booleanArray(line, trans);
for (int c = 0; c < itemsets.size(); c++) {
match = true;
int[] cand = itemsets.get(c);
for (int xx : cand) {
if (trans[xx] == false) {
match = false;
break;
}
}
if (match) {
count[c]++;
}
}
}
data_in.close();
for (int i = 0; i < itemsets.size(); i++) {
if ((count[i] / (double) (numTransactions)) >= minSup) {
foundFrequentItemSet(itemsets.get(i), count[i]);
frequentCandidates.add(itemsets.get(i));
}
}
itemsets = frequentCandidates;
}
}

sentiment analysis using SentiWordNet.

I want to classify sentences as positive, negative or neutral. I have got the code written in java but i am not understanding how to use the code for my file.In the above code where i should use my file and the results for each sentence should come as positive, negative and neutral. Please guide me for this.
public class SWN3 {
private String pathToSWN = "data"+File.separator+"SentiWordNet_3.0.0.txt";
private HashMap<String, String> _dict;
public SWN3(){
_dict = new HashMap<String, String>();
HashMap<String, Vector<Double>> _temp = new HashMap<String, Vector<Double>>();
try{
BufferedReader csv = new BufferedReader(new FileReader(pathToSWN));
String line = "";
while((line = csv.readLine()) != null)
{
String[] data = line.split("\t");
Double score = Double.parseDouble(data[2])- Double.parseDouble(data[3]);
String[] words = data[4].split(" ");
for(String w:words)
{
String[] w_n = w.split("#");
w_n[0] += "#"+data[0];
int index = Integer.parseInt(w_n[1])-1;
if(_temp.containsKey(w_n[0]))
{
Vector<Double> v = _temp.get(w_n[0]);
if(index>v.size())
for(int i = v.size();i<index; i++)
v.add(0.0);
v.add(index, score);
_temp.put(w_n[0], v);
}
else
{
Vector<Double> v = new Vector<Double>();
for(int i = 0;i<index; i++)
v.add(0.0);
v.add(index, score);
_temp.put(w_n[0], v);
}
}
}
Set<String> temp = _temp.keySet();
for (Iterator<String> iterator = temp.iterator(); iterator.hasNext();) {
String word = (String) iterator.next();
Vector<Double> v = _temp.get(word);
double score = 0.0;
double sum = 0.0;
for(int i = 0; i < v.size(); i++)
score += ((double)1/(double)(i+1))*v.get(i);
for(int i = 1; i<=v.size(); i++)
sum += (double)1/(double)i;
score /= sum;
String sent = "";
if(score>=0.75)
sent = "strong_positive";
else
if(score > 0.25 && score<=0.5)
sent = "positive";
else
if(score > 0 && score>=0.25)
sent = "weak_positive";
else
if(score < 0 && score>=-0.25)
sent = "weak_negative";
else
if(score < -0.25 && score>=-0.5)
sent = "negative";
else
if(score<=-0.75)
sent = "strong_negative";
_dict.put(word, sent);
}
}
catch(Exception e){e.printStackTrace();}
}
public String extract(String word, String pos)
{
return _dict.get(word+"#"+pos);
}
}

Sorting an ArrayList containing custom objects

I want to sort an ArrayList of High scores. The list contains objects, which each contain String name, String category and int score. I want to sort the ArrayList into descending order by score:
This is what I have at the moment, however it crashes the program when the procedure runs:
**INITIALIZING ARRAY AND VARIABLES:**
private static ArrayList<HighScore> highScores = new ArrayList<HighScore>();
private static int Last;
private static int First = 0;
private static int PivotValue;
private static int LeftPointer;
private static int RightPointer;
private static int Pivot;
**READING IN VALUES INTO ARRAYLIST**
String File = "src/project/res/highscores.txt";
BufferedReader br = null;
try {
br = new BufferedReader(new FileReader(File));
while ((line = br.readLine()) != null) {
HighScore h = new HighScore(line);
highScores.add(h);
}
} catch (Exception e) {
System.out.println("Problem reading in high scores!");
}
**CALLING THE PROCEDURE**
quickSort(highScores,First,Last);
private static void quickSort(ArrayList<HighScore> highScores2, int first, int last){
if(first < last)
{
PivotValue = highScores2.get(first).getScore();
LeftPointer = first + 1;
RightPointer = last;
while((LeftPointer <= RightPointer))
{
while((LeftPointer <= RightPointer)&&(highScores2.get(LeftPointer).getScore() < PivotValue))
{
LeftPointer++;
}
while((highScores2.get(RightPointer).getScore() > PivotValue)&&(LeftPointer <= RightPointer))
{
RightPointer--;
}
if(LeftPointer < RightPointer)
{
HighScore temp = highScores2.get(LeftPointer);
highScores2.set(RightPointer, highScores2.get(LeftPointer));
temp.equals(highScores2.get(RightPointer));
}
}
Pivot = RightPointer;
HighScore temp = highScores2.get(first);
highScores2.set(Pivot,highScores2.get(first));
temp.equals(highScores2.get(Pivot));
quickSort(highScores2, first, Pivot-1);
quickSort(highScores2, Pivot + 1, last);
}
}

Resources