K-means on hadoop compile error... - hadoop

I've downloaded an open-source k-means implementation (for Hadoop MapReduce), but it has compile errors.
---------------------SOURCE----------------------------
/*
* Copyright 2012
* Parallel and Distributed Systems Group (PVS)
* Institute of Computer Science (IFI)
* Heidelberg University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package examples;
import algorithms.kmeans.Cluster;
import algorithms.kmeans.Clusters;
import algorithms.kmeans.SamplesCache;
import org.apache.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.DenseVectorWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
public class KMeansHadoop {
private final static Logger LOG = LoggerFactory.getLogger(KMeansHadoop.class);
public static class KMeansMapper extends
MRMapper<LongWritable, Text, IntWritable, Clusters, Clusters> {
private SamplesCache cache = new SamplesCache(500);
private int cacheSize = 10000;
private Clusters clusters = null;
private int k = 0;
private int nextCentroidToInit = 0;
/**
* Configures the mapper by reading two configuration options:
* - "numClusters": the k in k-Means
* - "numAuxClusters": the number of in-memory auxiliary clusters representing the input data
*
* @param context the mapper context, used to access the configuration
* @throws IOException
* @throws InterruptedException
*/
@Override
protected void setup(Context context) throws IOException, InterruptedException {
super.setup(context);
Configuration conf = context.getConfiguration();
this.k = conf.getInt("numCluster", 5);
this.clusters = new Clusters(k);
this.cacheSize = conf.getInt("numAuxCluster", 500);
this.cache = new SamplesCache(cacheSize);
}
/**
* Maps the input lines to initial centroids and, as a side-effect, stores auxiliary clusters representing the
* input data in memory
*
* @param key the key provided by the input format, not used here
* @param value one line of the input; input format: one data point per line, vector components delimited by spaces
* @param context the mapper context used to send initial centroids to the reducer
* @throws IOException
* @throws InterruptedException
*/
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// Input format: one data point per line, components delimited by spaces
final List<Double> doubleValues = new ArrayList<Double>();
final StringTokenizer tk = new StringTokenizer(value.toString());
while(tk.hasMoreElements()) {
final String token = tk.nextToken();
doubleValues.add(Double.parseDouble(token));
}
double[] dv = new double[doubleValues.size()];
for(int i=0; i<doubleValues.size(); i++) {
dv[i] = doubleValues.get(i);
}
DenseVector dvec = new DenseVector(dv);
DenseVectorWritable sample = new DenseVectorWritable(dvec);
// add sample to local auxiliary clusters
this.cache.addSample(sample);
// first k points are chosen as initial centroids
if (nextCentroidToInit < k) {
this.clusters.set(nextCentroidToInit, new Cluster(sample, sample));
this.nextCentroidToInit += 1;
} else if (nextCentroidToInit == k) {
// send initial centroids to reducer
context.write(new IntWritable(0), this.clusters);
this.nextCentroidToInit += 1;
}
}
/**
* Remaps the input data when a new set of preliminary clusters is received from the reducer by recalculating
* the assignment of the local input data, as represented by the auxiliary clusters, to the preliminary clusters
* and sends the updated centroids to the reducer.
* @param cs the preliminary clusters computed by the reducer
* @param context the mapper context used to send the locally recomputed centroids to the reducer
* @throws IOException
* @throws InterruptedException
*/
public void remap(List<Clusters> cs, Context context) throws IOException, InterruptedException {
LOG.info("Remapping preliminary clusters");
// set the preliminary clusters as new clusters
this.clusters = cs.get(0).clone();
this.clusters.reset();
// reassign the local input data, represented by the auxiliary clusters, to the clusters, thereby readjusting
// the clusters centroids
this.cache.reAssignAll(clusters);
// send the locally updated clusters to the reducer
context.write(new IntWritable(0), this.clusters);
}
}
public static class KMeansReducer extends
MRReducer<IntWritable, Clusters, IntWritable, Clusters, Clusters> {
private double lastError = Double.MAX_VALUE;
private float epsilon = Float.MAX_VALUE;
/**
* Configures the reducer by reading the configuration option "epsilon": the minimum change of the MSE needed to
* trigger a new iteration.
*
* @param context the reducer context, used to access the configuration
* @throws IOException
* @throws InterruptedException
*/
@Override
protected void setup(Context context) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
epsilon = conf.getFloat("epsilon", 100f);
}
/**
* Reduces a list of clusters locally computed by the mappers into a preliminary global set of clusters, which
* is then restreamed to the mappers, or, iff the MSE of the global set of clusters has not changed by more than
* epsilon since the last reduce invocation, ends the iteration by emitting the final set of clusters.
*
* @param key the key set by the mapper, not used here
* @param values the list of locally computed clusters computed by the mappers
* @param context the reducer context, used to restream preliminary clusters to the mappers and emit the final
* clusters
* @throws IOException
* @throws InterruptedException
*/
@Override
protected void reduce(IntWritable key, Iterable<Clusters> values,
MRReduceContext<IntWritable, Clusters, IntWritable, Clusters, Clusters> context) throws IOException, InterruptedException {
// Merge the list of clusters into one set of clusters
Clusters results = null;
for(Clusters clusters : values) {
if( results == null ) {
results = clusters;
} else {
results.merge(clusters);
}
}
Double error = results.getMSE();
LOG.info("Last error " + lastError + ", current error " + error);
if (lastError < Double.MAX_VALUE &&
error <= lastError + epsilon &&
error >= lastError - epsilon) {
// MSE has changed by less than epsilon: Emit final result
context.write(new IntWritable(0), results);
LOG.info("Final result written.");
} else {
// MSE has changed by more than epsilon: Send recomputed preliminary clusters to mappers to start a new
// iteration
this.lastError = error;
results.computeNewCentroids();
context.restream(results);
LOG.info("Preliminary result restreamed.");
}
}
}
/**
* Executes the streaming Hadoop MapReduce program
* @param args first arg is input path, second arg is output path
* @throws Exception
*/
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
conf.setBoolean("mrstreamer.hadoop.streaming", true);
// has to be 1 to ensure the algorithm produces valid results
conf.setInt(JobContext.NUM_REDUCES, 1);
conf.setInt(JobContext.NUM_MAPS, 4);
conf.set("numCluster", "5");
conf.set("numAuxCluster", "500");
Job job = new MRSJob(conf, "kmeanshadoop");
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(Clusters.class);
job.setMapperClass(KMeansMapper.class);
job.setReducerClass(KMeansReducer.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.waitForCompletion(true);
}
}
---------------------------ERROR------------------------------
Exception in thread "main" java.lang.Error: Unresolved compilation problems:
NUM_REDUCES cannot be resolved or is not a field
NUM_MAPS cannot be resolved or is not a field
at examples.KMeansHadoop.main(KMeansHadoop.java:222)

You are probably not using the same version of Hadoop as the authors of this code. The NUM_REDUCES and NUM_MAPS constants should be covered by the line:
import org.apache.hadoop.mapred.JobContext;
Update to Hadoop 2.2.0 (or later) if you want to use these settings.
Otherwise, you can replace those two calls with the following from the old API:
conf.setNumReduceTasks(1);
conf.setNumMapTasks(4); //but this is only a suggestion to hadoop
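For reference, here is a minimal sketch of the different ways to request task counts side by side. It assumes Hadoop 2.x for the "mapreduce.job.*" property names and is not tied to the MRStreamer MRSJob class used above; note that in the old API the setters live on JobConf rather than on Configuration.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;

public class TaskCountSettings {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // New-style property names (Hadoop 2.x); these are the keys that
        // JobContext.NUM_REDUCES and JobContext.NUM_MAPS resolve to.
        conf.setInt("mapreduce.job.reduces", 1);
        conf.setInt("mapreduce.job.maps", 4); // only a hint; input splits decide the real map count

        // New API: the reducer count can also be set directly on the Job.
        Job job = Job.getInstance(conf, "kmeanshadoop");
        job.setNumReduceTasks(1);

        // Old mapred API: JobConf carries both setters.
        JobConf jobConf = new JobConf(conf);
        jobConf.setNumReduceTasks(1);
        jobConf.setNumMapTasks(4); // still only a suggestion to Hadoop
    }
}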

Related

Hadoop: Reducer doesn't emit correct calculation

I have the following Reducer class (part of a MapReduce job) that's supposed to compute a score = POS / (-1 * sum(NEGs)),
where POS is one positive number and NEGs are two negative numbers. It's always this way.
For example, if the input from the mapper is:
<A, A> -15.0
<A, A> 2.0
<A, A> -15.0
The expected output would be:
<A, A> 0.06666666666666667
However, it's outputting infinity for every output record!
<A, A> Infinity
While debugging, if I add statements to emit the values inside the while loop:
score.set(val);
context.write(key, score);
it prints the results fine, but it repeats the division. So I get the following:
<A, A> -15.0
<A, A> 2.0
<A, A> -15.0
<A, A> 0.06666666666666667 # correct calculation (2/30)
<A, A> 0.0022222222222222222 # Not sure why it divides twice by 30 (2/30/30)!!
This is the MyReducer class:
private static class MyReducer extends
Reducer<Pair, DoubleWritable, Pair, DoubleWritable> {
private DoubleWritable score = new DoubleWritable();
int counter = 0;
@Override
public void reduce(Pair key, Iterable<DoubleWritable> values, Context context)
throws IOException, InterruptedException {
Iterator<DoubleWritable> iter = values.iterator();
double nor = 0.0;
double don = 0.0;
double val;
while (iter.hasNext()) {
val = iter.next().get();
if (val < 0)
don += val*-1;
else
nor = val;
//uncomment for debugging!
//score.set(val);
//context.write(key, score);
}
score.set(nor / don);
context.write(key, score);
}
}
Can anyone explain why it:
1. emits Infinity if I didn't emit anything inside the while loop?
2. divides by the denominator twice?
Thanks!
Doubles acting funny in Java is far from rare, of course, but in this particular case the problem isn't the weird ways of doubles themselves so much as how they interact with Hadoop.
First and foremost, this type of reduce computation must only be used at the Reduce stage of the job and not at the Combine stage (if any). If you have set this reduce computation to also run as a combiner, consider removing that setting. This is not so much a rule of thumb as hard-won experience: a lot of MapReduce bugs where one can't quite figure out why the reducers get weird data, or why computations are executed twice in a row (just as you have pointed out), come from reusing a non-idempotent reducer as a combiner.
However, another possible culprit of the issue is that, in order to have safe double-type divisions, you really need to use explicit type casting to get a proper double-typed result.
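If the driver of this job does register the reducer as a combiner, dropping that registration is the first thing to try. The sketch below is only illustrative: the class name, job name, and omitted setup are assumptions, not taken from the original driver.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class ScoreDriverSketch {
    // Hypothetical driver excerpt; the mapper/reducer classes and I/O setup
    // from the original job are omitted here.
    public static Job configureJob(Configuration conf) throws Exception {
        Job job = Job.getInstance(conf, "score computation");
        job.setJarByClass(ScoreDriverSketch.class);
        // job.setCombinerClass(MyReducer.class); // <- if a line like this exists,
        //                                        //    remove it: running this reducer
        //                                        //    as a combiner performs the
        //                                        //    division twice
        return job;
    }
}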
To showcase this, I used example input based on your input data, stored in an input directory. Every unique key has one positive and two negative numbers as values (here the keys are plain Strings for the sake of simplicity), as shown below:
A -15.0
A 2.0
A -15.0
B -10.0
B 9.0
B -12.0
C -7.0
C 1.0
C -19.0
D -5.0
D 18.0
D -5.0
E -6.0
E 6.0
E -6.0
Then explicit type casting was used for the calculation of each score, as you can see from the code below:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import java.io.*;
import java.io.IOException;
import java.util.*;
import java.nio.charset.StandardCharsets;
public class ScoreComp
{
/* input: <Character, Number>
* output: <Character, Number>
*/
public static class Map extends Mapper<Object, Text, Text, DoubleWritable>
{
public void map(Object key, Text value, Context context) throws IOException, InterruptedException
{
String record = value.toString();
String[] parts = record.split(" "); // just split the lines into key and value
// create key-value pairs from each line
context.write(new Text(parts[0]), new DoubleWritable(Double.parseDouble(parts[1])));
}
}
/* input: <Character, Number>
* output: <Character, Score>
*/
public static class Reduce extends Reducer<Text, DoubleWritable, Text, DoubleWritable>
{
public void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException
{
double pos = 0.0;
double neg = 0.0;
// for every value of a unique key...
for(DoubleWritable value : values)
{
// retrieve the positive number and calculate the sum of the two negative numbers
if(value.get() < 0)
neg += value.get();
else
pos = value.get();
}
// calculate the score based on the values of each key (using explicit type casting)
double result = (double) pos / (-1 * neg);
// create key-value pairs for each key with its score
context.write(key, new DoubleWritable(result));
}
}
public static void main(String[] args) throws Exception
{
// set the paths of the input and output directories in the HDFS
Path input_dir = new Path("input");
Path output_dir = new Path("scores");
// in case the output directory already exists, delete it
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
if(fs.exists(output_dir))
fs.delete(output_dir, true);
// configure the MapReduce job
Job scorecomp_job = Job.getInstance(conf, "Score Computation");
scorecomp_job.setJarByClass(ScoreComp.class);
scorecomp_job.setMapperClass(Map.class);
scorecomp_job.setReducerClass(Reduce.class);
scorecomp_job.setMapOutputKeyClass(Text.class);
scorecomp_job.setMapOutputValueClass(DoubleWritable.class);
scorecomp_job.setOutputKeyClass(Text.class);
scorecomp_job.setOutputValueClass(DoubleWritable.class);
FileInputFormat.addInputPath(scorecomp_job, input_dir);
FileOutputFormat.setOutputPath(scorecomp_job, output_dir);
scorecomp_job.waitForCompletion(true);
}
}
You can see that the results from the MapReduce job in the /scores directory make sense math-wise (screenshot taken through the HDFS browsing explorer):

How to normalize columns in csv with hadoop

I want to read a CSV file and normalize the data. If I understand correctly how Hadoop works, the mapper gets the data line by line.
I found this formula to normalize : Xnew = (X - Xmin)/(Xmax - Xmin)
So I need to know the minimum value of the column and the maximum in order to normalize.
How can I do that when in a mapper I have access to only one line at a time ?
The problem with finding the max and min value of a column in this type of application is the scope of the max/min variables: in a parallel program each instance is isolated from the others in terms of data, so you need to find a way to give the max/min variables a global scope that every instance can access and synchronize with at the end of each map/reduce step.
The closest thing to this that Hadoop supports (at the time this answer was written) is the feature of counters, but they are designed only to increment their values, so you have to be creative to achieve the desired output.
The trick here is to have if-statements set the maximum and minimum counters to the column value of each line (in case it is the new max and/or min) by:
1. resetting the counter to zero by adding the negative of its own value, and then
2. incrementing the counter by the value of that specific line from the input CSV file.
It's a bit tedious, but it does the job inside the Map function.
Now, for accessing the max and min values of the counters from the Reduce function, we can simply get them in a setup method before the execution of all reducer instances and use them for computing the new normalized values of each key-value pair.
So, let's say we have a grades.csv file stored in the grades directory in HDFS, in which the grades of the students of an elementary school class are stored like this:
Jack,3
Dennis,5
Kate,10
Nancy,9
Peter,1
Zack,2
Alex,4
Yvonne,10
Violet,1
Claire,2
We can find the max and min values at the Map stage while turning each line of the input file into key-value pairs, and compute the normalized grade for each student (using the max and min values of course) at the Reduce stage as seen below:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Cluster;
import java.io.*;
import java.io.IOException;
import java.util.*;
import java.nio.charset.StandardCharsets;
public class NormGrades
{
public static enum Global_Counters
{
MAX_GRADE,
MIN_GRADE
}
/* input: <byte_offset, line_of_csv>
* output: <student, grade>
*/
public static class Map_Normalize extends Mapper<Object, Text, Text, IntWritable>
{
public void map(Object key, Text value, Context context) throws IOException, InterruptedException
{
String line = value.toString();
String[] columns = line.split(",");
int student_grade = Integer.parseInt(columns[1]);
int max_grade = Math.toIntExact(context.getCounter(Global_Counters.MAX_GRADE).getValue());
int min_grade = Math.toIntExact(context.getCounter(Global_Counters.MIN_GRADE).getValue());
// in order to find the maximum grade, we first set the max grade counter to 0
// by "increasing" it to the negative value of itself, and then increment by
// the new found maximum grade
if(student_grade > max_grade)
{
context.getCounter(Global_Counters.MAX_GRADE).increment(max_grade*(-1));
context.getCounter(Global_Counters.MAX_GRADE).increment(student_grade);
}
// in order to find the minimum grade, we first set the min grade counter to 0
// by "increasing" it to the negative value of itself, and then increment by
// the new found minimum grade
// the contents on this if statement will be accessed at least once in order to
// make sure that the min grade counter value is certainly higher than 0
if((student_grade < min_grade) || (min_grade == 0))
{
context.getCounter(Global_Counters.MIN_GRADE).increment(min_grade*(-1));
context.getCounter(Global_Counters.MIN_GRADE).increment(student_grade);
}
context.write(new Text(columns[0]), new IntWritable(student_grade));
}
}
/* input: <student, grade>
* output: <student, normalized_grade>
*/
public static class Reduce_Normalize extends Reducer<Text, IntWritable, Text, DoubleWritable>
{
public int max_grade, min_grade;
protected void setup(Context context) throws IOException, InterruptedException
{
Configuration conf = context.getConfiguration();
Cluster cluster = new Cluster(conf);
Job current_job = cluster.getJob(context.getJobID());
max_grade = Math.toIntExact(current_job.getCounters().findCounter(Global_Counters.MAX_GRADE).getValue());
min_grade = Math.toIntExact(current_job.getCounters().findCounter(Global_Counters.MIN_GRADE).getValue());
}
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
{
// each reducer instance is run for each student, so there is only one value/grade to access
int student_grade = values.iterator().next().get();
Double normalized_grade = (double) (student_grade - min_grade) / (max_grade - min_grade);
context.write(key, new DoubleWritable(normalized_grade));
}
}
public static void main(String[] args) throws Exception
{
Path input_dir = new Path("grades");
Path output_dir = new Path("normalized_grades");
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
if(fs.exists(output_dir))
fs.delete(output_dir, true);
Job normalize_job = Job.getInstance(conf, "Normalize Grades");
normalize_job.setJarByClass(NormGrades.class);
normalize_job.setMapperClass(Map_Normalize.class);
normalize_job.setReducerClass(Reduce_Normalize.class);
normalize_job.setMapOutputKeyClass(Text.class);
normalize_job.setMapOutputValueClass(IntWritable.class);
normalize_job.setOutputKeyClass(Text.class);
normalize_job.setOutputValueClass(DoubleWritable.class);
TextInputFormat.addInputPath(normalize_job, input_dir);
TextOutputFormat.setOutputPath(normalize_job, output_dir);
normalize_job.waitForCompletion(true);
}
}
The results are being stored as seen through the HDFS Browser in the following screenshot:

Spring Cache a List of object with Condition getting IllegalArgumentException

I want to cache a List of Category when level == 0, but I keep getting IllegalArgumentException. What am I missing?
In Service class:
@Override
@Transactional(readOnly = true)
@Cacheable(value="categories", condition="#level == 0")
public List<Category> findCategoryByLevel(int level) throws DataAccessException {
return categoryRepository.findCategoryByLevel(level);
}
Error:
java.lang.IllegalArgumentException: Cannot find cache named 'categories' for CacheableOperation[public java.util.List com.mySite.service.DidicityServiceImpl.findCategoryByLevel(int) throws org.springframework.dao.DataAccessException] caches=[categories] | key='' | condition='#level == 0' | unless=''
What caching provider are you using with Spring's Cache Abstraction (e.g. Ehcache, Guava, Hazelcast, etc.)?
It appears you are missing an explicit "Cache" definition and instance in your actual caching provider. For example, when using Pivotal GemFire as a caching provider in Spring's Cache Abstraction, you need to define a Region (a.k.a. Cache in the Spring Cache Abstraction), using your example above, like so...
<gfe:cache ...>
<gfe:replicated-region id="categories" persistent="false"...>
...
</gfe:replicated-region>
Spring Data GemFire goes on to look up the "Cache" when the cached application service or repository method is invoked, and so the actual backing "Cache" (i.e. the GemFire Region) must exist, otherwise the Spring Cache Abstraction throws an IllegalArgumentException.
So, by way of a more explicit example, I wrote the following test...
/*
* Copyright 2014-present the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.spring.cache;
import static org.hamcrest.CoreMatchers.*;
import static org.junit.Assert.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.spring.cache.CachingWithConcurrentMapUsingExplicitlyNamedCachesTest.ApplicationConfiguration;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.cache.CacheManager;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.cache.concurrent.ConcurrentMapCacheManager;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.stereotype.Service;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
/**
* The CachingWithConcurrentMapUsingExplicitlyNamedCachesTest class is a test suite of test cases testing the contract
* and functionality of Spring Cache Abstraction using the ConcurrentMap-based Cache Management Strategy
* with explicitly named "Caches".
*
* NOTE: when the Cache(s) [is|are] explicitly named using the ConcurrentMapCacheManager, then "dynamic" is disabled
* and the corresponding named Cache in the @Cacheable annotation of the cached service method must exist
* (or be declared). If no explicitly named Caches are provided to the ConcurrentMapCacheManager constructor, then dynamic
* is enabled and the Cache will be created at runtime, on the fly.
*
* @author John Blum
* @see org.junit.Test
* @see org.junit.runner.RunWith
* @see org.springframework.cache.Cache
* @see org.springframework.cache.CacheManager
* @see org.springframework.cache.annotation.Cacheable
* @see org.springframework.cache.annotation.EnableCaching
* @see org.springframework.cache.concurrent.ConcurrentMapCacheManager
* @see org.springframework.context.annotation.Bean
* @see org.springframework.context.annotation.Configuration
* @see org.springframework.test.context.ContextConfiguration
* @see org.springframework.test.context.junit4.SpringJUnit4ClassRunner
* @since 1.0.0
*/
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(classes = ApplicationConfiguration.class)
@SuppressWarnings("unused")
public class CachingWithConcurrentMapUsingExplicitlyNamedCachesTest {
@Autowired
private NumberCategoryService numberCategoryService;
@Test
public void numberCategoryCaching() {
assertThat(numberCategoryService.isCacheMiss(), is(false));
List<NumberCategory> twoCategories = numberCategoryService.classify(2.0);
assertThat(twoCategories, is(notNullValue()));
assertThat(twoCategories.size(), is(equalTo(3)));
assertThat(twoCategories.containsAll(Arrays.asList(
NumberCategory.EVEN, NumberCategory.POSITIVE, NumberCategory.WHOLE)), is(true));
assertThat(numberCategoryService.isCacheMiss(), is(true));
List<NumberCategory> twoCategoriesAgain = numberCategoryService.classify(2.0);
assertThat(twoCategoriesAgain, is(sameInstance(twoCategories)));
assertThat(numberCategoryService.isCacheMiss(), is(false));
List<NumberCategory> negativeThreePointFiveCategories = numberCategoryService.classify(-3.5);
assertThat(negativeThreePointFiveCategories, is(notNullValue()));
assertThat(negativeThreePointFiveCategories.size(), is(equalTo(3)));
assertThat(negativeThreePointFiveCategories.containsAll(Arrays.asList(
NumberCategory.ODD, NumberCategory.NEGATIVE, NumberCategory.FLOATING)), is(true));
assertThat(numberCategoryService.isCacheMiss(), is(true));
}
@Configuration
@EnableCaching
public static class ApplicationConfiguration {
@Bean
public CacheManager cacheManager() {
//return new ConcurrentMapCacheManager("Categories");
return new ConcurrentMapCacheManager("Temporary");
}
@Bean
public NumberCategoryService numberCategoryService() {
return new NumberCategoryService();
}
}
@Service
public static class NumberCategoryService {
private volatile boolean cacheMiss;
public boolean isCacheMiss() {
boolean localCacheMiss = this.cacheMiss;
this.cacheMiss = false;
return localCacheMiss;
}
protected void setCacheMiss() {
this.cacheMiss = true;
}
#Cacheable("Categories")
public List<NumberCategory> classify(double number) {
setCacheMiss();
List<NumberCategory> categories = new ArrayList<>(3);
categories.add(isEven(number) ? NumberCategory.EVEN : NumberCategory.ODD);
categories.add(isPositive(number) ? NumberCategory.POSITIVE : NumberCategory.NEGATIVE);
categories.add(isWhole(number) ? NumberCategory.WHOLE : NumberCategory.FLOATING);
return categories;
}
protected boolean isEven(double number) {
return (isWhole(number) && Math.abs(number) % 2 == 0);
}
protected boolean isFloating(double number) {
return !isWhole(number);
}
protected boolean isNegative(double number) {
return (number < 0);
}
protected boolean isOdd(double number) {
return !isEven(number);
}
protected boolean isPositive(double number) {
return (number > 0);
}
protected boolean isWhole(double number) {
return (number == Math.floor(number));
}
}
public enum NumberCategory {
EVEN,
FLOATING,
NEGATIVE,
ODD,
POSITIVE,
WHOLE
}
}
This test example is currently set up to throw the IllegalArgumentException. If you change this...
return new ConcurrentMapCacheManager("Temporary");
To this...
return new ConcurrentMapCacheManager("Categories");
Then all is well.
Hopefully this adequately illustrates the problem you are having and how to fix it.
Cheers,
John
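Applied back to the original question, a minimal sketch of the fix (assuming the plain ConcurrentMap provider and Java-based configuration; adapt the bean to whatever provider you actually use) looks like this:
import org.springframework.cache.CacheManager;
import org.springframework.cache.annotation.EnableCaching;
import org.springframework.cache.concurrent.ConcurrentMapCacheManager;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
@EnableCaching
public class CacheConfig {
    @Bean
    public CacheManager cacheManager() {
        // Declares the cache named in @Cacheable(value = "categories"), so the
        // abstraction can resolve it instead of throwing IllegalArgumentException.
        return new ConcurrentMapCacheManager("categories");
    }
}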
It looks like you want to use the key as a static hardcoded string.
Try the following, "'categories'":
@Override
@Transactional(readOnly = true)
@Cacheable(value="'categories'", condition="#level == 0")
public List<Category> findCategoryByLevel(int level) throws DataAccessException {
return categoryRepository.findCategoryByLevel(level);
}

Hadoop Not Finding Map Class

I am using hadoop-1.2.1 and trying to run a simple RowCount HBase job using ToolRunner. However, no matter what I try, Hadoop cannot find the map class. The jar file is being copied correctly into HDFS, but I can't seem to figure out where it is going wrong. Please help!
Here is the code:
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class HBaseRowCountToolRunnerTest extends Configured implements Tool
{
// What to copy.
public static final String JAR_NAME = "myJar.jar";
public static final String LOCAL_JAR = <path_to_jar> + JAR_NAME;
public static final String REMOTE_JAR = "/tmp/"+JAR_NAME;
public static void main(String[] args) throws Exception
{
Configuration config = HBaseConfiguration.create();
//All connection configs set here -- omitted to post the code
config.set("tmpjars", REMOTE_JAR);
FileSystem dfs = FileSystem.get(config);
System.out.println("pathString = " + (new Path(LOCAL_JAR)).toString() + " \n");
// Copy jar file to remote.
dfs.copyFromLocalFile(new Path(LOCAL_JAR), new Path(REMOTE_JAR));
// Get rid of jar file when we're done.
dfs.deleteOnExit(new Path(REMOTE_JAR));
// Run the job.
System.exit(ToolRunner.run(config, new HBaseRowCountToolRunnerTest(), args));
}
@Override
public int run(String[] args) throws Exception
{
Job job = new RowCountJob(getConf(), "testJob", "myLittleHBaseTable");
return job.waitForCompletion(true) ? 0 : 1;
}
public static class RowCountJob extends Job
{
RowCountJob(Configuration conf, String jobName, String tableName) throws IOException
{
super(conf, RowCountJob.class.getCanonicalName() + "_" + jobName);
setJarByClass(getClass());
Scan scan = new Scan();
scan.setCacheBlocks(false);
scan.setFilter(new FirstKeyOnlyFilter());
setOutputFormatClass(NullOutputFormat.class);
TableMapReduceUtil.initTableMapperJob(tableName, scan,
RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, this);
setNumReduceTasks(0);
}
}//end public static class RowCountJob extends Job
//Mapper that runs the count
//TableMapper -- TableMapper<KEYOUT, VALUEOUT> (*OUT by type)
public static class RowCounterMapper extends TableMapper<ImmutableBytesWritable, Result>
{
//Counter enumeration to count the actual rows
public static enum Counters {ROWS}
/**
* Maps the data.
*
* @param row The current table row key.
* @param values The columns.
* @param context The current context.
* @throws IOException When something is broken with the data.
* @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN,
* org.apache.hadoop.mapreduce.Mapper.Context)
*/
@Override
public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException
{
// Count every row containing data times 2, whether it's in qualifiers or values
context.getCounter(Counters.ROWS).increment(2);
}
}//end public static class RowCounterMapper extends TableMapper<ImmutableBytesWritable, Result>
}//end public class HBaseRowCountToolRunnerTest
OK - I found a workaround to the problem and thought that I would share it for all others having similar issues...
As it turns out, I abandoned the tmpjars configuration option and just copied the jar file directly into the DistributedCache from the code itself. Here is what it looks like:
// Copy jar file to remote.
FileSystem dfs = FileSystem.get(conf);
dfs.copyFromLocalFile(new Path(LOCAL_JAR), new Path(REMOTE_JAR));
// Get rid of jar file when we're done.
dfs.deleteOnExit(new Path(REMOTE_JAR));
//Place it in the distributed cache
DistributedCache.addFileToClassPath(new Path(REMOTE_JAR), conf, dfs);
Perhaps it doesn't solve what is going on with tmpjars, but it does work.
I got the same problem today. Finally, I found it was because I forgot to insert the following statement in the driver class...
job.setJarByClass(HBaseTestDriver.class);
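In a ToolRunner-style driver like the one in the question, that call typically goes right after the Job is created; a minimal sketch follows (the class name, job name, and the omitted mapper/scan setup are placeholders, not the original code):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HBaseTestDriver extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {
        Job job = new Job(getConf(), "rowcount");
        // Point Hadoop at the jar that contains this class; without this the
        // tasks on the cluster cannot load the mapper class.
        job.setJarByClass(HBaseTestDriver.class);
        // ... mapper / scan / output setup as in the question ...
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new Configuration(), new HBaseTestDriver(), args));
    }
}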

Integrating Spring MVC 3, AJAX and Apache Tiles

I am facing problems while integrating Spring MVC 3, AJAX and Apache Tiles, especially with AJAX.
Kindly suggest some links for this.
I am trying to load results into a tile with the help of an AJAX call from another tile containing the search criteria.
Thanks in advance.
You need to reconfigure this:
<bean id="tilesViewResolver" class="org.springframework.js.ajax.AjaxUrlBasedViewResolver">
<property name="viewClass" value="org.springframework.js.ajax.tiles3.AjaxTilesView"/>
</bean>
<bean class="org.springframework.web.servlet.view.tiles3.TilesConfigurer" id="tilesConfigurer">
<property name="definitions">
<list>
<value>/WEB-INF/layouts/layouts.xml</value>
<!-- Scan views directory for Tiles configurations -->
<value>/WEB-INF/views/**/views.xml</value>
</list>
</property>
</bean>
where AjaxUrlBasedViewResolver is in spring-js-2.3.1-RELEASE.jar and
AjaxTilesView is a custom implementation based on org.springframework.js.ajax.tiles2.AjaxTilesView and org.apache.tiles.web.util.TilesDispatchServlet.doGet(), like this:
package org.springframework.js.ajax.tiles3;
/*
* Copyright 2004-2008 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import javax.el.ELContext;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.jsp.JspContext;
import javax.servlet.jsp.JspWriter;
import javax.servlet.jsp.el.ExpressionEvaluator;
import javax.servlet.jsp.el.VariableResolver;
import org.apache.tiles.Attribute;
import org.apache.tiles.AttributeContext;
import org.apache.tiles.Definition;
import org.apache.tiles.TilesContainer;
import org.apache.tiles.access.TilesAccess;
import org.apache.tiles.context.TilesRequestContextHolder;
import org.apache.tiles.request.ApplicationContext;
import org.apache.tiles.request.Request;
import org.apache.tiles.request.jsp.JspUtil;
import org.apache.tiles.request.servlet.ServletRequest;
import org.apache.tiles.request.servlet.ServletUtil;
import org.springframework.js.ajax.AjaxHandler;
import org.springframework.js.ajax.SpringJavascriptAjaxHandler;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;
import org.springframework.web.servlet.support.JstlUtils;
import org.springframework.web.servlet.support.RequestContext;
import org.springframework.web.servlet.view.tiles3.TilesView;
/**
* Tiles view implementation that is able to handle partial rendering for Spring
* Javascript Ajax requests.
*
* <p>
* This implementation uses the {@link SpringJavascriptAjaxHandler} by default
* to determine whether the current request is an Ajax request. On an Ajax
* request, a "fragments" parameter will be extracted from the request in order
* to determine which attributes to render from the current tiles view.
* </p>
*
* @author Jeremy Grelle
* @author David Winterfeldt
*/
public class AjaxTilesView extends TilesView {
private static final String FRAGMENTS_PARAM = "fragments";
private TilesRequestContextHolder tilesRequestContextFactory;
private AjaxHandler ajaxHandler = new SpringJavascriptAjaxHandler();
public void afterPropertiesSet() throws Exception {
super.afterPropertiesSet();
tilesRequestContextFactory = new TilesRequestContextHolder();
}
public AjaxHandler getAjaxHandler() {
return ajaxHandler;
}
public void setAjaxHandler(AjaxHandler ajaxHandler) {
this.ajaxHandler = ajaxHandler;
}
protected void renderMergedOutputModel(Map model, HttpServletRequest request, HttpServletResponse response)
throws Exception {
ServletContext servletContext = getServletContext();
if (ajaxHandler.isAjaxRequest(request, response)) {
String[] fragmentsToRender = getRenderFragments(model, request, response);
if (fragmentsToRender.length == 0) {
logger.warn("An Ajax request was detected, but no fragments were specified to be re-rendered. "
+ "Falling back to full page render. This can cause unpredictable results when processing "
+ "the ajax response on the client.");
super.renderMergedOutputModel(model, request, response);
return;
}
ApplicationContext tilesRequestContext = org.apache.tiles.request.servlet.ServletUtil
.getApplicationContext(getServletContext());
ServletRequest servletRequest = new ServletRequest(tilesRequestContext,
request, response);
TilesContainer container = TilesAccess.getContainer(tilesRequestContext);
if (container == null) {
throw new ServletException("Tiles container is not initialized. "
+ "Have you added a TilesConfigurer to your web application context?");
}
exposeModelAsRequestAttributes(model, request);
JstlUtils.exposeLocalizationContext(new RequestContext(request, servletContext));
Definition compositeDefinition = container.getDefinition(getUrl(), servletRequest);
Map flattenedAttributeMap = new HashMap();
flattenAttributeMap(container, tilesRequestContext, flattenedAttributeMap, compositeDefinition,
servletRequest);
addRuntimeAttributes(container, flattenedAttributeMap, servletRequest);
if (fragmentsToRender.length > 1) {
request.setAttribute(ServletRequest.FORCE_INCLUDE_ATTRIBUTE_NAME, true);
}
for (int i = 0; i < fragmentsToRender.length; i++) {
Attribute attributeToRender = (Attribute) flattenedAttributeMap.get(fragmentsToRender[i]);
if (attributeToRender == null) {
throw new ServletException("No tiles attribute with a name of '" + fragmentsToRender[i]
+ "' could be found for the current view: " + this);
} else {
// container.inheritCascadedAttributes(compositeDefinition);
container.render(attributeToRender, servletRequest);
container.endContext(servletRequest);
}
}
} else {
super.renderMergedOutputModel(model, request, response);
}
}
protected String[] getRenderFragments(Map model, HttpServletRequest request, HttpServletResponse response) {
String attrName = request.getParameter(FRAGMENTS_PARAM);
String[] renderFragments = StringUtils.commaDelimitedListToStringArray(attrName);
return StringUtils.trimArrayElements(renderFragments);
}
/**
* <p>
* Iterate over all attributes in the given Tiles definition. Every
* attribute value that represents a template (i.e. start with "/") or is a
* nested definition is added to a Map. The method calls itself recursively
* to traverse nested definitions.
* </p>
*
* @param container
* the TilesContainer
* @param requestContext
* the TilesRequestContext
* @param resultMap
* the output Map where attributes of interest are added to.
* @param compositeDefinition
* the definition to search for attributes of interest.
* @param request
* the servlet request
* @param response
* the servlet response
*/
protected void flattenAttributeMap(TilesContainer container, ApplicationContext requestContext, Map resultMap,
Definition compositeDefinition, ServletRequest servletRequest) {
Set<String> cascadedAttributeNames = compositeDefinition.getCascadedAttributeNames();
Iterator iterator = null;
if (cascadedAttributeNames ==null){
iterator = compositeDefinition.getLocalAttributeNames().iterator();
}else{
iterator = cascadedAttributeNames.iterator();
}
while (iterator.hasNext()) {
String attributeName = (String) iterator.next();
Attribute attribute = compositeDefinition.getAttribute(attributeName);
if (attribute.getValue() == null || !(attribute.getValue() instanceof String)) {
continue;
}
String value = attribute.getValue().toString();
if (value.startsWith("/")) {
resultMap.put(attributeName, attribute);
} else if (container.isValidDefinition(value, servletRequest)) {
resultMap.put(attributeName, attribute);
Definition nestedDefinition = container.getDefinition(value, servletRequest);
Assert.isTrue(nestedDefinition != compositeDefinition, "Circular nested definition: " + value);
flattenAttributeMap(container, requestContext, resultMap, nestedDefinition, servletRequest);
}
}
}
/**
* <p>
* Iterate over dynamically added Tiles attributes (see
* "Runtime Composition" in the Tiles documentation) and add them to the
* output Map passed as input.
* </p>
*
* @param container
* the Tiles container
* @param resultMap
* the output Map where attributes of interest are added to.
* @param request
* the Servlet request
* @param response
* the Servlet response
*/
protected void addRuntimeAttributes(TilesContainer container, Map resultMap, ServletRequest servletRequest) {
AttributeContext attributeContext = container.getAttributeContext(servletRequest);
Set attributeNames = new HashSet();
if (attributeContext.getLocalAttributeNames() != null) {
attributeNames.addAll(attributeContext.getLocalAttributeNames());
}
if (attributeContext.getCascadedAttributeNames() != null) {
attributeNames.addAll(attributeContext.getCascadedAttributeNames());
}
Iterator iterator = attributeNames.iterator();
while (iterator.hasNext()) {
String name = (String) iterator.next();
Attribute attr = attributeContext.getAttribute(name);
resultMap.put(name, attr);
}
}
}
Hope this helps you.
The latest Spring Web Flow 2.4 includes a new FlowAjaxTiles3View that extends org.springframework.js.ajax.tiles3.AjaxTilesView and works with Tiles 3 as well. It allows you to define render fragments in the flow definition besides using the "fragments" request param:
<bean id="tilesViewResolver" class="org.springframework.js.ajax.AjaxUrlBasedViewResolver">
<property name="viewClass" value="org.springframework.webflow.mvc.view.FlowAjaxTiles3View"/>
</bean>
Also you shouldn't forget to point your view factory to this viewResolver:
<webflow:flow-builder-services id="flowBuilderServices" view-factory-creator="mvcViewFactoryCreator" />
<!-- Configures Web Flow to use Tiles to create views for rendering; Tiles allows for applying consistent layouts to your views -->
<bean id="mvcViewFactoryCreator" class="org.springframework.webflow.mvc.builder.MvcViewFactoryCreator">
<property name="viewResolvers" ref="tilesViewResolver"/>
</bean>
