Apache Spark with Apache Flume integration - spark-streaming

How can Spark Streaming be configured to receive input data from Flume in Java? (I am stuck in the code.)
Here is my code:
import org.apache.spark.api.java.function.Function;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.flume.FlumeUtils;
import org.apache.spark.streaming.flume.SparkFlumeEvent;

public class JavaFlumeEventCount { // class name assumed; the original snippet omitted the declaration
    public static void main(String[] args) {
        // master, host and port were undefined in the snippet; assumed here to come from program arguments
        String master = args[0];
        String host = args[1];
        int port = Integer.parseInt(args[2]);

        Duration batchInterval = new Duration(2000);
        System.out.println("-Starting Spark Context");
        System.out.println("-Spark_home:" + System.getenv("SPARK_HOME"));
        JavaStreamingContext sc = new JavaStreamingContext(master,
                "FlumeEventCount", batchInterval,
                System.getenv("SPARK_HOME"), "/home/cloudera/SparkOnALog.jar");

        System.out.println("-Setting up Flume Stream: " + host + " " + port);
        JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);

        flumeStream.count().print();
        flumeStream.count().map(new Function<Long, String>() {
            public String call(Long in) {
                return "????????????? Received " + in + " flume events.";
            }
        }).print();

        System.out.println("-Starting Spark Context");
        sc.start();
        sc.awaitTermination(); // block here, otherwise the driver exits before any batch runs
        System.out.println("-Finished");
    }
}
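In case it helps, here is a minimal sketch of the pull-based variant of the same integration (assuming Spark 1.1+ with the spark-streaming-flume artifact on the classpath, and a Flume agent configured with the custom org.apache.spark.streaming.flume.sink.SparkSink); the class name and the host/port arguments are placeholders:

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.flume.FlumeUtils;
import org.apache.spark.streaming.flume.SparkFlumeEvent;

public class FlumePollingEventCount {
    public static void main(String[] args) throws Exception {
        // Host and port of the Flume agent's SparkSink (placeholders, taken from program arguments here).
        String sinkHost = args[0];
        int sinkPort = Integer.parseInt(args[1]);

        SparkConf conf = new SparkConf().setAppName("FlumePollingEventCount");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(2000));

        // Spark pulls events from the Flume sink instead of Flume pushing into a receiver.
        JavaReceiverInputDStream<SparkFlumeEvent> events =
                FlumeUtils.createPollingStream(jssc, sinkHost, sinkPort);

        events.count().print();

        jssc.start();
        jssc.awaitTermination(); // keep the driver alive so batches keep running
    }
}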

Related

How to manually manage a JMS session in a Java project that connects to a JMS MQ with XML as payload and gets the response from the remote JMS endpoint

I have been searching for Java code that sends an XML payload to a JMS endpoint and receives the response in XML format. Everyone writes a separate producer and consumer, but nobody shows code for talking to an existing JMS endpoint with a payload.
Plug-in - Tibco EMS
Connection factory - com.tibco.tibjms.TibjmsConnectionFactory
Server URL - tcp://10.xxx.xx.xx:69999
User name - scott
Password - tiger
XML payload:
<?xml version="1.0" encoding="UTF-8"?>
<MYServices>
<header>
<version>1.0</version>
</header>
<body>
<srv_req>
<req_due_amount>
<card_no>blablanumber</card_no>
</req_due_amount>
</srv_req>
</body>
</MYServices>
JMS endpoint
jms://session_name::queue_name::request_topic
import javax.jms.*;
import java.util.Enumeration;
public class JMSExample{
protected static final String SERVICE_QUEUE = "QUEUE_NAME_THAT_IS_CREATED_IN_SERVER_FOR_ACCEPTING";
static String serverUrl = "tcp://10.xxx.xxx.xxx:xxxxx";
static String userName = "UR_UserID";
static String password = "UR_Pass";
public static void sendTopicMessage(String topicName, String messageStr) {
Connection connection = null;
Session session = null;
MessageProducer msgProducer = null;
Destination destination = null;
try {
TextMessage msg;
System.out.println("Publishing to destination '" + topicName
+ "'\n");
ConnectionFactory factory = new com.tibco.tibjms.TibjmsConnectionFactory(serverUrl);
connection = factory.createConnection(userName, password);
connection.start();
session = connection
.createSession(false,javax.jms.Session.AUTO_ACKNOWLEDGE);
TemporaryQueue tempQueue = session.createTemporaryQueue();
TextMessage message_t = session.createTextMessage(messageStr);
// Setting JMSReplyTo on the request is required so the server knows where to send the reply
message_t.setJMSReplyTo(tempQueue);
MessageProducer producer = session.createProducer(session.createQueue(SERVICE_QUEUE));
producer.send(message_t);
System.out.println("INFO:: The producer has sent the message"+message_t);
Destination dest = tempQueue;
MessageConsumer consumer = session.createConsumer(dest);
Message replyMsg = consumer.receive();
TextMessage tm = (TextMessage) replyMsg;
System.out.println("INFO:: The response is " + tm.getText());
consumer.close();
producer.close();
session.close();
connection.close();
} catch (JMSException e) {
System.out.println("Error :: there was exception"+e);
e.printStackTrace();
}
}
/*-----------------------------------------------------------------------
* main
*----------------------------------------------------------------------*/
public static void main(String[] args) {
JMSExample.sendTopicMessage(SERVICE_QUEUE,
"<?xml version = \"1.0\" encoding = \"UTF-8\"?>\n" +
"<MYServices>\n" +
" <header>\n" +
" <Version>1.0</Version>\n" +
" <SrvType>OML</SrvType>\n" +
" <SrvName>REQ_BALANCE_ENQUIRY</SrvName>\n" +
" <SrcApp>BNK</SrcApp>\n" +
" <OrgId>BLA</OrgId>\n" +
" </header>\n" +
" <body>\n" +
" <srv_req>\n" +
" <req_credit_card_balance_enquiry>\n" +
" <card_no>12345678</card_no>\n" +
" </req_credit_card_balance_enquiry>\n" +
" </srv_req>\n" +
" </body>\n" +
"</MYServices>\n");
}
}

java.io.IOException: Timed out waiting for Mini HDFS Cluster to start

I am getting a timeout exception while trying to start the HBase mini cluster. Further, I want to write an HBase test case, but currently it fails for the Hadoop 3.1.1 and HBase 2.0.2 combination.
1) I have tried all Hadoop versions >= 3.1.1 and HBase versions >= 2.0.0.
2) I have taken code from https://github.com/apache/hbase/blob/rel/2.0.2/hbase-server/src/test/java/org/apache/hadoop/hbase/TestHBaseTestingUtility.java
and
https://github.com/apache/ranger/blob/master/hbase-agent/src/test/java/org/apache/ranger/authorization/hbase/HBaseRangerAuthorizationTest.java
import java.io.IOException;
import java.net.ServerSocket;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
public class HBaseRangerAuthorizationTest2 {
private static int port;
private static HBaseTestingUtility utility;
public static void main(String args[]) {
try {
port = getFreePort();
utility = new HBaseTestingUtility();
utility.getConfiguration().set("test.hbase.zookeeper.property.clientPort", "" + port);
utility.getConfiguration().set("hbase.master.port", "" + getFreePort());
utility.getConfiguration().set("hbase.master.info.port", "" + getFreePort());
utility.getConfiguration().set("hbase.regionserver.port", "" + getFreePort());
utility.getConfiguration().set("hbase.regionserver.info.port", "" + getFreePort());
utility.getConfiguration().set("zookeeper.znode.parent", "/hbase-unsecure");
utility.startMiniCluster();
/*
utility= new HBaseTestingUtility();
// Set a different zk path for each cluster
utility.getConfiguration().set(HConstants.ZOOKEEPER_ZNODE_PARENT, "/1");
utility.startMiniZKCluster();
utility.startMiniCluster();*/
}catch(Exception e) {
e.printStackTrace();
}
}
public static int getFreePort() throws IOException {
ServerSocket serverSocket = new ServerSocket(0);
int port = serverSocket.getLocalPort();
serverSocket.close();
return port;
}
}
I expect the mini cluster to start without failing.
I am using Hadoop version 2.7.3 and HBase version 1.1.2.
For the timeout exception, add the hadoop-client dependency to your Gradle file:
compile 'org.apache.hadoop:hadoop-client:2.7.3'
Further, check that you have added the dependency:
compile 'org.apache.hbase:hbase-testing-util:1.1.2'

AWS ElastiCache Jedis using credentials

I need to connect to a Redis instance in my ElastiCache. As I understand from "Amazon Elasticache Redis cluster - Can't get Endpoint", I can get the endpoint from there.
Now suppose I get the endpoint and use it to create a Jedis client (since I use Java); how do I provide the AWS IAM credentials?
I am going to secure ElastiCache using IAM policies. How do I ensure that no other application connects to this Redis instance?
static AWSCredentials credentials = null;
static {
try {
//credentials = new ProfileCredentialsProvider("default").getCredentials();
credentials = new SystemPropertiesCredentialsProvider().getCredentials();
} catch (Exception e) {
System.out.println("Got exception..........");
throw new AmazonClientException("Cannot load the credentials from the credential profiles file. "
+ "Please make sure that your credentials file is at the correct "
+ "location (/Users/USERNAME/.aws/credentials), and is in valid format.", e);
}
}
@Bean
public LettuceConnectionFactory redisConnectionFactory() {
AmazonElastiCache elasticacheClient = AmazonElastiCacheClientBuilder.standard().withCredentials(new AWSStaticCredentialsProvider(credentials)).withRegion(Regions.US_EAST_1).build();
DescribeCacheClustersRequest dccRequest = new DescribeCacheClustersRequest();
dccRequest.setShowCacheNodeInfo(true);
DescribeCacheClustersResult clusterResult = elasticacheClient.describeCacheClusters(dccRequest);
List<CacheCluster> cacheClusters = clusterResult.getCacheClusters();
List<String> clusterNodes = new ArrayList <String> ();
try {
for (CacheCluster cacheCluster : cacheClusters) {
for (CacheNode cacheNode : cacheCluster.getCacheNodes()) {
String addr = cacheNode.getEndpoint().getAddress();
int port = cacheNode.getEndpoint().getPort();
String url = addr + ":" + port;
if ("<ReplicationGroup Name>".equalsIgnoreCase(cacheCluster.getReplicationGroupId())) // placeholder: your replication group id
clusterNodes.add(url);
}
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
LettuceConnectionFactory redisConnectionFactory = new LettuceConnectionFactory(new RedisClusterConfiguration(clusterNodes));
redisConnectionFactory.setUseSsl(true);
redisConnectionFactory.afterPropertiesSet();
return redisConnectionFactory;
}
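For what it is worth, here is a minimal sketch (assuming Jedis 2.9+ and a node with in-transit encryption enabled) of pointing a plain Jedis client at one of the endpoints discovered above; the endpoint and the optional AUTH token are placeholders, and whether IAM credentials can be used at this level is exactly what the question is asking:

import redis.clients.jedis.Jedis;

public class JedisSmokeTest {
    public static void main(String[] args) {
        // Placeholder: one of the "addr:port" entries collected into clusterNodes above.
        String endpoint = "<node-endpoint-from-describeCacheClusters>";
        int port = 6379;

        // Third constructor argument enables SSL/TLS (in-transit encryption).
        try (Jedis jedis = new Jedis(endpoint, port, true)) {
            // Only needed if the replication group was created with a Redis AUTH token.
            // jedis.auth("<auth-token>");
            System.out.println("PING -> " + jedis.ping());
        }
    }
}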

How to transfer *.pgp files using SFTP Spring Integration

We are developing a generic automated application which downloads *.pgp files from an SFTP server.
The application works fine with *.txt files, but when we try to pull *.pgp files we get the exception below.
2016-03-18 17:45:45 INFO jsch:52 - SSH_MSG_SERVICE_REQUEST sent
2016-03-18 17:45:46 INFO jsch:52 - SSH_MSG_SERVICE_ACCEPT received
2016-03-18 17:45:46 INFO jsch:52 - Next authentication method: publickey
2016-03-18 17:45:48 INFO jsch:52 - Authentication succeeded (publickey).
sftpSession org.springframework.integration.sftp.session.SftpSession@37831f
files size158
java.io.IOException: inputstream is closed
at com.jcraft.jsch.ChannelSftp.fill(ChannelSftp.java:2884)
at com.jcraft.jsch.ChannelSftp.header(ChannelSftp.java:2908)
at com.jcraft.jsch.ChannelSftp.access$500(ChannelSftp.java:36)
at com.jcraft.jsch.ChannelSftp$2.read(ChannelSftp.java:1390)
at com.jcraft.jsch.ChannelSftp$2.read(ChannelSftp.java:1340)
at org.springframework.util.StreamUtils.copy(StreamUtils.java:126)
at org.springframework.util.FileCopyUtils.copy(FileCopyUtils.java:109)
at org.springframework.integration.sftp.session.SftpSession.read(SftpSession.java:129)
at com.sftp.test.SFTPTest.main(SFTPTest.java:49)
Java code:
public class SFTPTest {
public static void main(String[] args) {
ApplicationContext applicationContext = new ClassPathXmlApplicationContext("beans.xml");
DefaultSftpSessionFactory defaultSftpSessionFactory = applicationContext.getBean("defaultSftpSessionFactory", DefaultSftpSessionFactory.class);
System.out.println(defaultSftpSessionFactory);
SftpSession sftpSession = defaultSftpSessionFactory.getSession();
System.out.println("sftpSessikon "+sftpSession);
String remoteDirectory = "/";
String localDirectory = "C:/312421/temp/";
OutputStream outputStream = null;
List<String> fileAtSFTPList = new ArrayList<String>();
try {
String[] fileNames = sftpSession.listNames(remoteDirectory);
for (String fileName : fileNames) {
boolean isMatch = fileCheckingAtSFTPWithPattern(fileName);
if(isMatch){
fileAtSFTPList.add(fileName);
}
}
System.out.println("files size" + fileAtSFTPList.size());
for (String fileName : fileAtSFTPList) {
File file = new File(localDirectory + fileName);
/*InputStream ipstream= sftpSession.readRaw(fileName);
FileUtils.writeByteArrayToFile(file, IOUtils.toByteArray(ipstream));
ipstream.close();*/
outputStream = new FileOutputStream(file);
sftpSession.read(remoteDirectory + fileName, outputStream);
outputStream.close();
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}finally {
try {
if (outputStream != null)
outputStream.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
public static boolean fileCheckingAtSFTPWithPattern(String fileName){
Pattern pattern = Pattern.compile(".*\\.pgp$");
Matcher matcher = pattern.matcher(fileName);
if(matcher.find()){
return true;
}
return false;
}
}
Please suggest how to sort out this issue.
Thanks
The file type is irrelevant to Spring Integration - it looks like the server is closing the connection while reading the preamble - before the data is being fetched...
at com.jcraft.jsch.ChannelSftp.header(ChannelSftp.java:2908)
at com.jcraft.jsch.ChannelSftp.access$500(ChannelSftp.java:36)
at com.jcraft.jsch.ChannelSftp$2.read(ChannelSftp.java:1390)
at com.jcraft.jsch.ChannelSftp$2.read(ChannelSftp.java:1340)
The data itself is not read until later (line 1442 in ChannelSftp).
So it looks like a server-side problem.
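For completeness, the commented-out readRaw(...) attempt in the question can be finished like this (a sketch, assuming Spring Integration 3.0+, reusing the sftpSession, remoteDirectory, fileName and file variables from the code above); whether it behaves differently still depends on the server-side problem described above:

// Read the remote file as a raw stream; finalizeRaw() must be called once the stream is consumed.
InputStream ipstream = sftpSession.readRaw(remoteDirectory + fileName);
FileUtils.writeByteArrayToFile(file, IOUtils.toByteArray(ipstream));
ipstream.close();
sftpSession.finalizeRaw();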

Storm-JMS spout collecting Avro messages and sending them downstream?

I am new to the Avro format. I am trying to collect Avro messages from a JMS queue using the Storm-JMS spout and send them to HDFS using the HDFS bolt.
The queue is sending Avro, but I am not able to get the messages in Avro format using the HDFS bolt.
How do I properly collect the Avro messages and send them downstream without encoding errors in HDFS?
The existing HDFS bolt does not support writing Avro files; we need to overcome this by making the following changes. In this sample code I take the JMS messages from my spout, convert those JMS bytes messages to Avro, and emit them to HDFS.
This code can serve as a sample for modifying the methods in AbstractHdfsBolt.
public void execute(Tuple tuple) {
try {
// bytesMessage is the javax.jms.BytesMessage obtained from the tuple emitted by the JMS spout
long length = bytesMessage.getBodyLength();
byte[] bytes = new byte[(int)length];
///////////////////////////////////////
bytesMessage.readBytes(bytes);
String replyMessage = new String(bytes, "UTF-8");
datumReader = new SpecificDatumReader<IndexedRecord>(schema);
decoder = DecoderFactory.get().binaryDecoder(bytes, null);
result = datumReader.read(null, decoder);
synchronized (this.writeLock) {
dataFileWriter.append(result);
dataFileWriter.sync();
this.offset += bytes.length;
if (this.syncPolicy.mark(tuple, this.offset)) {
if (this.out instanceof HdfsDataOutputStream) {
((HdfsDataOutputStream) this.out).hsync(EnumSet.of(SyncFlag.UPDATE_LENGTH));
} else {
this.out.hsync();
this.out.flush();
}
this.syncPolicy.reset();
}
dataFileWriter.flush();
}
if(this.rotationPolicy.mark(tuple, this.offset)){
rotateOutputFile(); // synchronized
this.offset = 0;
this.rotationPolicy.reset();
}
} catch (IOException | JMSException e) {
LOG.warn("write/sync failed.", e);
this.collector.fail(tuple);
}
}
@Override
void closeOutputFile() throws IOException {
this.out.close();
}
@Override
Path createOutputFile() throws IOException {
Path path = new Path(this.fileNameFormat.getPath(), this.fileNameFormat.getName(this.rotation, System.currentTimeMillis()));
this.out = this.fs.create(path);
dataFileWriter.create(schema, out);
return path;
}
@Override
void doPrepare(Map conf, TopologyContext topologyContext,OutputCollector collector) throws IOException {
// TODO Auto-generated method stub
LOG.info("Preparing HDFS Bolt...");
try {
schema = new Schema.Parser().parse(new File("/home/*******/********SchemafileName.avsc"));
} catch (IOException e1) {
e1.printStackTrace();
}
this.fs = FileSystem.get(URI.create(this.fsUrl), hdfsConfig);
datumWriter = new SpecificDatumWriter<IndexedRecord>(schema);
dataFileWriter = new DataFileWriter<IndexedRecord>(datumWriter);
JMSAvroUtils JASV = new JMSAvroUtils();
}
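As a usage illustration, here is a rough sketch of wiring a JMS spout and a bolt with the above modifications into a topology; AvroJmsHdfsBolt and buildJmsSpout() are hypothetical placeholders, and the imports assume Storm 1.x (older releases use the backtype.storm packages instead):

import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;

public class AvroHdfsTopology {
    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();

        // Hypothetical helper that builds the storm-jms spout already used in the question.
        builder.setSpout("jms-spout", buildJmsSpout(), 1);
        // Hypothetical class name for the HDFS bolt modified as shown above.
        builder.setBolt("avro-hdfs-bolt", new AvroJmsHdfsBolt(), 1)
               .shuffleGrouping("jms-spout");

        Config conf = new Config();

        // Run locally for testing; use StormSubmitter.submitTopology(...) on a real cluster.
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("avro-hdfs-topology", conf, builder.createTopology());
    }
}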
