Index data to Elasticsearch Cloud from AWS DynamoDB using Java / Spring Boot

I want to index data from DynamoDB into Elasticsearch Cloud (SaaS). My Elasticsearch endpoint URL looks like this:
https://d9bc7cbca5ec49ea96a6ea683f70caca.eastus2.azure.elastic-cloud.com:1234
Whenever a CRUD operation happens in AWS DynamoDB, it should immediately be reflected in the Elasticsearch index.
May I know the best way to achieve this?

Hi all, thanks for all your support. I have attached sample code below that does the trick: a Lambda function, triggered by the table's DynamoDB Stream, that indexes the data into Elasticsearch. It works both inside and outside AWS, but the Lambda function must be granted the proper permissions, as suggested in the comments above.
package com.Firstlambda;

import com.amazonaws.services.lambda.runtime.Context;
import com.amazonaws.services.lambda.runtime.RequestHandler;
import com.amazonaws.auth.AWS4Signer;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.ItemUtils;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.lambda.runtime.events.DynamodbEvent;
import com.amazonaws.services.lambda.runtime.events.DynamodbEvent.DynamodbStreamRecord;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.elasticsearch.action.delete.DeleteRequest;
import org.elasticsearch.action.delete.DeleteResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.action.update.UpdateResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.xcontent.XContentType;
import org.json.JSONObject;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class HelloWorld implements RequestHandler<DynamodbEvent, String> {

    private static String serviceName = "es";
    private static String region = "us-east-1";
    private static String aesEndpoint = "";
    private static String index = "";
    private static String type = "_doc";

    static final AWSCredentialsProvider credentialsProvider = new DefaultAWSCredentialsProviderChain();

    public String handleRequest(DynamodbEvent ddbEvent, Context context) {
        // Build the client once per invocation, not once per record.
        // Outside AWS (Elastic Cloud, basic auth):
        RestHighLevelClient esClient = esClient();
        // Inside AWS (Amazon Elasticsearch Service, signed requests):
        // RestHighLevelClient esClient = esClient(serviceName, region);

        for (DynamodbStreamRecord record : ddbEvent.getRecords()) {
            System.out.println("EventName : " + record.getEventName());
            System.out.println("EventData : " + record.getDynamodb());

            if (record.getEventName().equalsIgnoreCase("INSERT")) {
                String jsonString = getJsonstring(record.getDynamodb().getNewImage());
                String jsonUniqueId = GetIdfromJsonString(jsonString);
                IndexRequest indexRequest = new IndexRequest(index, type, jsonUniqueId);
                indexRequest.source(jsonString, XContentType.JSON);
                try {
                    IndexResponse indexResponse = esClient.index(indexRequest, RequestOptions.DEFAULT);
                    System.out.println(indexResponse.toString());
                } catch (IOException e) {
                    System.out.println(e.getMessage());
                }
            } else if (record.getEventName().equalsIgnoreCase("MODIFY")) {
                String jsonString = getJsonstring(record.getDynamodb().getNewImage());
                String jsonUniqueId = GetIdfromJsonString(jsonString);
                UpdateRequest request = new UpdateRequest(index, type, jsonUniqueId);
                request.doc(jsonString, XContentType.JSON);
                try {
                    UpdateResponse updateResponse = esClient.update(request, RequestOptions.DEFAULT);
                    System.out.println(updateResponse.toString());
                } catch (IOException e) {
                    System.out.println(e.getMessage());
                }
            } else { // REMOVE
                System.out.println("remove");
                System.out.println("KEYID : " + record.getDynamodb().getKeys().get("ID").getN());
                String deletedId = record.getDynamodb().getKeys().get("ID").getN();
                DeleteRequest request = new DeleteRequest(index, type, deletedId);
                try {
                    DeleteResponse deleteResponse = esClient.delete(request, RequestOptions.DEFAULT);
                    System.out.println(deleteResponse.toString());
                } catch (IOException e) {
                    System.out.println(e.getMessage());
                }
            }
        }
        return "Successfully processed " + ddbEvent.getRecords().size() + " records.";
    }

    // Converts a DynamoDB image (attribute map) into a plain JSON string.
    public String getJsonstring(Map<String, AttributeValue> newImage) {
        String json = null;
        List<Map<String, AttributeValue>> listOfMaps = new ArrayList<Map<String, AttributeValue>>();
        listOfMaps.add(newImage);
        List<Item> itemList = ItemUtils.toItemList(listOfMaps);
        for (Item item : itemList) {
            json = item.toJSON();
        }
        return json;
    }

    // Extracts the numeric "ID" attribute to use as the Elasticsearch document id.
    public String GetIdfromJsonString(String json) {
        JSONObject jsonObj = new JSONObject(json);
        return String.valueOf(jsonObj.getInt("ID"));
    }

    // Adds the AWS request-signing interceptor to the ES REST client (for use inside AWS)
    // public static RestHighLevelClient esClient(String serviceName, String region) {
    //     AWS4Signer signer = new AWS4Signer();
    //     signer.setServiceName(serviceName);
    //     signer.setRegionName(region);
    //     HttpRequestInterceptor interceptor = new AWSRequestSigningApacheInterceptor(serviceName, signer, credentialsProvider);
    //     return new RestHighLevelClient(RestClient.builder(HttpHost.create(aesEndpoint)).setHttpClientConfigCallback(hacb -> hacb.addInterceptorLast(interceptor)));
    // }

    // Builds a client for Elastic Cloud using basic authentication (for use outside AWS).
    // Credentials are hard-coded here only for illustration; load them from environment
    // variables or a secrets store in real code.
    public static RestHighLevelClient esClient() {
        String host = "d9bc7cbca5ec49ea9gfde6a6ea683f70caca.eastus2.azure.elastic-cloud.com";
        int port = 9200;
        String userName = "elastic";
        String password = "L4Nfnle3wxsfgjgLmV95lPiUb46hp";
        String protocol = "https";
        final CredentialsProvider credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(AuthScope.ANY,
                new UsernamePasswordCredentials(userName, password));
        RestClientBuilder builder = RestClient.builder(new HttpHost(host, port, protocol))
                .setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider));
        return new RestHighLevelClient(builder);
    }
}
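
Before wiring the Lambda to the stream, it can help to sanity-check connectivity and credentials against the Elastic Cloud cluster from a local main method. A minimal sketch (the EsPing class name is just for illustration) reusing the esClient() factory above:

package com.Firstlambda;

import java.io.IOException;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;

public class EsPing {
    public static void main(String[] args) throws IOException {
        RestHighLevelClient client = HelloWorld.esClient();
        // true if the cluster answered; bad credentials typically surface as an exception
        System.out.println("Cluster reachable: " + client.ping(RequestOptions.DEFAULT));
        client.close();
    }
}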

Related

How to handle a vast number of HTTP GET requests in Spring Boot

I am trying to make 1676 HTTP GET requests in Spring Boot.
One HTTP GET request takes about 6 seconds, so 1676 sequential requests would take about 3 hours.
How can I reduce the total request time?
The code below makes a single HTTP GET request; I call the API using the information in a List that holds 1676 entries.
package com.wook.controller;

import java.time.Duration;
import org.mybatis.spring.annotation.MapperScan;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.http.HttpStatus;
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
import org.springframework.web.reactive.function.client.WebClient;
import org.springframework.web.util.DefaultUriBuilderFactory;
import com.wook.model.dto.ApiKey;
import com.wook.model.dto.Item;
import com.wook.model.dto.Items;
import com.wook.model.dto.SweatherRootRes;
import com.wook.model.dto.Temperature;
import com.wook.service.GeoService;
import reactor.core.publisher.Mono;
import reactor.netty.http.client.HttpClient;

@SpringBootApplication(scanBasePackages = {"com.wook.model", "com.wook.weather", "com.wook.service"})
@MapperScan("com.wook.model.dao")
public class Application implements CommandLineRunner {

    private final static String BASE_URL = "http://apis.data.go.kr/1360000/VilageFcstInfoService_2.0";
    private final ApiKey APK;
    private SweatherRootRes result;
    private Temperature temp;
    private GeoService gs; // to get the GeoService
    private Logger logger = LoggerFactory.getLogger(Application.class); // class used for logging

    @Autowired
    public Application(ApiKey apk, SweatherRootRes result, Temperature temp, GeoService gs) {
        this.APK = apk;
        this.result = result;
        this.temp = temp;
        this.gs = gs;
    }

    public static void main(String[] args) {
        SpringApplication.run(Application.class, args);
    }

    @Override
    public void run(String... args) throws Exception {
        String serviceKey = APK.getApiKey();
        String pageNo = "1";
        String numOfRows = "290";
        String dataType = "JSON";
        String base_date = "20211110";
        String base_time = "2300";
        String nx = "55";
        String ny = "127";
        // Create a DefaultUriBuilderFactory instance that controls how UriBuilders are configured
        DefaultUriBuilderFactory factory = new DefaultUriBuilderFactory(BASE_URL);
        factory.setEncodingMode(DefaultUriBuilderFactory.EncodingMode.VALUES_ONLY); // set the encoding mode
        HttpClient client = HttpClient.create()
                .responseTimeout(Duration.ofSeconds(60)); // set the HTTP response timeout to 60 seconds
        WebClient wc = WebClient.builder()
                .uriBuilderFactory(factory) // apply the URI encoding settings created above
                .baseUrl(BASE_URL) // set the base URL
                .clientConnector(new ReactorClientHttpConnector(client)) // apply the timeout settings created above
                .build();
        // This part iterates over the List<GeoInfo>
        // for (GeoInfo gi : gs.getGeoXY()) {
        //     logger.info(gi.toString());
        // }
        Mono<SweatherRootRes> response = wc.get()
                .uri(uriBuilder -> uriBuilder.path("/getVilageFcst")
                        .queryParam("serviceKey", serviceKey)
                        .queryParam("numOfRows", numOfRows)
                        .queryParam("pageNo", pageNo)
                        .queryParam("dataType", dataType)
                        .queryParam("base_date", base_date)
                        .queryParam("base_time", base_time)
                        .queryParam("nx", nx) // region info
                        .queryParam("ny", ny) // region info
                        .build()) // build the URI with the query parameters above
                .retrieve() // make the HTTP request
                .onStatus(HttpStatus::is4xxClientError,
                        error -> Mono.error(new RuntimeException("API not found")))
                .onStatus(HttpStatus::is5xxServerError,
                        error -> Mono.error(new RuntimeException("Server is not responding")))
                .bodyToMono(SweatherRootRes.class); // receive the body as a Mono
        // Asynchronous: this acts like a callback, so this block runs when the API call succeeds
        response.subscribe(res -> {
            result.setResponse(res.getResponse());
            if (result.getResponse().getBody() != null) {
                logger.info("http request success");
                getTemp(result.getResponse().getBody().getItems());
                logger.info(temp.toString());
            } else {
                logger.error("http request has failed");
            }
        });
    }

    public void getTemp(Items items) {
        int count = 0;
        for (Item item : items.getItem()) {
            if (count >= 2)
                break;
            if (item.getCategory().equals("TMX")) {
                temp.setBestMax(item.getFcstValue());
                temp.setNx(item.getNx());
                temp.setNy(item.getNy());
                temp.setDate(item.getBaseDate());
                count++;
            }
            if (item.getCategory().equals("TMN")) {
                temp.setBestMin(item.getFcstValue());
                count++;
            }
        }
    }
}
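
The biggest saving comes from not issuing the 1676 calls one after another: WebClient is non-blocking, so you can build a Flux over the list and let flatMap keep a bounded number of requests in flight at once. A minimal sketch, assuming a hypothetical fetchWeather(GeoInfo) helper that wraps the wc.get()...bodyToMono(SweatherRootRes.class) pipeline above for a single entry, and an arbitrary concurrency of 50 (requires import reactor.core.publisher.Flux):

// Run the 1676 requests with bounded concurrency instead of sequentially.
// fetchWeather(GeoInfo) is a hypothetical helper returning Mono<SweatherRootRes>.
Flux.fromIterable(gs.getGeoXY())              // the List<GeoInfo> with 1676 entries
        .flatMap(this::fetchWeather, 50)      // at most 50 requests in flight at a time
        .doOnNext(res -> logger.info("response received"))
        .onErrorContinue((e, o) -> logger.error("request failed", e))
        .blockLast();                         // wait for completion (fine inside a CommandLineRunner)

With 50 concurrent requests at roughly 6 seconds each, the whole batch finishes in minutes rather than hours, assuming the upstream API tolerates that request rate.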

Confluent Kafka Avro deserializer for spring boot kafka listener

Has anybody implemented a Confluent Kafka message deserializer to consume Kafka messages with Spring @KafkaListener-s?
Here is my answer, which I implemented based on io.confluent.kafka.serializers.AbstractKafkaAvroDeserializer:
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Map;
import javax.xml.bind.DatatypeConverter;
import org.apache.avro.Schema;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificRecordBase;
import org.apache.kafka.common.errors.SerializationException;
import org.apache.kafka.common.serialization.Deserializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class AvroConfluentDeserializer<T extends SpecificRecordBase> implements Deserializer<T> {

    private static final Logger LOG = LoggerFactory.getLogger(AvroConfluentDeserializer.class);

    // Confluent wire format: one magic byte, a 4-byte schema registry id, then the Avro payload
    protected static final byte MAGIC_BYTE = 0x0;
    protected static final int idSize = 4;

    private final DecoderFactory decoderFactory = DecoderFactory.get();
    protected final Class<T> targetType;

    public AvroConfluentDeserializer(Class<T> targetType) {
        this.targetType = targetType;
    }

    @Override
    public void close() {
        // No-op
    }

    @Override
    public void configure(Map<String, ?> arg0, boolean arg1) {
        // No-op
    }

    @Override
    public T deserialize(String topic, byte[] data) {
        try {
            T result = null;
            if (data != null) {
                LOG.info("data='{}'", DatatypeConverter.printHexBinary(data));
                result = (T) deserializePayload(data, targetType.newInstance().getSchema());
                LOG.info("deserialized data='{}'", result);
            }
            return result;
        } catch (Exception ex) {
            throw new SerializationException(
                    "Can't deserialize data '" + Arrays.toString(data) + "' from topic '" + topic + "'", ex);
        }
    }

    protected T deserializePayload(byte[] payload, Schema schema) throws SerializationException {
        int id = -1;
        try {
            ByteBuffer buffer = getByteBuffer(payload);
            id = buffer.getInt(); // the schema registry id (unused here, since the reader schema is fixed)
            int length = buffer.limit() - 1 - idSize;
            int start = buffer.position() + buffer.arrayOffset();
            DatumReader<T> reader = new SpecificDatumReader<T>(schema);
            return reader.read(null, decoderFactory.binaryDecoder(buffer.array(), start, length, null));
        } catch (IOException | RuntimeException e) {
            throw new SerializationException("Error deserializing Avro message for id " + id, e);
        }
    }

    private ByteBuffer getByteBuffer(byte[] payload) {
        ByteBuffer buffer = ByteBuffer.wrap(payload);
        if (buffer.get() != MAGIC_BYTE) {
            throw new SerializationException("Unknown magic byte!");
        }
        return buffer;
    }
}
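
To hand this deserializer to a @KafkaListener, one option is to pass an instance directly to the consumer factory. A sketch, assuming a hypothetical Avro-generated class MyRecord and otherwise standard spring-kafka configuration:

import java.util.HashMap;
import java.util.Map;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.kafka.annotation.EnableKafka;
import org.springframework.kafka.config.ConcurrentKafkaListenerContainerFactory;
import org.springframework.kafka.core.ConsumerFactory;
import org.springframework.kafka.core.DefaultKafkaConsumerFactory;

@Configuration
@EnableKafka
public class KafkaConsumerConfig {

    @Bean
    public ConsumerFactory<String, MyRecord> consumerFactory() {
        Map<String, Object> props = new HashMap<>();
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(ConsumerConfig.GROUP_ID_CONFIG, "my-group");
        // Pass deserializer instances directly, so no no-arg constructor is needed
        return new DefaultKafkaConsumerFactory<>(props,
                new StringDeserializer(),
                new AvroConfluentDeserializer<>(MyRecord.class));
    }

    @Bean
    public ConcurrentKafkaListenerContainerFactory<String, MyRecord> kafkaListenerContainerFactory() {
        ConcurrentKafkaListenerContainerFactory<String, MyRecord> factory =
                new ConcurrentKafkaListenerContainerFactory<>();
        factory.setConsumerFactory(consumerFactory());
        return factory;
    }
}

A @KafkaListener(topics = "my-topic") method can then receive MyRecord payloads directly.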

Twitter tweets not getting stored in Kafka Topic

I am using Hortonworks. I am trying to fetch tweets from Twitter and store them in a Kafka topic, using the Kafka API in Java. The incoming tweets are displayed in the Eclipse console, but they are not stored in the Kafka topic.
Following is my Java code.
In the Eclipse run arguments I pass the Twitter access tokens, the topic name, and a keyword (hashtag).
import java.util.Arrays;
import java.util.Properties;
import java.util.concurrent.LinkedBlockingQueue;
import twitter4j.*;
import twitter4j.conf.ConfigurationBuilder;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class KafkaTwitterProducer {
    public static void main(String[] args) throws Exception {
        final LinkedBlockingQueue<Status> queue = new LinkedBlockingQueue<Status>(10);
        if (args.length < 6) { // consumer key/secret, access token/secret, topic, at least one keyword
            System.out.println(
                    "Usage: KafkaTwitterProducer <twitter-consumer-key> <twitter-consumer-secret> <twitter-access-token> <twitter-access-token-secret> <topic-name> <twitter-search-keywords>");
            return;
        }
        String consumerKey = args[0];
        String consumerSecret = args[1];
        String accessToken = args[2];
        String accessTokenSecret = args[3];
        String topicName = args[4];
        String[] arguments = args.clone();
        String[] keyWords = Arrays.copyOfRange(arguments, 5, arguments.length);

        // Set the Twitter OAuth tokens in the configuration
        ConfigurationBuilder cb = new ConfigurationBuilder();
        cb.setDebugEnabled(true).setOAuthConsumerKey(consumerKey).setOAuthConsumerSecret(consumerSecret)
                .setOAuthAccessToken(accessToken).setOAuthAccessTokenSecret(accessTokenSecret);

        // Create the twitter stream using the configuration
        TwitterStream twitterStream = new TwitterStreamFactory(cb.build()).getInstance();
        StatusListener listener = new StatusListener() {
            public void onStatus(Status status) {
                queue.offer(status);
            }

            public void onDeletionNotice(StatusDeletionNotice statusDeletionNotice) {
                //System.out.println("Got a status deletion notice id:" + statusDeletionNotice.getStatusId());
            }

            public void onTrackLimitationNotice(int numberOfLimitedStatuses) {
                //System.out.println("Got track limitation notice:" + numberOfLimitedStatuses);
            }

            public void onScrubGeo(long userId, long upToStatusId) {
                //System.out.println("Got scrub_geo event userId:" + userId + "upToStatusId:" + upToStatusId);
            }

            public void onStallWarning(StallWarning warning) {
                //System.out.println("Got stall warning:" + warning + "\n");
            }

            public void onException(Exception ex) {
                ex.printStackTrace();
            }
        };
        twitterStream.addListener(listener);

        // Filter keywords
        FilterQuery query = new FilterQuery().track(keyWords);
        twitterStream.filter(query);
        // Thread.sleep(5000);

        // Add Kafka producer config settings
        Properties props = new Properties();
        props.put("metadata.broker.list", "localhost:9092"); // legacy property; the new producer uses bootstrap.servers
        props.put("bootstrap.servers", "localhost:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("linger.ms", 1);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        Producer<String, String> producer = new KafkaProducer<String, String>(props);

        int j = 0;
        // Poll for new tweets in the queue. If new tweets are added, send them to the topic.
        while (true) {
            Status ret = queue.poll();
            if (ret == null) {
                Thread.sleep(100);
            } else {
                for (HashtagEntity hashtage : ret.getHashtagEntities()) {
                    System.out.println("Tweet:" + ret);
                    System.out.println("Hashtag: " + hashtage.getText());
                    // producer.send(new ProducerRecord<String, String>(
                    //         topicName, Integer.toString(j++), hashtage.getText()));
                    producer.send(new ProducerRecord<String, String>(topicName, Integer.toString(j++), ret.getText()));
                }
            }
        }
        // producer.close();
        // Thread.sleep(500);
        // twitterStream.shutdown();
    }
}
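
One thing worth checking: producer.send(...) is asynchronous, and because the while (true) loop never exits, the commented-out producer.close() (which would flush buffered records) is never reached, so any broker-side failure is dropped silently. A sketch of sending with a callback so errors become visible, plus an explicit flush while debugging:

// Send with a callback so broker-side failures are printed instead of silently dropped
producer.send(new ProducerRecord<String, String>(topicName, Integer.toString(j++), ret.getText()),
        (metadata, exception) -> {
            if (exception != null) {
                exception.printStackTrace(); // e.g. topic missing, broker unreachable
            } else {
                System.out.println("Stored at " + metadata.topic() + "-" + metadata.partition()
                        + "@" + metadata.offset());
            }
        });
producer.flush(); // force buffered records out (useful while debugging)

If the callback prints an exception, the problem is between the producer and the broker (topic name, listener address, security config) rather than in the Twitter side of the code.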

Java : Google Reverse Geocoding with jackson API

I want to get the address using the Google Maps API. In my project we are using the Jackson parser.
I want to know how I can get my expected result, i.e.:
"A10, Dhamidhar Rd, Yashkamal Society, Vasna, Ahmedabad, Gujarat 380007, India"
I want to ignore all the other fields in the API response, since there are too many objects in the JSON. I only want to fetch:
"formatted_address" : "A10, Dhamidhar Rd, Yashkamal Society, Vasna, Ahmedabad, Gujarat 380007, India",
http://maps.google.com/maps/api/geocode/json?latlng=23.0043673,72.5411868999996&sensor=false
Thank you.
This is what I have tried:
package com.example.api.batch;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.util.JSONPObject;

public class GeocodeAddressParser {

    @SuppressWarnings("deprecation")
    public void getLocationInfo(String lat, String lng) throws JsonProcessingException, IOException {
        HttpGet httpGet = new HttpGet("http://maps.google.com/maps/api/geocode/json?latlng=" + lat + "," + lng + "&sensor=false");
        @SuppressWarnings("resource")
        HttpClient client = new DefaultHttpClient();
        HttpResponse response;
        StringBuilder stringBuilder = new StringBuilder();
        try {
            response = client.execute(httpGet);
            HttpEntity entity = response.getEntity();
            InputStream stream = entity.getContent();
            int b;
            while ((b = stream.read()) != -1) {
                stringBuilder.append((char) b);
            }
        } catch (ClientProtocolException e) {
        } catch (IOException e) {
        }
        ObjectMapper mapper = new ObjectMapper();
        JsonNode array = mapper.readValue(stringBuilder.toString(), JsonNode.class);
        JsonNode object = array.get(0);
        String reportKey = object.get("results").textValue();
        // logger.info("ExportController : generatesExportExcel : parameters: {}", reportKey);
        // System.out.println("Map Keys:\n"+rawData.getStatus());
        //
        // List<GeocodeGetResult> locations = rawData.getResults();
        //
        // for(int i=0; i < locations.size(); i++){
        //     GeocodeGetResult object = locations.get(i);
        //     System.out.println(object);
        // }
        // for(int i=0; i < locations.size(); i++){
        //     SourceLocation object = locations.get(i);
        //     //System.out.println(object.getClass().getName()+" "+object);
        //     SourceLocation converted = convertSourceLocation(object);
        //     System.out.println(converted);
        //     toBeInserted.add(converted);
        // }
        // JSONPObject jsonObject = new JSONPObject();
        // try {
        //     // ObjectNode node = mapper.createObjectNode();
        //     JsonNode actualObj = mapper.readTree(stringBuilder.toString());
        //     // jsonObject = new JSONObject();
        // } catch (JsonParseException e) {
        //     e.printStackTrace();
        // }
        // return mapper;
    }

    public static void main(String[] args) throws JsonProcessingException, IOException {
        GeocodeAddressParser ref = new GeocodeAddressParser();
        ref.getLocationInfo("23.0043673", "72.5411868999996");
        // ObjectMapper location;
        // String location_string;
        // try {
        //     // Get JSON Array called "results" and then get the 0th complete object as JSON
        //     location = ret.getJSONArray("results").getJSONObject(0);
        //     // Get the value of the attribute whose name is "formatted_string"
        //     location_string = location.getString("formatted_address");
        //     Log.d("test", "formattted address:" + location_string);
        // } catch (JSONException e1) {
        //     e1.printStackTrace();
        // }
    }
}
Change your getLocationInfo method and replace the following part:
ObjectMapper mapper = new ObjectMapper();
JsonNode array = mapper.readValue(stringBuilder.toString(), JsonNode.class);
JsonNode object = array.get("results").get(0);
String reportKey = object.get("formatted_address").textValue();
System.out.println(reportKey);
In your given code,
JsonNode object = array.get(0);
will return null (object contains null), because the response from the API link is a JSON object, not an array, so there is no element 0.
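
Since the goal is to ignore everything except formatted_address, another option (a sketch; GeocodeResponse and GeocodeResult are hypothetical names) is to bind only the fields you care about and let Jackson skip the rest:

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.List;

@JsonIgnoreProperties(ignoreUnknown = true)
class GeocodeResponse {
    public List<GeocodeResult> results;
}

@JsonIgnoreProperties(ignoreUnknown = true)
class GeocodeResult {
    @JsonProperty("formatted_address")
    public String formattedAddress; // the only field we keep; everything else is dropped
}

// Usage:
// GeocodeResponse resp = new ObjectMapper().readValue(stringBuilder.toString(), GeocodeResponse.class);
// String address = resp.results.get(0).formattedAddress;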

Is this ClassCastException a HtmlUnit bug?

I'm new to HtmlUnit (2.23) and I can't get this test to work. I'm getting this ClassCastException thrown out of HtmlUnit, and I don't know if it is a bug or if I am doing something wrong.
java.lang.ClassCastException: com.gargoylesoftware.htmlunit.TextPage cannot be cast to com.gargoylesoftware.htmlunit.html.HtmlPage
    at com.gargoylesoftware.htmlunit.WebClient.makeWebResponseForJavaScriptUrl(WebClient.java:1241)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:375)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:304)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:451)
    at com.gargoylesoftware.htmlunit.WebClient.getPage(WebClient.java:436)
    at org.wyttenbach.dale.mlec.OutageTest.test(OutageTest.java:46)
    ...
The code:
import java.awt.Desktop;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.junit.Assert;
import org.junit.Test;
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException;
import com.gargoylesoftware.htmlunit.JavaScriptPage;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.TextPage;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;

public class OutageTest {

    private static final String SITE_URL = "https://ebill.mlecmn.net/woViewer/";
    private static final String OUTAGE_MAP_URL = SITE_URL + "mapviewer.html?config=Outage+Web+Map";

    @Test
    public void test() throws FailingHttpStatusCodeException, MalformedURLException, IOException {
        try (final WebClient webClient = new WebClient()) {
            webClient.waitForBackgroundJavaScript(20000);
            webClient.setAjaxController(new NicelyResynchronizingAjaxController());
            webClient.getOptions().setUseInsecureSSL(true);
            Map<String, Page> urls = new HashMap<String, Page>();
            LinkedList<String> urlsToVisit = new LinkedList<String>();
            urlsToVisit.add(OUTAGE_MAP_URL);
            while (!urlsToVisit.isEmpty()) {
                String url = urlsToVisit.remove();
                if (urls.containsKey(url)) {
                    continue;
                }
                Page page = webClient.getPage(url);
                urls.put(url, page);
                if (page instanceof HtmlPage) {
                    HtmlPage page2 = (HtmlPage) page;
                    System.err.println("================================================================");
                    System.err.println(page2.asXml());
                    System.err.println("================================================================");
                    Assert.assertFalse("Outage in Nordland township: " + url, page2.asText().contains("Nordland"));
                    urlsToVisit.addAll(extractLinks(page2));
                } else if (page instanceof JavaScriptPage) {
                    JavaScriptPage page2 = (JavaScriptPage) page;
                    Assert.assertFalse("Outage in Nordland township: " + url, page2.getContent().contains("Nordland"));
                } else if (page instanceof TextPage) {
                    TextPage page2 = (TextPage) page;
                    Assert.assertFalse("Outage in Nordland township: " + url, page2.getContent().contains("Nordland"));
                } else {
                    System.err.println(String.format("%s => %s", url, page.getClass().getName()));
                }
            }
        } catch (AssertionError e) {
            reportOutage();
            throw e;
        }
    }

    private Collection<String> extractLinks(HtmlPage page) {
        List<String> links = new ArrayList<String>();
        for (DomElement x : page.getElementsByTagName("script")) {
            String src = x.getAttribute("src");
            if (!src.contains(":")) {
                src = SITE_URL + src;
                System.err.println("script src=" + src);
            }
            links.add(src);
        }
        for (DomElement x : page.getElementsByTagName("link")) {
            String href = x.getAttribute("href");
            if (!href.contains(":")) {
                href = SITE_URL + href;
                System.err.println("link href=" + href);
            }
            links.add(href);
        }
        // Causes ClassCastException: com.gargoylesoftware.htmlunit.TextPage cannot be cast to com.gargoylesoftware.htmlunit.html.HtmlPage
        // at com.gargoylesoftware.htmlunit.WebClient.makeWebResponseForJavaScriptUrl(WebClient.java:1241)
        for (DomElement x : page.getElementsByTagName("iframe")) {
            String src = x.getAttribute("src");
            if (!src.contains(":")) {
                src = SITE_URL + src;
                System.err.println("iframe src=" + src);
            }
            links.add(src);
        }
        return links;
    }

    private void reportOutage() {
        try {
            Desktop.getDesktop().browse(new URI(OUTAGE_MAP_URL));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
More or less, yes - but I would have to do a deeper analysis.
There is some hope for you, though ;-)
Your code tries to extract URLs from a given web page. In the process you add the URL 'javascript:""' to your list of URLs to be processed, and this URL results in the class cast exception. If you do not add this URL to the list, the test works (at least for me).
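
A minimal guard in extractLinks along these lines (a sketch for the src case; the href loop would get the same check) keeps such pseudo-URLs out of the queue:

// Skip empty and javascript: pseudo-URLs, which WebClient.getPage cannot
// resolve to a normal page and which trigger the cast inside HtmlUnit.
String src = x.getAttribute("src");
if (src.isEmpty() || src.startsWith("javascript:")) {
    continue;
}
if (!src.contains(":")) {
    src = SITE_URL + src;
}
links.add(src);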
