Beam Java pipeline with a Python transform on Google Cloud Dataflow Runner v2 (Maven)

I'm trying to create a multi-language pipeline on Google Cloud Dataflow Runner v2: the pipeline is built with the Apache Beam Java SDK and uses a transform written with the Python SDK.
My expansion service in Python runs correctly, and when I run my Java pipeline it does reach my Python transform, but then it throws this error:
Failed to execute goal org.codehaus.mojo:exec-maven-plugin:3.0.0:java (default-cli) on project Job: An exception occured while executing the Java class. UNIMPLEMENTED: Method not found!
This is my expansion service with my transform in Python
import argparse, logging, signal, sys, grpc

import apache_beam as beam
from apache_beam.pipeline import PipelineOptions
from apache_beam.portability.api import beam_expansion_api_pb2_grpc
from apache_beam.runners.portability import expansion_service
from apache_beam.transforms import ptransform
from apache_beam.utils import thread_pool_executor

_LOGGER = logging.getLogger(__name__)

URN = "beam:transforms:xlang:pythontransform3"


class WriteToGS(beam.DoFn):
    def process(self, element):
        beam.io.WriteToText("gs://path/to/wordcount.txt")


@ptransform.PTransform.register_urn(URN, None)
class PythonTransform(ptransform.PTransform):
    def __init__(self):
        super().__init__()

    def expand(self, pcoll):
        _LOGGER.info('Python transform reached')
        (pcoll | "Python transform" >> beam.ParDo(WriteToGS()))

    def to_runner_api_parameter(self, unused_context):
        return URN, None

    def from_runner_api_parameter(unused_ptransform, unused_parameter, unused_context):
        return PythonTransform()


server = None


def cleanup(unused_signum, unused_frame):
    _LOGGER.info('Shutting down expansion service.')
    server.stop(None)


def main(unused_argv):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p', '--port', type=int, help='port on which to serve the job api')
    options = parser.parse_args()

    global server
    server = grpc.server(thread_pool_executor.shared_unbounded_instance())
    beam_expansion_api_pb2_grpc.add_ExpansionServiceServicer_to_server(
        expansion_service.ExpansionServiceServicer(
            PipelineOptions(["--experiments", "beam_fn_api", "--sdk_location", "container"])),
        server)
    server.add_insecure_port('localhost:{}'.format(options.port))
    server.start()
    _LOGGER.info('Listening for expansion requests at %d', options.port)

    signal.signal(signal.SIGTERM, cleanup)
    signal.signal(signal.SIGINT, cleanup)
    # blocking main thread forever.
    signal.pause()


if __name__ == '__main__':
    logging.getLogger().setLevel(logging.INFO)
    main(sys.argv)
This is my pipeline in Java
import org.apache.beam.runners.core.construction.External;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.metrics.Counter;
import org.apache.beam.sdk.metrics.Distribution;
import org.apache.beam.sdk.metrics.Metrics;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.PCollection;

public class WordCount {

    static class ExtractWordsFn extends DoFn<String, String> {
        private final Counter emptyLines = Metrics.counter(ExtractWordsFn.class, "emptyLines");
        private final Distribution lineLenDist =
            Metrics.distribution(ExtractWordsFn.class, "lineLenDistro");

        @ProcessElement
        public void processElement(@Element String element, OutputReceiver<String> receiver) {
            lineLenDist.update(element.length());
            if (element.trim().isEmpty()) {
                emptyLines.inc();
            }
            // Split the line into words.
            String[] words = element.split("[^\\p{L}]+", -1);
            // Output each word encountered into the output PCollection.
            for (String word : words) {
                if (!word.isEmpty()) {
                    receiver.output(word);
                }
            }
        }
    }

    public static class CountWords
            extends PTransform<PCollection<String>, PCollection<String>> {
        @Override
        public PCollection<String> expand(PCollection<String> lines) {
            // Convert lines of text into individual words.
            PCollection<String> words = lines.apply(ParDo.of(new ExtractWordsFn()));
            return words;
        }
    }

    public interface WordCountOptions extends PipelineOptions {
        @Description("Path of the file to read from")
        @Default.String("gs://apache-beam-samples/shakespeare/kinglear.txt")
        String getInputFile();

        void setInputFile(String value);
    }

    static void runWordCount(WordCountOptions options) {
        Pipeline p = Pipeline.create(options);
        p.apply("ReadLines", TextIO.read().from(options.getInputFile()))
            .apply(new CountWords())
            .apply("ExternalPythonTransform",
                External.of("beam:transforms:xlang:pythontransform3", new byte[] {}, "localhost:9098"));
        p.run().waitUntilFinish();
    }

    public static void main(String[] args) {
        WordCountOptions options =
            PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);
        runWordCount(options);
    }
}

Related

BDD JBehave stories result in Pending while executing

Recently I started working on BDD using JBehave.
So far, if I run it using Maven, the Maven project builds successfully. It then gets into the story file but does not proceed any further.
I tried running it with JUnit as well, but I get the same result.
I think my problem is with my runner file.
I have searched many sites, including jbehave.org and many Stack Overflow questions, but in vain.
Please help me resolve this problem, and let me know if you need any additional information.
I have spent a lot of time trying to fix this but couldn't find a solution.
Here is my runner file:
package runnerFile;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.jbehave.core.configuration.Configuration;
import org.jbehave.core.configuration.MostUsefulConfiguration;
import org.jbehave.core.io.CodeLocations;
import org.jbehave.core.io.LoadFromClasspath;
import org.jbehave.core.io.StoryFinder;
import org.jbehave.core.junit.JUnitStories;
import org.jbehave.core.junit.JUnitStory;
import org.jbehave.core.reporters.Format;
import org.jbehave.core.reporters.StoryReporterBuilder;
import org.jbehave.core.steps.InjectableStepsFactory;
import org.jbehave.core.steps.InstanceStepsFactory;
import org.jbehave.core.steps.ScanningStepsFactory;
import org.jbehave.core.steps.Steps;

public class TestRunner extends JUnitStories {

    @Override
    public Configuration configuration() {
        return new MostUsefulConfiguration()
            .useStoryLoader(
                new LoadFromClasspath(this.getClass().getClassLoader()))
            .useStoryReporterBuilder(
                new StoryReporterBuilder()
                    .withDefaultFormats()
                    .withFormats(Format.HTML, Format.CONSOLE)
                    .withRelativeDirectory("jbehave-report")
            );
    }

    @Override
    public InjectableStepsFactory stepsFactory() {
        // ArrayList<Object> stepFileList = new ArrayList<Object>();
        ArrayList<Steps> stepFileList = new ArrayList<Steps>();
        stepFileList.add(new Steps(configuration()));
        return new InstanceStepsFactory(configuration(), stepFileList);
        // return new ScanningStepsFactory(configuration(), "org.jbehave.examples.core.steps", "my.other.steps").matchingNames(".*Steps").notMatchingNames(".*SkipSteps");
    }

    @Override
    protected List<String> storyPaths() {
        return new StoryFinder().findPaths(
            CodeLocations.codeLocationFromClass(this.getClass()),
            Arrays.asList("**/TC_2.story"),
            Arrays.asList(""));
    }
}
I kept my story file inside src/test/resources and my step definitions inside src/test/java.
Story (src/test/resources):
Narrative:
In order to communicate effectively to the business some functionality
As a development team
I want to use Behaviour-Driven Development
Scenario: A scenario is a collection of executable steps of different type
Given I launch the url
When I login with username <Username> and password <Password>
Then I should see the homepage
Examples:
|Username|Password|
|test@gmail.com|test1234|
Step definition (src/test/java):
package definition;

import org.jbehave.core.annotations.Given;
import org.jbehave.core.annotations.Named;
import org.jbehave.core.annotations.Then;
import org.jbehave.core.annotations.When;

import pages.Homepage_Pages;

public class HomePage {

    Homepage_Pages home;

    @Given("I launch the url")
    public void url() {
        home.launchUrl();
    }

    @When("I login with username <Username> and password <Password>")
    public void login(@Named("Username") String Username, @Named("Password") String Password) {
        System.out.println(Username);
    }

    @Then("I should see the homepage")
    public void homePageVerification() {
        System.out.println("Heello");
    }
}
Maven Console:
Try the following code, which is a stripped-down, simple test runner that does nothing fancy: it simply runs all stories found in sub-folders of the main folder and includes all step classes found in the defined steps-file location. My original had a lot of these things hard-coded, but I changed them to final Strings, so it should be easy enough to adapt to your situation and run with this file. Obviously, change "com.yourpackage.steps" to whatever package folder you place your steps files in. Hope this helps.
package testrunner;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

import org.jbehave.core.configuration.Configuration;
import org.jbehave.core.configuration.MostUsefulConfiguration;
import org.jbehave.core.embedder.EmbedderControls;
import org.jbehave.core.io.CodeLocations;
import org.jbehave.core.io.StoryFinder;
import org.jbehave.core.junit.JUnitStories;
import org.jbehave.core.reporters.CrossReference;
import org.jbehave.core.reporters.Format;
import org.jbehave.core.reporters.StoryReporterBuilder;
import org.jbehave.core.steps.InjectableStepsFactory;
import org.jbehave.core.steps.InstanceStepsFactory;
import org.junit.runner.RunWith;

import de.codecentric.jbehave.junit.monitoring.JUnitReportingRunner;

@RunWith(JUnitReportingRunner.class)
public class TestRunner extends JUnitStories {

    private Configuration configuration;

    public TestRunner() {
        super();
        CrossReference crossReference = new CrossReference();
        configuration = new MostUsefulConfiguration();
        configuration.useStoryReporterBuilder(
            new StoryReporterBuilder().withFormats(Format.HTML, Format.STATS, Format.CONSOLE)
                .withCodeLocation(CodeLocations.codeLocationFromPath("target/."))
                .withCrossReference(crossReference));

        EmbedderControls embedderControls = configuredEmbedder().embedderControls();
        embedderControls.doBatch(false);
        embedderControls.doGenerateViewAfterStories(true);
        embedderControls.doSkip(false);
        embedderControls.doVerboseFailures(true);
        embedderControls.doVerboseFiltering(true);
        embedderControls.useThreads(1);
        embedderControls.useStoryTimeouts("1800");
    }

    @Override
    protected List<String> storyPaths() {
        return new StoryFinder().findPaths(CodeLocations.codeLocationFromClass(this.getClass()), "**/*.story", "");
    }

    @Override
    public Configuration configuration() {
        return configuration;
    }

    @Override
    public InjectableStepsFactory stepsFactory() {
        final String stepsPackage = "com.yourpackage.steps";
        final String stepsLoc = "src/test/java/" + stepsPackage.replace(".", "/");

        List<Object> stepList = new ArrayList<Object>();
        File steps = new File(stepsLoc);
        File[] fileList = steps.listFiles();
        int size = fileList.length;
        for (int i = 0; i < size; i++) {
            if (fileList[i].isFile()) { // listFiles() also returns folders (directories)
                String value = fileList[i].getName().replace(".java", ""); // strip extensions
                if (!value.toLowerCase().contains("testrunner")) { // ignore the test runner itself
                    try {
                        Object stepObject = Class.forName(stepsPackage + "." + value).newInstance();
                        stepList.add(stepObject);
                    } catch (InstantiationException e) {
                        e.printStackTrace();
                    } catch (IllegalAccessException e) {
                        e.printStackTrace();
                    } catch (ClassNotFoundException e) {
                        e.printStackTrace();
                    }
                }
            }
        }
        return new InstanceStepsFactory(configuration(), stepList);
    }
}
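For completeness, a hypothetical steps class that this runner would pick up could look like the sketch below; the package com.yourpackage.steps and the class name LoginSteps are placeholders, and the step annotations mirror the story file from the question.

package com.yourpackage.steps;

import org.jbehave.core.annotations.Given;
import org.jbehave.core.annotations.Named;
import org.jbehave.core.annotations.Then;
import org.jbehave.core.annotations.When;

// Hypothetical example: the runner above instantiates every class it finds in the
// steps package (except the test runner itself) and registers it as a steps file.
public class LoginSteps {

    @Given("I launch the url")
    public void launchUrl() {
        System.out.println("launching url");
    }

    @When("I login with username <Username> and password <Password>")
    public void login(@Named("Username") String username, @Named("Password") String password) {
        System.out.println("logging in as " + username);
    }

    @Then("I should see the homepage")
    public void verifyHomepage() {
        System.out.println("homepage displayed");
    }
}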

How to give flag options in a Stanford NLP program?

The site suggests that I can use several flags:
https://nlp.stanford.edu/software/openie.html
But how do I use them? I tried doing it this way:
import edu.stanford.nlp.ie.util.RelationTriple;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.naturalli.NaturalLogicAnnotations;
import edu.stanford.nlp.util.CoreMap;

import java.util.Collection;
import java.util.Properties;

/**
 * A demo illustrating how to call the OpenIE system programmatically.
 */
public class OpenIEDemo {

    public static void main(String[] args) throws Exception {
        // Create the Stanford CoreNLP pipeline
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
        props.setProperty("openieformat", "ollie");
        props.setProperty("openieresolve_coref", "1");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        // Annotate an example document.
        Annotation doc = new Annotation("Obama was born in Hawaii. He is our president.");
        pipeline.annotate(doc);

        // Loop over sentences in the document
        for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
            // Get the OpenIE triples for the sentence
            Collection<RelationTriple> triples = sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
            // Print the triples
            for (RelationTriple triple : triples) {
                System.out.println(triple.confidence + "\t" +
                    triple.subjectLemmaGloss() + "\t" +
                    triple.relationLemmaGloss() + "\t" +
                    triple.objectLemmaGloss());
            }
        }
    }
}
I have added
props.setProperty("openieformat","ollie");
props.setProperty("openieresolve_coref","1");
But it's not working.
For StanfordCoreNLP, flags/properties for individual annotators are set with a property name of the form annotator.flag, and boolean flags take the value "true" or "false". So what you have is close to right, but it needs to be:
props.setProperty("openie.format","ollie");
props.setProperty("openie.resolve_coref","true");

How to get protobuf extension field in ProtobufAnnotationSerializer

I am new to Protocol Buffers and am trying to figure out how to extend a message type in the Stanford CoreNLP library, as described here: https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.html
The problem: I can set the extension field, but I can't get it. I boiled the problem down to the code below. In the original message the field name is [edu.stanford.nlp.pipeline.myNewField], but it is replaced by the field number 101 in the deserialized message.
How can I get the value of myNewField?
PS: This post https://stackoverflow.com/questions/28815214/how-to-set-get-protobufs-extension-field-in-go suggests that it should be as easy as calling getExtension(MyAppProtos.myNewField).
custom.proto
syntax = "proto2";
package edu.stanford.nlp.pipeline;
option java_package = "com.example.my.awesome.nlp.app";
option java_outer_classname = "MyAppProtos";
import "CoreNLP.proto";
extend Sentence {
    optional uint32 myNewField = 101;
}
ProtoTest.java
import com.example.my.awesome.nlp.app.MyAppProtos;
import com.google.protobuf.ExtensionRegistry;
import com.google.protobuf.InvalidProtocolBufferException;

import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence;

public class ProtoTest {

    static {
        ExtensionRegistry registry = ExtensionRegistry.newInstance();
        registry.add(MyAppProtos.myNewField);
        CoreNLPProtos.registerAllExtensions(registry);
    }

    public static void main(String[] args) throws InvalidProtocolBufferException {
        Sentence originalSentence = Sentence.newBuilder()
            .setText("Hello world!")
            .setTokenOffsetBegin(0)
            .setTokenOffsetEnd(12)
            .setExtension(MyAppProtos.myNewField, 13)
            .build();
        System.out.println("Original:\n" + originalSentence);

        byte[] serialized = originalSentence.toByteArray();

        Sentence deserializedSentence = Sentence.parseFrom(serialized);
        System.out.println("Deserialized:\n" + deserializedSentence);

        Integer myNewField = deserializedSentence.getExtension(MyAppProtos.myNewField);
        System.out.println("MyNewField: " + myNewField);
    }
}
Output:
Original:
tokenOffsetBegin: 0
tokenOffsetEnd: 12
text: "Hello world!"
[edu.stanford.nlp.pipeline.myNewField]: 13
Deserialized:
tokenOffsetBegin: 0
tokenOffsetEnd: 12
text: "Hello world!"
101: 13
MyNewField: 0
Update
Because this question was about extending CoreNLP message types and using them with the ProtobufAnnotationSerializer, here is what my extended serializer looks like:
import java.io.IOException;
import java.io.InputStream;
import java.util.Set;

import com.example.my.awesome.nlp.app.MyAppProtos;
import com.google.protobuf.ExtensionRegistry;

import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence;
import edu.stanford.nlp.pipeline.CoreNLPProtos.Sentence.Builder;
import edu.stanford.nlp.pipeline.ProtobufAnnotationSerializer;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;

public class MySerializer extends ProtobufAnnotationSerializer {

    private static ExtensionRegistry registry;

    static {
        registry = ExtensionRegistry.newInstance();
        registry.add(MyAppProtos.myNewField);
        CoreNLPProtos.registerAllExtensions(registry);
    }

    @Override
    protected Builder toProtoBuilder(CoreMap sentence, Set<Class<?>> keysToSerialize) {
        keysToSerialize.remove(MyAnnotation.class);
        Builder builder = super.toProtoBuilder(sentence, keysToSerialize);
        builder.setExtension(MyAppProtos.myNewField, 13);
        return builder;
    }

    @Override
    public Pair<Annotation, InputStream> read(InputStream is)
            throws IOException, ClassNotFoundException, ClassCastException {
        CoreNLPProtos.Document doc = CoreNLPProtos.Document.parseDelimitedFrom(is, registry);
        return Pair.makePair(fromProto(doc), is);
    }

    @Override
    protected CoreMap fromProtoNoTokens(Sentence proto) {
        CoreMap result = super.fromProtoNoTokens(proto);
        result.set(MyAnnotation.class, proto.getExtension(MyAppProtos.myNewField));
        return result;
    }
}
The mistake was that I didn't provide the parseFrom call with the extension registry.
Changing Sentence deserializedSentence = Sentence.parseFrom(serialized); to Sentence deserializedSentence = Sentence.parseFrom(serialized, registry); did the job!
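Applied to the ProtoTest example above, the fix is therefore to keep the registry in a static field (rather than a local variable inside the static block) and pass it to parseFrom; a minimal sketch:

public class ProtoTest {

    // Keep the registry in a field so it can be passed to parseFrom.
    private static final ExtensionRegistry registry = ExtensionRegistry.newInstance();

    static {
        registry.add(MyAppProtos.myNewField);
        CoreNLPProtos.registerAllExtensions(registry);
    }

    public static void main(String[] args) throws InvalidProtocolBufferException {
        Sentence originalSentence = Sentence.newBuilder()
            .setText("Hello world!")
            .setTokenOffsetBegin(0)
            .setTokenOffsetEnd(12)
            .setExtension(MyAppProtos.myNewField, 13)
            .build();

        byte[] serialized = originalSentence.toByteArray();

        // Parsing with the registry keeps the extension readable by name.
        Sentence deserializedSentence = Sentence.parseFrom(serialized, registry);

        // Now prints 13 instead of the default 0.
        System.out.println("MyNewField: " + deserializedSentence.getExtension(MyAppProtos.myNewField));
    }
}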

incompatible types: Object cannot be converted to CoreLabel

I'm trying to use the Stanford tokenizer with the following example from their website:
import java.io.FileReader;
import java.io.IOException;
import java.util.List;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.process.PTBTokenizer;

public class TokenizerDemo {

    public static void main(String[] args) throws IOException {
        for (String arg : args) {
            // option #1: By sentence.
            DocumentPreprocessor dp = new DocumentPreprocessor(arg);
            for (List sentence : dp) {
                System.out.println(sentence);
            }
            // option #2: By token
            PTBTokenizer ptbt = new PTBTokenizer(new FileReader(arg),
                new CoreLabelTokenFactory(), "");
            for (CoreLabel label; ptbt.hasNext(); ) {
                label = ptbt.next();
                System.out.println(label);
            }
        }
    }
}
and I get the following error when I try to compile it:
TokenizerDemo.java:24: error: incompatible types: Object cannot be converted to CoreLabel
label = ptbt.next();
Does anyone know what the reason might be? In case you are interested, I'm using Java 1.8 and made sure that CLASSPATH contains the jar file.
Try parameterizing the PTBTokenizer class. For example:
PTBTokenizer<CoreLabel> ptbt = new PTBTokenizer<>(new FileReader(arg),
    new CoreLabelTokenFactory(), "");

Felix lists OSGI Bundle as Active but Gogo Shell Command Not accessible (dependency related)

This basic code succeeds at making the command scopeA:test accessible in the shell:
package com.A;

import org.apache.felix.ipojo.annotations.Component;
import org.apache.felix.ipojo.annotations.Instantiate;
import org.apache.felix.ipojo.annotations.Provides;
import org.apache.felix.ipojo.annotations.Requires;
import org.apache.felix.ipojo.annotations.ServiceProperty;
import org.apache.felix.service.command.Descriptor;

@Component(immediate = true)
@Instantiate
@Provides(specifications = Commands.class)
public final class Commands {

    @ServiceProperty(name = "osgi.command.scope", value = "scopeA")
    String scope;

    @ServiceProperty(name = "osgi.command.function", value = "{}")
    String[] function = new String[] {
        "test"
    };

    @Descriptor("Example")
    public void test() {
        System.out.println("hello");
    }
}
However, if I add a constructor that depends on another OSGi component, the command is no longer accessible and "help" doesn't list it. Yet the bundle still loads into an Active state.
package com.A;

import org.apache.felix.ipojo.annotations.Component;
import org.apache.felix.ipojo.annotations.Instantiate;
import org.apache.felix.ipojo.annotations.Provides;
import org.apache.felix.ipojo.annotations.Requires;
import org.apache.felix.ipojo.annotations.ServiceProperty;
import org.apache.felix.service.command.Descriptor;

import com.B;

@Component(immediate = true)
@Instantiate
@Provides(specifications = Commands.class)
public final class Commands {

    public Commands(@Requires B b) {
    }

    @ServiceProperty(name = "osgi.command.scope", value = "scopeA")
    String scope;

    @ServiceProperty(name = "osgi.command.function", value = "{}")
    String[] function = new String[] {
        "test"
    };

    @Descriptor("Example")
    public void test() {
        System.out.println("hello");
    }
}
The contents of B are simply:
import org.apache.felix.ipojo.annotations.Component;
import org.apache.felix.ipojo.annotations.Instantiate;
import org.apache.felix.ipojo.annotations.Provides;

@Component(immediate = true)
@Instantiate
@Provides
final class B {
}
Any ideas why the command is no longer listed? Any tips on finding more information about its state so that I can debug this better?
The problem is that the commands component needs the @Requires to be on a field rather than on the constructor:
@Requires
B b;
The constructor must also be removed.
This is because Gogo has a special way of invoking the component.
For me, this also needed to be changed from:
@ServiceProperty(name = "osgi.command.function", value = "{}")
String[] function = new String[] {
    "test"
};
to:
@ServiceProperty(name = "osgi.command.function", value = "{test}")
String[] function;
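Putting both changes together, the corrected command class would look roughly like this (a sketch based on the two fixes above; the package and class names are taken from the question):

package com.A;

import org.apache.felix.ipojo.annotations.Component;
import org.apache.felix.ipojo.annotations.Instantiate;
import org.apache.felix.ipojo.annotations.Provides;
import org.apache.felix.ipojo.annotations.Requires;
import org.apache.felix.ipojo.annotations.ServiceProperty;
import org.apache.felix.service.command.Descriptor;

import com.B;

@Component(immediate = true)
@Instantiate
@Provides(specifications = Commands.class)
public final class Commands {

    // Dependency injected on a field instead of through a constructor.
    @Requires
    B b;

    @ServiceProperty(name = "osgi.command.scope", value = "scopeA")
    String scope;

    // Function names listed directly in the property value.
    @ServiceProperty(name = "osgi.command.function", value = "{test}")
    String[] function;

    @Descriptor("Example")
    public void test() {
        System.out.println("hello");
    }
}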
