From this question, I understand that SplitStream in Apache Flink is now deprecated and it's recommended to use side-outputs instead.
Can someone give an example of how a side output can replace SplitStream?
e.g. How can the code snippet below be modified to use side-output?
DataStream mainDataStream = some definition
SplitStream<some-type> splitStream = mainDataStream.select("some-string")
Rather than
SplitStream<Integer> split = someDataStream.split(new OutputSelector<Integer>() {
@Override
public Iterable<String> select(Integer value) {
List<String> output = new ArrayList<String>();
if (value % 2 == 0) {
output.add("even");
}
else {
output.add("odd");
}
return output;
}
});
DataStream<Integer> evens = split.select("even");
DataStream<Integer> odds = split.select("odd");
you can now do
final OutputTag<Integer> evenTag = new OutputTag<Integer>("even"){};
final OutputTag<Integer> oddTag = new OutputTag<Integer>("odd"){};
SingleOutputStreamOperator<Integer> mainDataStream = someDataStream
.process(new ProcessFunction<Integer, Integer>() {
@Override
public void processElement(
Integer value,
Context ctx,
Collector<Integer> out) throws Exception {
if (value % 2 == 0) {
ctx.output(evenTag, value);
} else {
ctx.output(oddTag, value);
}
}
});
DataStream<Integer> evens = mainDataStream.getSideOutput(evenTag);
DataStream<Integer> odds = mainDataStream.getSideOutput(oddTag);
Note that, unlike split streams, the side output streams can be of different types.
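For example, here is a minimal sketch (the tag name and the String payload are just illustrative) where the main output stays Integer while the side output carries String:
final OutputTag<String> oddAsTextTag = new OutputTag<String>("odd-as-text"){};

SingleOutputStreamOperator<Integer> mainStream = someDataStream
        .process(new ProcessFunction<Integer, Integer>() {
            @Override
            public void processElement(Integer value, Context ctx, Collector<Integer> out) {
                if (value % 2 == 0) {
                    out.collect(value);                        // main output: Integer
                } else {
                    ctx.output(oddAsTextTag, "odd: " + value); // side output: String
                }
            }
        });

DataStream<String> oddsAsText = mainStream.getSideOutput(oddAsTextTag);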
I am working on the following program and have set a WatermarkStrategy; however, when I run the program using the inEventTime() method on the pattern, it does not give any output.
Note: the same program works when I use inProcessingTime() on the pattern.
public class FlinkCEPTest {
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
ParameterTool parameter = ParameterTool.fromArgs(args);
final String bootstrapServers = parameter.get("kafka.broker", "localhost:9092,broker:29092");
final String inputTopic_1 = parameter.get("input.topic.1","acctopic");
final String inputTopic_2 = parameter.get("input.topic.2","txntopic");
final String outputTopic = parameter.get("output.topic.q","alerttopic");
final String groupID = parameter.get("group.id","flink-demo-grp-id");
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
KafkaSource<EventMessage> source_1 = KafkaSource.<EventMessage>builder()
.setBootstrapServers(bootstrapServers)
.setTopics(inputTopic_1).setGroupId(groupID)
.setStartingOffsets(OffsetsInitializer.latest())
.setDeserializer(new EventSchema())
.build();
DataStream<EventMessage> text_1 = env.fromSource(source_1,
WatermarkStrategy
.<EventMessage>forBoundedOutOfOrderness(Duration.ofSeconds(300))
.withTimestampAssigner((event, trtimestamp)-> {
//System.err.println("Kafka ingetstion ts : " + trtimestamp);
//System.err.println("Event ts : "+ event.getTxnDate().getTime());
return event.getTxnDate().getTime();})
, "Kafka Source 1");
DataStream<EventMessage> partitionedInput = text_1.keyBy(evt -> evt.getAccountId());
//partitionedInput.print();
Pattern<EventMessage, ?> relaxedAlarmPattern = Pattern.<EventMessage>begin("first").subtype(EventMessage.class)
.where(new SimpleCondition<EventMessage>() {
private static final long serialVersionUID = 1L;
@Override
public boolean filter(EventMessage value) throws Exception {
return value.getEvent().equalsIgnoreCase("PASSWORD_CHANGE_SUCC");
}
}).followedBy("second").subtype(EventMessage.class).where(new IterativeCondition<EventMessage>() {
private static final long serialVersionUID = 1L;
@Override
public boolean filter(EventMessage value, Context<EventMessage> ctx) throws Exception {
Iterable<EventMessage> test = ctx.getEventsForPattern("first");
Integer accid = 0;
for (EventMessage te : test) {
accid = te.getAccountId();
}
return value.getEvent().equalsIgnoreCase("BENIFICIARY_ADDED")
&& value.getAccountId().equals(accid);
}
}).followedBy("third").subtype(EventMessage.class).where(new IterativeCondition<EventMessage>() {
private static final long serialVersionUID = 1L;
@Override
public boolean filter(EventMessage value, Context<EventMessage> ctx) throws Exception {
Integer accid = 0;
Iterable<EventMessage> test = ctx.getEventsForPattern("first");
for (EventMessage te : test) {
accid = te.getAccountId();
}
return value.getEvent().equalsIgnoreCase("TXN_NEW")
&& value.getAccountId().equals(accid) && value.getAmt() <= 10;
}
}).followedBy("last").subtype(EventMessage.class).where(new IterativeCondition<EventMessage>() {
private static final long serialVersionUID = 1L;
@Override
public boolean filter(EventMessage value, Context<EventMessage> ctx) throws Exception {
Integer accid = 0;
Iterable<EventMessage> test = ctx.getEventsForPattern("first");
for (EventMessage te : test) {
accid = te.getAccountId();
}
return value.getEvent().equalsIgnoreCase("TXN_NEW")
&& value.getAccountId().equals(accid) && value.getAmt() >= 100 ;
}
}).within(Time.seconds(300));
PatternStream<EventMessage> patternStream = CEP.pattern(partitionedInput, relaxedAlarmPattern)
.inEventTime();
//.inProcessingTime();
DataStream<String> alarms = patternStream.select(new PatternSelectFunction<EventMessage, String>() {
private static final long serialVersionUID = 1L;
@Override
public String select(Map<String, List<EventMessage>> pattern) throws Exception {
EventMessage first = (EventMessage) pattern.get("first").get(0);
EventMessage middle = (EventMessage) pattern.get("second").get(0);
EventMessage third = (EventMessage) pattern.get("third").get(0);
EventMessage last = (EventMessage) pattern.get("last").get(0);
return "WARNING : Possible fraud scenario [ Party ID " + first.getPartyId()
+ " recently changed his password and added a beneficiary and later made transcations of "
+ third.getAmt() + " and " + last.getAmt()+" ]";
}
});
alarms.print();
env.execute(" CEP ");
}
}
If I change the following line
PatternStream<EventMessage> patternStream = CEP.pattern(partitionedInput, relaxedAlarmPattern).inEventTime();
To
PatternStream<EventMessage> patternStream = CEP.pattern(partitionedInput, relaxedAlarmPattern).inProcessingTime();
the code works. Any suggestions on how I can make it work with the inEventTime() method?
Usually with Kafka sources the issue is that the parallelism is higher than the number of partitions, or that not all partitions receive data, which doesn't let the watermarks advance. You can solve this by adjusting the parallelism or by using withIdleness with your watermark strategy.
See more info in the Kafka connector docs.
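A minimal sketch of the second option applied to your strategy (the one-minute idle timeout is just an illustrative value):
WatermarkStrategy<EventMessage> strategy = WatermarkStrategy
        .<EventMessage>forBoundedOutOfOrderness(Duration.ofSeconds(300))
        .withTimestampAssigner((event, kafkaTimestamp) -> event.getTxnDate().getTime())
        // a partition that delivers no records for 1 minute is marked idle,
        // so it no longer holds back the overall watermark
        .withIdleness(Duration.ofMinutes(1));

DataStream<EventMessage> text_1 = env.fromSource(source_1, strategy, "Kafka Source 1");
Alternatively, make sure the source parallelism is not larger than the number of partitions of the topic.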
I am using Apache Flink on Kinesis Data Analytics.
Flink version: 1.13.2
Java: 1.11
I am consuming JSON messages from Kafka. Sample input records look as below:
null {"plateNumber":"506b9910-74a7-4c3e-a885-b5e9717efe3a","vignetteStickerId":"9e69df3f-d728-4fc8-9b09-42104588f772","currentTimestamp":"2022/04/07 16:19:55","timestamp":1649362795.444459000,"vehicleType":"TRUCK","vehicleModelType":"TOYOTA"}
null {"plateNumber":"5ffe0326-571e-4b97-8f7b-4f49aebb6993","vignetteStickerId":"6c2e1342-b096-4cc9-a92c-df61571c2c7d","currentTimestamp":"2022/04/07 16:20:00","timestamp":1649362800.638060000,"vehicleType":"CAR","vehicleModelType":"HONDA"}
null {"plateNumber":"d15f49f9-5550-4780-b260-83f3116ba64a","vignetteStickerId":"1366fbfe-7d0a-475f-9249-261ef1dd6de2","currentTimestamp":"2022/04/07 16:20:05","timestamp":1649362805.643749000,"vehicleType":"TRUCK","vehicleModelType":"TOYOTA"}
null {"plateNumber":"803508fb-9701-438e-9028-01bb8d96a804","vignetteStickerId":"b534369f-533e-4c15-ac3f-fc28cf0f3aba","currentTimestamp":"2022/04/07 16:20:10","timestamp":1649362810.648813000,"vehicleType":"CAR","vehicleModelType":"FORD"}
I want to aggregate (sum) these records into 20-second windows using vehicleType (CAR or TRUCK) and vehicleModelType (TOYOTA, HONDA or FORD). SQL analogy: sum(...), GROUP BY vehicleType, vehicleModelType.
I am using an AggregateFunction to achieve this.
import static java.util.Objects.isNull;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.springframework.stereotype.Component;
import com.helecloud.streams.demo.model.Vehicle;
import com.helecloud.streams.demo.model.VehicleStatistics;
@Component
public class VehicleStatisticsAggregator implements AggregateFunction<Vehicle, VehicleStatistics, VehicleStatistics> {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public VehicleStatistics createAccumulator() {
System.out.println("Creating Accumulator!!");
return new VehicleStatistics();
}
@Override
public VehicleStatistics add(Vehicle vehicle, VehicleStatistics vehicleStatistics) {
System.out.println("vehicle in add method : " + vehicle);
if (isNull(vehicleStatistics.getVehicleType())) {
vehicleStatistics.setVehicleType(vehicle.getVehicleType());
}
if (isNull(vehicleStatistics.getVehicleModelType())) {
vehicleStatistics.setVehicleModelType(vehicle.getVehicleModelType());
}
// if(isNull(vehicleStatistics.getStart())) {
//
// vehicleStatistics.setStart(vehicle.getTimestamp());
// }
// if(isNull(vehicleStatistics.getCurrentTimestamp())) {
//
// vehicleStatistics.setCurrentTimestamp(vehicle.getCurrentTimestamp());
// }
if (isNull(vehicleStatistics.getCount())) {
vehicleStatistics.setCount(1);
} else {
System.out.println("incrementing count for : vehicleStatistics : " + vehicleStatistics);
vehicleStatistics.setCount(vehicleStatistics.getCount() + 1);
}
vehicleStatistics.setEnd(vehicle.getTimestamp());
System.out.println("vehicleStatistics in add : " + vehicleStatistics);
return vehicleStatistics;
}
@Override
public VehicleStatistics getResult(VehicleStatistics vehicleStatistics) {
System.out.println("vehicleStatistics in getResult : " + vehicleStatistics);
return vehicleStatistics;
}
@Override
public VehicleStatistics merge(VehicleStatistics vehicleStatistics, VehicleStatistics accumulator) {
System.out.println("Coming to merge!!");
VehicleStatistics vs = new VehicleStatistics(
// vehicleStatistics.getStart(),
accumulator.getEnd(),
// vehicleStatistics.getCurrentTimestamp(),
vehicleStatistics.getVehicleType(), vehicleStatistics.getVehicleModelType(),
vehicleStatistics.getCount() + accumulator.getCount());
System.out.println("VehicleStatistics in Merge :" + vs);
return vs;
}
}
In the above code I am also not seeing the merge code being called.
Below is the main processing code
@Service
public class ProcessingService {
@Value("${kafka.bootstrap-servers}")
private String kafkaAddress;
@Value("${kafka.group-id}")
private String kafkaGroupId;
public static final String TOPIC = "flink_input";
public static final String VEHICLE_STATISTICS_TOPIC = "flink_output";
@Autowired
private VehicleDeserializationSchema vehicleDeserializationSchema;
@Autowired
private VehicleStatisticsSerializationSchema vehicleStatisticsSerializationSchema;
@PostConstruct
public void startFlinkStreamProcessing() {
try {
processVehicleStatistic();
} catch (Exception e) {
// log.error("Cannot process", e);
e.printStackTrace();
}
}
public void processVehicleStatistic() {
try {
StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
FlinkKafkaConsumer<Vehicle> consumer = createVehicleConsumerForTopic(TOPIC, kafkaAddress, kafkaGroupId);
consumer.setStartFromLatest();
System.out.println("Starting to consume!!");
consumer.assignTimestampsAndWatermarks(WatermarkStrategy.forMonotonousTimestamps());
FlinkKafkaProducer<VehicleStatistics> producer = createVehicleStatisticsProducer(VEHICLE_STATISTICS_TOPIC, kafkaAddress);
DataStream<Vehicle> inputMessagesStream = environment.addSource(consumer);
inputMessagesStream
.keyBy((vehicle -> vehicle.getVehicleType().ordinal()))
// .keyBy(vehicle -> vehicle.getVehicleModelType().ordinal())
// .keyBy(new KeySelector<Vehicle, Tuple2<VehicleType, VehicleModelType>>() {
//
// /**
// *
// */
// private static final long serialVersionUID = 1L;
//
// @Override
// public Tuple2<VehicleType, VehicleModelType> getKey(Vehicle vehicle) throws Exception {
// return Tuple2.of(vehicle.getVehicleType(), vehicle.getVehicleModelType());
// }
// })
// .filter(v -> CAR.equals(v.getVehicleType()))
.window(TumblingEventTimeWindows.of(Time.seconds(20)))
// .windowAll(TumblingEventTimeWindows.of(Time.seconds(10)))
.aggregate(new VehicleStatisticsAggregator())
.addSink(producer);
System.out.println("Adding to Sink!!");
environment.execute("Car Truck Counts By Model");
} catch(Exception e) {
e.printStackTrace();
}
}
private FlinkKafkaConsumer<Vehicle> createVehicleConsumerForTopic(String topic, String kafkaAddress, String kafkaGroup ) {
Properties properties = new Properties();
properties.setProperty("bootstrap.servers", kafkaAddress);
properties.setProperty("group.id", kafkaGroup);
return new FlinkKafkaConsumer<>(topic, vehicleDeserializationSchema, properties);
}
private FlinkKafkaProducer<VehicleStatistics> createVehicleStatisticsProducer(String topic, String kafkaAddress){
return new FlinkKafkaProducer<>(kafkaAddress, topic, vehicleStatisticsSerializationSchema);
}
}
I am getting the result as below.
null {"end":1649362835.665466000,"vehicleType":"TRUCK","vehicleModelType":"HONDA","count":3}
null {"end":1649362825.656024000,"vehicleType":"CAR","vehicleModelType":"TOYOTA","count":1}
null {"end":1649362850.675786000,"vehicleType":"CAR","vehicleModelType":"TOYOTA","count":3}
null {"end":1649362855.677596000,"vehicleType":"TRUCK","vehicleModelType":"TOYOTA","count":1}
But is there a way to validate this?
The other question is: I am trying to aggregate the result based on multiple keys; is AggregateFunction the correct way to do this?
I am asking this because I saw How can I sum multiple fields in Flink?
So if I have to aggregate (sum) multiple fields, can an aggregate function accomplish that the way I wrote the code?
Kindly let me know. Thanks in advance.
Merge will only be called if you are using windows that merge -- in other words, if you are using session windows, or a custom merging window.
The correct way to aggregate based on multiple keys is to use keyBy with a composite type, such as Tuple2<VehicleType, VehicleModelType>. Each time you call keyBy the stream is repartitioned from scratch (and not in addition to any previous partitioning).
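A minimal sketch of what that could look like in your pipeline (essentially the KeySelector you already have commented out, assuming VehicleType and VehicleModelType are the enum types used by Vehicle):
inputMessagesStream
        .keyBy(new KeySelector<Vehicle, Tuple2<VehicleType, VehicleModelType>>() {
            private static final long serialVersionUID = 1L;
            @Override
            public Tuple2<VehicleType, VehicleModelType> getKey(Vehicle vehicle) {
                // one key per (vehicleType, vehicleModelType) combination
                return Tuple2.of(vehicle.getVehicleType(), vehicle.getVehicleModelType());
            }
        })
        .window(TumblingEventTimeWindows.of(Time.seconds(20)))
        .aggregate(new VehicleStatisticsAggregator())
        .addSink(producer);
With tumbling windows each (type, model) key then gets its own 20-second aggregate; merge() would still only be invoked if you switched to a merging window such as session windows.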
I am trying to access GAE Memcache and Datastore APIs from Dataflow.
I have followed How to use memcache in dataflow? and set up the Remote API: https://cloud.google.com/appengine/docs/java/tools/remoteapi
In my pipeline I have written
public static void main(String[] args) throws IOException {
RemoteApiOptions remApiOpts = new RemoteApiOptions()
.server("xxx.appspot.com", 443)
.useApplicationDefaultCredential();
RemoteApiInstaller installer = new RemoteApiInstaller();
installer.install(remApiOpts);
try {
DatastoreConfigManager2.registerConfig("myconfig");
final String topic = DatastoreConfigManager2.getString("pubsub.topic");
final String stagingDir = DatastoreConfigManager2.getString("dataflow.staging");
...
bqRows.apply(BigQueryIO.Write
.named("Insert row")
.to(new SerializableFunction<BoundedWindow, String>() {
@Override
public String apply(BoundedWindow window) {
// The cast below is safe because CalendarWindows.days(1) produces IntervalWindows.
IntervalWindow day = (IntervalWindow) window;
String dataset = DatastoreConfigManager2.getString("dataflow.bigquery.dataset");
String tablePrefix = DatastoreConfigManager2.getString("dataflow.bigquery.tablenametemplate");
String dayString = DateTimeFormat.forPattern("yyyyMMdd")
.print(day.start());
String tableName = dataset + "." + tablePrefix + dayString;
LOG.info("Writing to BigQuery " + tableName);
return tableName;
}
})
where DatastoreConfigManager2 is
public class DatastoreConfigManager2 {
private static final DatastoreService DATASTORE = DatastoreServiceFactory.getDatastoreService();
private static final MemcacheService MEMCACHE = MemcacheServiceFactory.getMemcacheService();
static {
MEMCACHE.setErrorHandler(ErrorHandlers.getConsistentLogAndContinue(Level.INFO));
}
private static Set<String> configs = Sets.newConcurrentHashSet();
public static void registerConfig(String name) {
configs.add(name);
}
private static class DatastoreCallbacks {
// https://cloud.google.com/appengine/docs/java/datastore/callbacks
@PostPut
public void updateCacheOnPut(PutContext context) {
Entity entity = context.getCurrentElement();
if (configs.contains(entity.getKind())) {
String id = (String) entity.getProperty("id");
String value = (String) entity.getProperty("value");
MEMCACHE.put(id, value);
}
}
}
private static String lookup(String id) {
String value = (String) MEMCACHE.get(id);
if (value != null) return value;
else {
for (String config : configs) {
try {
PreparedQuery pq = DATASTORE.prepare(new Query(config)
.setFilter(new FilterPredicate("id", FilterOperator.EQUAL, id)));
for (Entity entity : pq.asIterable()) {
value = (String) entity.getProperty("value"); // use last
}
if (value != null) MEMCACHE.put(id, value);
} catch (Exception e) {
e.printStackTrace();
}
}
}
return value;
}
public static String getString(String id) {
return lookup(id);
}
}
When my pipeline runs on Dataflow I get the exception
Caused by: java.lang.NullPointerException
at com.google.appengine.api.NamespaceManager.get(NamespaceManager.java:101)
at com.google.appengine.api.memcache.BaseMemcacheServiceImpl.getEffectiveNamespace(BaseMemcacheServiceImpl.java:65)
at com.google.appengine.api.memcache.AsyncMemcacheServiceImpl.doGet(AsyncMemcacheServiceImpl.java:401)
at com.google.appengine.api.memcache.AsyncMemcacheServiceImpl.get(AsyncMemcacheServiceImpl.java:412)
at com.google.appengine.api.memcache.MemcacheServiceImpl.get(MemcacheServiceImpl.java:49)
at my.training.google.common.config.DatastoreConfigManager2.lookup(DatastoreConfigManager2.java:80)
at my.training.google.common.config.DatastoreConfigManager2.getString(DatastoreConfigManager2.java:117)
at my.training.google.mss.pipeline.InsertIntoBqWithCalendarWindow$1.apply(InsertIntoBqWithCalendarWindow.java:101)
at my.training.google.mss.pipeline.InsertIntoBqWithCalendarWindow$1.apply(InsertIntoBqWithCalendarWindow.java:95)
at com.google.cloud.dataflow.sdk.io.BigQueryIO$Write$Bound$TranslateTableSpecFunction.apply(BigQueryIO.java:1496)
at com.google.cloud.dataflow.sdk.io.BigQueryIO$Write$Bound$TranslateTableSpecFunction.apply(BigQueryIO.java:1486)
at com.google.cloud.dataflow.sdk.io.BigQueryIO$TagWithUniqueIdsAndTable.tableSpecFromWindow(BigQueryIO.java:2641)
at com.google.cloud.dataflow.sdk.io.BigQueryIO$TagWithUniqueIdsAndTable.processElement(BigQueryIO.java:2618)
Any suggestions? Thanks in advance.
EDIT: my functional requirement is building a pipeline with some configurable steps based on datastore entries.
We have a Java EE application that uses Jython to execute some Python scripts. Over time the used heap space grows until no heap space is left. In a heap dump I can see that there are a lot of Py* classes.
So I wrote a small test program:
TestApp
public class TestApp {
private final ScriptEngineManager scriptEngineManager = new ScriptEngineManager();
private HashMap<String, ScriptEngine> scriptEngines = new HashMap<String, ScriptEngine>();
private final String scriptContainerPath = "";
public static void main(String[] args) throws InterruptedException {
int counter = 1;
while(true) {
System.out.println("iteration: " + counter);
TestApp testApp = new TestApp();
testApp.execute();
counter++;
Thread.sleep(100);
}
}
void execute() {
File scriptContainer = new File(scriptContainerPath);
File[] scripts = scriptContainer.listFiles();
if (scripts != null && scripts.length > 0) {
Arrays.sort(scripts, new Comparator<File>() {
@Override
public int compare(File file1, File file2) {
return file1.getName().compareTo(file2.getName());
}
});
for (File script : scripts) {
String engineName = ScriptExecutor.getEngineNameByExtension(script.getName());
if(!scriptEngines.containsKey(engineName)) {
scriptEngines.put(engineName, scriptEngineManager.getEngineByName(engineName));
}
ScriptEngine scriptEngine = scriptEngines.get(engineName);
try {
ScriptExecutor scriptExecutor = new ScriptExecutor(scriptEngine, script, null);
Boolean disqualify = scriptExecutor.getBooleanScriptValue("disqualify");
String reason = scriptExecutor.getStringScriptValue("reason");
System.out.println("disqualify: " + disqualify);
System.out.println("reason: " + reason);
} catch (Exception e) {
e.printStackTrace();
}
}
// cleanup
for(Map.Entry<String, ScriptEngine> entry : scriptEngines.entrySet()) {
ScriptEngine engine = entry.getValue();
engine.getContext().setErrorWriter(null);
engine.getContext().setReader(null);
engine.getContext().setWriter(null);
}
}
}
}
ScriptExecutor
public class ScriptExecutor {
private final static String pythonExtension = "py";
private final static String pythonEngine = "python";
private final ScriptEngine scriptEngine;
public ScriptExecutor(ScriptEngine se, File file, Map<String, Object> keyValues) throws FileNotFoundException, ScriptException {
scriptEngine = se;
if (keyValues != null) {
for (Map.Entry<String, Object> entry : keyValues.entrySet()) {
scriptEngine.put(entry.getKey(), entry.getValue());
}
}
// execute script
Reader reader = null;
try {
reader = new FileReader(file);
scriptEngine.eval(reader);
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException e) {
// nothing to do
}
}
}
}
public Boolean getBooleanScriptValue(String key) {
// convert Object to Boolean
}
public String getStringScriptValue(String key) {
// convert Object to String
}
public static String getEngineNameByExtension(String fileName) {
String extension = fileName.substring(fileName.lastIndexOf(".") + 1);
if (pythonExtension.equalsIgnoreCase(extension)) {
System.out.println("Found engine " + pythonEngine + " for extension " + extension + ".");
return pythonEngine;
}
throw new RuntimeException("No suitable engine found for extension " + extension);
}
}
In the specified directory are 14 python scripts that all look like this:
disqualify = True
reason = "reason"
I start this program with the following VM-arguments:
-Xrs -Xms16M -Xmx16M -XX:MaxPermSize=32M -XX:NewRatio=3 -Dsun.rmi.dgc.client.gcInterval=300000 -Dsun.rmi.dgc.server.gcInterval=300000 -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSParallelRemarkEnabled -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -server
These are the arguments our app server is running with; only Xms, Xmx and MaxPermSize are smaller in my test case.
When I run this application I can see that the CMS Old Gen pool increases to its maximum size. After that the Par Eden Space pool increases, and at some point the ParNew GC does not run anymore. The cleanup part improved the situation but didn't resolve the problem. Does anybody have an idea why my heap isn't completely cleaned?
I think I have found a solution for my problem: I removed the JSR-223 stuff and now use the PythonInterpreter directly.
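In case it helps anyone, a minimal sketch of what that looks like (script and variable names are the ones from the test program above; error handling omitted):
import org.python.util.PythonInterpreter;

// one interpreter per evaluation; cleanup() releases its resources explicitly
PythonInterpreter interpreter = new PythonInterpreter();
try {
    interpreter.execfile(script.getAbsolutePath());
    Boolean disqualify = interpreter.get("disqualify", Boolean.class);
    String reason = interpreter.get("reason", String.class);
    System.out.println("disqualify: " + disqualify);
    System.out.println("reason: " + reason);
} finally {
    interpreter.cleanup();
}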
In Python I can consume a web service so easily:
from suds.client import Client
client = Client('http://www.example.org/MyService/wsdl/myservice.wsdl') #create client
result = client.service.myWSMethod("Bubi", 15) #invoke method
print result #print the result returned by the WS method
I'd like to reach such a simple usage with Java.
With Axis or CXF you have to create a web service client, i.e. a package which reproduces all the web service methods so that you can invoke them as if they were normal methods. Let's call them proxy classes; usually they are generated by the wsdl2java tool.
Useful and user-friendly. But any time I add or modify a web service method and want to use it in a client program, I need to regenerate the proxy classes.
So I found CXF's DynamicClientFactory, which avoids the use of proxy classes:
import org.apache.cxf.endpoint.Client;
import org.apache.cxf.endpoint.dynamic.DynamicClientFactory;
//...
//create client
DynamicClientFactory dcf = DynamicClientFactory.newInstance();
Client client = dcf.createClient("http://www.example.org/MyService/wsdl/myservice.wsdl");
//invoke method
Object[] res = client.invoke("myWSMethod", "Bubi");
//print the result
System.out.println("Response:\n" + res[0]);
But unfortunately it creates and compiles proxy classes at runtime, and hence requires a JDK on the production machine. I have to avoid this, or at least I can't rely on it.
My question:
Is there another way to dynamically invoke any method of a web service in Java, without having a JDK at runtime and without generating "static" proxy classes? Maybe with a different library? Thanks!
I know this is a really old question, but if you are still interested you could use the soap-ws GitHub project: https://github.com/reficio/soap-ws
Here is a really simple sample usage:
Wsdl wsdl = Wsdl.parse("http://www.webservicex.net/CurrencyConvertor.asmx?WSDL");
SoapBuilder builder = wsdl.binding()
.localPart("CurrencyConvertorSoap")
.find();
SoapOperation operation = builder.operation()
.soapAction("http://www.webserviceX.NET/ConversionRate")
.find();
Request request = builder.buildInputMessage(operation);
SoapClient client = SoapClient.builder()
.endpointUrl("http://www.webservicex.net/CurrencyConvertor.asmx")
.build();
String response = client.post(request);
As you can see it is really simple.
With CXF 3.x this could be possible with a StaxDataBinding. Follow the steps below to get the basics; of course, this can be enhanced to your needs.
Create a StaxDataBinding, something like the one below. Note that the code can be refined further.
class StaxDataBinding extends AbstractInterceptorProvidingDataBinding {
private XMLStreamDataReader xsrReader;
private XMLStreamDataWriter xswWriter;
public StaxDataBinding() {
super();
this.xsrReader = new XMLStreamDataReader();
this.xswWriter = new XMLStreamDataWriter();
inInterceptors.add(new StaxInEndingInterceptor(Phase.POST_INVOKE));
inFaultInterceptors.add(new StaxInEndingInterceptor(Phase.POST_INVOKE));
inInterceptors.add(RemoveStaxInEndingInterceptor.INSTANCE);
inFaultInterceptors.add(RemoveStaxInEndingInterceptor.INSTANCE);
}
static class RemoveStaxInEndingInterceptor
extends AbstractPhaseInterceptor<Message> {
static final RemoveStaxInEndingInterceptor INSTANCE = new RemoveStaxInEndingInterceptor();
public RemoveStaxInEndingInterceptor() {
super(Phase.PRE_INVOKE);
addBefore(StaxInEndingInterceptor.class.getName());
}
public void handleMessage(Message message) throws Fault {
message.getInterceptorChain().remove(StaxInEndingInterceptor.INSTANCE);
}
}
public void initialize(Service service) {
for (ServiceInfo serviceInfo : service.getServiceInfos()) {
SchemaCollection schemaCollection = serviceInfo.getXmlSchemaCollection();
if (schemaCollection.getXmlSchemas().length > 1) {
// Schemas are already populated.
continue;
}
new ServiceModelVisitor(serviceInfo) {
public void begin(MessagePartInfo part) {
if (part.getTypeQName() != null
|| part.getElementQName() != null) {
return;
}
part.setTypeQName(Constants.XSD_ANYTYPE);
}
}.walk();
}
}
@SuppressWarnings("unchecked")
public <T> DataReader<T> createReader(Class<T> cls) {
if (cls == XMLStreamReader.class) {
return (DataReader<T>) xsrReader;
}
else {
throw new UnsupportedOperationException(
"The type " + cls.getName() + " is not supported.");
}
}
public Class<?>[] getSupportedReaderFormats() {
return new Class[] { XMLStreamReader.class };
}
@SuppressWarnings("unchecked")
public <T> DataWriter<T> createWriter(Class<T> cls) {
if (cls == XMLStreamWriter.class) {
return (DataWriter<T>) xswWriter;
}
else {
throw new UnsupportedOperationException(
"The type " + cls.getName() + " is not supported.");
}
}
public Class<?>[] getSupportedWriterFormats() {
return new Class[] { XMLStreamWriter.class, Node.class };
}
public static class XMLStreamDataReader implements DataReader<XMLStreamReader> {
public Object read(MessagePartInfo part, XMLStreamReader input) {
return read(null, input, part.getTypeClass());
}
public Object read(QName name, XMLStreamReader input, Class<?> type) {
return input;
}
public Object read(XMLStreamReader reader) {
return reader;
}
public void setSchema(Schema s) {
}
public void setAttachments(Collection<Attachment> attachments) {
}
public void setProperty(String prop, Object value) {
}
}
public static class XMLStreamDataWriter implements DataWriter<XMLStreamWriter> {
private static final Logger LOG = LogUtils
.getL7dLogger(XMLStreamDataWriter.class);
public void write(Object obj, MessagePartInfo part, XMLStreamWriter writer) {
try {
if (!doWrite(obj, writer)) {
// write your logic for how you want to handle the input data;
// the code below just calls toString()
if (part.isElement()) {
QName element = part.getElementQName();
writer.writeStartElement(element.getNamespaceURI(),
element.getLocalPart());
if (obj != null) {
writer.writeCharacters(obj.toString());
}
writer.writeEndElement();
}
}
}
catch (XMLStreamException e) {
throw new Fault("COULD_NOT_READ_XML_STREAM", LOG, e);
}
}
public void write(Object obj, XMLStreamWriter writer) {
try {
if (!doWrite(obj, writer)) {
throw new UnsupportedOperationException("Data types of "
+ obj.getClass() + " are not supported.");
}
}
catch (XMLStreamException e) {
throw new Fault("COULD_NOT_READ_XML_STREAM", LOG, e);
}
}
private boolean doWrite(Object obj, XMLStreamWriter writer)
throws XMLStreamException {
if (obj instanceof XMLStreamReader) {
XMLStreamReader xmlStreamReader = (XMLStreamReader) obj;
StaxUtils.copy(xmlStreamReader, writer);
xmlStreamReader.close();
return true;
}
else if (obj instanceof XMLStreamWriterCallback) {
((XMLStreamWriterCallback) obj).write(writer);
return true;
}
return false;
}
public void setSchema(Schema s) {
}
public void setAttachments(Collection<Attachment> attachments) {
}
public void setProperty(String key, Object value) {
}
}
}
Prepare your input to match the expected input, something like below
private Object[] prepareInput(BindingOperationInfo operInfo, String[] paramNames,
String[] paramValues) {
List<Object> inputs = new ArrayList<Object>();
List<MessagePartInfo> parts = operInfo.getInput().getMessageParts();
if (parts != null && parts.size() > 0) {
for (MessagePartInfo partInfo : parts) {
QName element = partInfo.getElementQName();
String localPart = element.getLocalPart();
// whatever your input data is, you need to match a value to the given element;
// the code below assumes the names are in paramNames and the values in paramValues
for (int i = 0; i < paramNames.length; i++) {
if (paramNames[i].equals(localPart)) {
inputs.add(findParamValue(paramNames, paramValues, localPart));
}
}
}
}
return inputs.toArray();
}
Now set the proper data binding and pass the data
Bus bus = CXFBusFactory.getThreadDefaultBus();
WSDLServiceFactory sf = new WSDLServiceFactory(bus, wsdl);
sf.setAllowElementRefs(false);
Service svc = sf.create();
Client client = new ClientImpl(bus, svc, null,
SimpleEndpointImplFactory.getSingleton());
StaxDataBinding databinding = new StaxDataBinding();
svc.setDataBinding(databinding);
bus.getFeatures().add(new StaxDataBindingFeature());
BindingOperationInfo operInfo = ...//find the operation you need (see below)
Object[] inputs = prepareInput(operInfo, paramNames, paramValues);
client.invoke("operationname", inputs);
If needed, you can look up the operation by name, something like below:
private BindingOperationInfo findBindingOperation(Service service,
String operationName) {
for (ServiceInfo serviceInfo : service.getServiceInfos()) {
Collection<BindingInfo> bindingInfos = serviceInfo.getBindings();
for (BindingInfo bindingInfo : bindingInfos) {
Collection<BindingOperationInfo> operInfos = bindingInfo.getOperations();
for (BindingOperationInfo operInfo : operInfos) {
if (operInfo.getName().getLocalPart().equals(operationName)) {
if (operInfo.isUnwrappedCapable()) {
return operInfo.getUnwrappedOperation();
}
return operInfo;
}
}
}
}
return null;
}