Implementing Checkpointed interface: snapshotState() in flink is not being called - apache-flink

I have created the test below to test snapshots. I assumed that snapshotState() and restoreState() would be called, but it seems that is not happening. Why?
The main code:
public class CheckpointedTest {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000);
        env.setParallelism(2);
        List<Integer> list = new ArrayList<Integer>();
        for (int i = 0; i < 10; i++) {
            list.add(i);
        }
        DataStream<Integer> test = env.fromCollection(list);
        test.map(new CheckpointedFunction());
        env.execute();
    }
}
The function implementation is the following:
public class CheckpointedFunction implements MapFunction<Integer, Integer>, Checkpointed<Integer> {
    private Integer count = 0;

    @Override
    public Integer map(Integer value) throws Exception {
        System.out.println("count: " + count);
        Thread.sleep((long) (Math.random() * 4000));
        return count++;
    }

    @Override
    public Integer snapshotState(long checkpointId, long checkpointTimestamp) throws Exception {
        System.out.println("Snapshot count: " + count);
        return count;
    }

    @Override
    public void restoreState(Integer state) throws Exception {
        this.count = state;
        System.out.println("Restored count: " + count);
    }
}
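Note that checkpoints are only triggered while a job is running: with enableCheckpointing(5000), a job that consumes a 10-element collection can finish before the first checkpoint ever fires. A minimal sketch of a way to observe snapshotState(), assuming the goal is just to keep the job alive past the checkpoint interval (the source class below is illustrative, not part of the original post):

public class SlowIntegerSource implements SourceFunction<Integer> {
    private volatile boolean running = true;

    @Override
    public void run(SourceContext<Integer> ctx) throws Exception {
        int i = 0;
        while (running) {
            // emit under the checkpoint lock so emission and snapshots don't interleave
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(i++);
            }
            Thread.sleep(1000); // slow emission keeps the job running across checkpoint intervals
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}

Replacing env.fromCollection(list) with env.addSource(new SlowIntegerSource()) should give the 5-second checkpoints a chance to call snapshotState().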

Related

Flink CEP not working with inEventTime() but works with inProcessingTime() when applied on a pattern

I am working on the following program and have set a WatermarkStrategy; however, when I run the program using the inEventTime() method on the pattern, it does not give any output.
Note: the same program works when I use inProcessingTime() on the pattern.
public class FlinkCEPTest {
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {
        ParameterTool parameter = ParameterTool.fromArgs(args);
        final String bootstrapServers = parameter.get("kafka.broker", "localhost:9092,broker:29092");
        final String inputTopic_1 = parameter.get("input.topic.1", "acctopic");
        final String inputTopic_2 = parameter.get("input.topic.2", "txntopic");
        final String outputTopic = parameter.get("output.topic.q", "alerttopic");
        final String groupID = parameter.get("group.id", "flink-demo-grp-id");
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        KafkaSource<EventMessage> source_1 = KafkaSource.<EventMessage>builder()
                .setBootstrapServers(bootstrapServers)
                .setTopics(inputTopic_1).setGroupId(groupID)
                .setStartingOffsets(OffsetsInitializer.latest())
                .setDeserializer(new EventSchema())
                .build();
        DataStream<EventMessage> text_1 = env.fromSource(source_1,
                WatermarkStrategy
                        .<EventMessage>forBoundedOutOfOrderness(Duration.ofSeconds(300))
                        .withTimestampAssigner((event, trtimestamp) -> {
                            //System.err.println("Kafka ingestion ts : " + trtimestamp);
                            //System.err.println("Event ts : " + event.getTxnDate().getTime());
                            return event.getTxnDate().getTime();
                        }),
                "Kafka Source 1");
        DataStream<EventMessage> partitionedInput = text_1.keyBy(evt -> evt.getAccountId());
        //partitionedInput.print();
        Pattern<EventMessage, ?> relaxedAlarmPattern = Pattern.<EventMessage>begin("first").subtype(EventMessage.class)
                .where(new SimpleCondition<EventMessage>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public boolean filter(EventMessage value) throws Exception {
                        return value.getEvent().equalsIgnoreCase("PASSWORD_CHANGE_SUCC");
                    }
                }).followedBy("second").subtype(EventMessage.class).where(new IterativeCondition<EventMessage>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public boolean filter(EventMessage value, Context<EventMessage> ctx) throws Exception {
                        Iterable<EventMessage> test = ctx.getEventsForPattern("first");
                        Integer accid = 0;
                        for (EventMessage te : test) {
                            accid = te.getAccountId();
                        }
                        return value.getEvent().equalsIgnoreCase("BENIFICIARY_ADDED")
                                && value.getAccountId().equals(accid);
                    }
                }).followedBy("third").subtype(EventMessage.class).where(new IterativeCondition<EventMessage>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public boolean filter(EventMessage value, Context<EventMessage> ctx) throws Exception {
                        Integer accid = 0;
                        Iterable<EventMessage> test = ctx.getEventsForPattern("first");
                        for (EventMessage te : test) {
                            accid = te.getAccountId();
                        }
                        return value.getEvent().equalsIgnoreCase("TXN_NEW")
                                && value.getAccountId().equals(accid) && value.getAmt() <= 10;
                    }
                }).followedBy("last").subtype(EventMessage.class).where(new IterativeCondition<EventMessage>() {
                    private static final long serialVersionUID = 1L;

                    @Override
                    public boolean filter(EventMessage value, Context<EventMessage> ctx) throws Exception {
                        Integer accid = 0;
                        Iterable<EventMessage> test = ctx.getEventsForPattern("first");
                        for (EventMessage te : test) {
                            accid = te.getAccountId();
                        }
                        return value.getEvent().equalsIgnoreCase("TXN_NEW")
                                && value.getAccountId().equals(accid) && value.getAmt() >= 100;
                    }
                }).within(Time.seconds(300));
        PatternStream<EventMessage> patternStream = CEP.pattern(partitionedInput, relaxedAlarmPattern)
                .inEventTime();
                //.inProcessingTime();
        DataStream<String> alarms = patternStream.select(new PatternSelectFunction<EventMessage, String>() {
            private static final long serialVersionUID = 1L;

            @Override
            public String select(Map<String, List<EventMessage>> pattern) throws Exception {
                EventMessage first = (EventMessage) pattern.get("first").get(0);
                EventMessage middle = (EventMessage) pattern.get("second").get(0);
                EventMessage third = (EventMessage) pattern.get("third").get(0);
                EventMessage last = (EventMessage) pattern.get("last").get(0);
                return "WARNING : Possible fraud scenario [ Party ID " + first.getPartyId()
                        + " recently changed his password and added a beneficiary and later made transactions of "
                        + third.getAmt() + " and " + last.getAmt() + " ]";
            }
        });
        alarms.print();
        env.execute(" CEP ");
    }
}
If I change the following line
PatternStream<EventMessage> patternStream = CEP.pattern(partitionedInput, relaxedAlarmPattern).inEventTime();
To
PatternStream<EventMessage> patternStream = CEP.pattern(partitionedInput, relaxedAlarmPattern).inProcessingTime();
The code works. Any suggestions on how I can make it work with the inEventTime() method?
Usually with Kafka sources the issue is that the parallelism is higher than the number of partitions, or that not all partitions receive data, which doesn't let the watermarks advance. You can solve this by adjusting the parallelism or by using withIdleness with your watermark strategy.
See more info in the Kafka connector docs.
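A sketch of the withIdleness fix, applied to the watermark strategy from the question (the one-minute idle timeout is an arbitrary value for illustration):

WatermarkStrategy<EventMessage> strategy = WatermarkStrategy
        .<EventMessage>forBoundedOutOfOrderness(Duration.ofSeconds(300))
        .withTimestampAssigner((event, trtimestamp) -> event.getTxnDate().getTime())
        .withIdleness(Duration.ofMinutes(1)); // mark inactive partitions idle so watermarks can advance

DataStream<EventMessage> text_1 = env.fromSource(source_1, strategy, "Kafka Source 1");

Alternatively, setting the source parallelism to at most the number of Kafka partitions ensures every subtask sees data.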

Flink pre shuffle aggregation is not working

I am trying to do pre-shuffle aggregation in Flink. Following is the MapBundleFunction implementation:
public class TaxiFareMapBundleFunction extends MapBundleFunction<Long, TaxiFare, TaxiFare, TaxiFare> {

    @Override
    public TaxiFare addInput(@Nullable TaxiFare value, TaxiFare input) throws Exception {
        if (value == null) {
            return input;
        }
        value.tip = value.tip + input.tip;
        return value;
    }

    @Override
    public void finishBundle(Map<Long, TaxiFare> buffer, Collector<TaxiFare> out) throws Exception {
        for (Map.Entry<Long, TaxiFare> entry : buffer.entrySet()) {
            out.collect(entry.getValue());
        }
    }
}
I am using "CountBundleTrigger.java" . But the pre-shuffle aggregation is not working as the "count" variable is always 0. Please let me know If I am missing something.
@Override
public void onElement(T element) throws Exception {
    count++;
    if (count >= maxCount) {
        callback.finishBundle();
        reset();
    }
}
Here is the main code.
MapBundleFunction<Long, TaxiFare, TaxiFare, TaxiFare> mapBundleFunction = new TaxiFareMapBundleFunction();
BundleTrigger<TaxiFare> bundleTrigger = new CountBundleTrigger<>(10);
KeySelector<TaxiFare, Long> taxiFareLongKeySelector = new KeySelector<TaxiFare, Long>() {
    @Override
    public Long getKey(TaxiFare value) throws Exception {
        return value.driverId;
    }
};
DataStream<Tuple3<Long, Long, Float>> hourlyTips =
        // fares.keyBy((TaxiFare fare) -> fare.driverId)
        //     .window(TumblingEventTimeWindows.of(Time.hours(1))).process(new AddTips());
        fares.transform("preshuffle", TypeInformation.of(TaxiFare.class),
                new TaxiFareStream(mapBundleFunction, bundleTrigger, taxiFareLongKeySelector))
                .assignTimestampsAndWatermarks(
                        new BoundedOutOfOrdernessTimestampExtractor<TaxiFare>(Time.seconds(20)) {
                            @Override
                            public long extractTimestamp(TaxiFare element) {
                                return element.startTime.getEpochSecond();
                            }
                        })
                .keyBy((TaxiFare fare) -> fare.driverId)
                .window(TumblingProcessingTimeWindows.of(Time.minutes(1)))
                .process(new AddTips());
DataStream<Tuple3<Long, Long, Float>> hourlyMax =
        hourlyTips.windowAll(TumblingEventTimeWindows.of(Time.hours(1))).maxBy(2);
Here is the code for TaxiFareStream.java.
public class TaxiFareStream extends MapBundleOperator<Long, TaxiFare, TaxiFare, TaxiFare> {
    private KeySelector<TaxiFare, Long> keySelector;

    public TaxiFareStream(MapBundleFunction<Long, TaxiFare, TaxiFare, TaxiFare> userFunction,
                          BundleTrigger<TaxiFare> bundleTrigger,
                          KeySelector<TaxiFare, Long> keySelector) {
        super(userFunction, bundleTrigger, keySelector);
        this.keySelector = keySelector;
    }

    @Override
    protected Long getKey(TaxiFare input) throws Exception {
        return keySelector.getKey(input);
    }
}
Update
I have created the following class, but I am seeing an error. I think it is not able to serialize the class. MapStreamBundleOperator.java:
public class MapStreamBundleOperator<K, V, IN, OUT> extends
        AbstractMapStreamBundleOperator<K, V, IN, OUT> {

    private static final long serialVersionUID = 6556268125924098320L;

    /** KeySelector is used to extract key for bundle map. */
    private final KeySelector<IN, K> keySelector;

    public MapStreamBundleOperator(MapBundleFunction<K, V, IN, OUT> function, BundleTrigger<IN> bundleTrigger,
                                   KeySelector<IN, K> keySelector) {
        super(function, bundleTrigger);
        this.keySelector = keySelector;
    }

    @Override
    protected K getKey(IN input) throws Exception {
        return this.keySelector.getKey(input);
    }
}
2021-08-27 05:06:04,814 ERROR FlinkDefaults.class - Stream execution failed
org.apache.flink.streaming.runtime.tasks.StreamTaskException: Cannot serialize operator object class org.apache.flink.streaming.api.operators.SimpleUdfStreamOperatorFactory.
at org.apache.flink.streaming.api.graph.StreamConfig.setStreamOperatorFactory(StreamConfig.java:247)
at org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.setVertexConfig(StreamingJobGraphGenerator.java:497)
at org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.createChain(StreamingJobGraphGenerator.java:318)
at org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.createChain(StreamingJobGraphGenerator.java:297)
at org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.createChain(StreamingJobGraphGenerator.java:297)
at org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.setChaining(StreamingJobGraphGenerator.java:264)
at org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.createJobGraph(StreamingJobGraphGenerator.java:173)
at org.apache.flink.streaming.api.graph.StreamingJobGraphGenerator.createJobGraph(StreamingJobGraphGenerator.java:113)
at org.apache.flink.streaming.api.graph.StreamGraph.getJobGraph(StreamGraph.java:850)
at org.apache.flink.client.StreamGraphTranslator.translateToJobGraph(StreamGraphTranslator.java:52)
at org.apache.flink.client.FlinkPipelineTranslationUtil.getJobGraph(FlinkPipelineTranslationUtil.java:43)
at org.apache.flink.client.deployment.executors.PipelineExecutorUtils.getJobGraph(PipelineExecutorUtils.java:55)
at org.apache.flink.client.deployment.executors.AbstractJobClusterExecutor.execute(AbstractJobClusterExecutor.java:62)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.executeAsync(StreamExecutionEnvironment.java:1810)
at org.apache.flink.client.program.StreamContextEnvironment.executeAsync(StreamContextEnvironment.java:128)
at org.apache.flink.client.program.StreamContextEnvironment.execute(StreamContextEnvironment.java:76)
at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1697)
at com.pinterest.xenon.flink.FlinkDefaults$.run(FlinkDefaults.scala:46)
at com.pinterest.xenon.flink.FlinkWorkflow.run(FlinkWorkflow.scala:74)
at com.pinterest.xenon.flink.WorkflowLauncher$.executeWorkflow(WorkflowLauncher.scala:43)
at com.pinterest.xenon.flink.WorkflowLauncher$.delayedEndpoint$com$pinterest$xenon$flink$WorkflowLauncher$1(WorkflowLauncher.scala:25)
at com.pinterest.xenon.flink.WorkflowLauncher$delayedInit$body.apply(WorkflowLauncher.scala:9)
at scala.Function0$class.apply$mcV$sp(Function0.scala:34)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at com.pinterest.xenon.flink.WorkflowLauncher$.main(WorkflowLauncher.scala:9)
at com.pinterest.xenon.flink.WorkflowLauncher.main(WorkflowLauncher.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.flink.client.program.PackagedProgram.callMainMethod(PackagedProgram.java:288)
at org.apache.flink.client.program.PackagedProgram.invokeInteractiveModeForExecution(PackagedProgram.java:198)
at org.apache.flink.client.ClientUtils.executeProgram(ClientUtils.java:168)
at org.apache.flink.client.cli.CliFrontend.executeProgram(CliFrontend.java:699)
at org.apache.flink.client.cli.CliFrontend.run(CliFrontend.java:232)
at org.apache.flink.client.cli.CliFrontend.parseParameters(CliFrontend.java:916)
at org.apache.flink.client.cli.CliFrontend.lambda$main$10(CliFrontend.java:992)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
at org.apache.flink.runtime.security.contexts.HadoopSecurityContext.runSecured(HadoopSecurityContext.java:41)
at org.apache.flink.client.cli.CliFrontend.main(CliFrontend.java:992)
Caused by: java.io.NotSerializableException: visibility.mabs.src.main.java.com.pinterest.mabs.MabsFlinkJob
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1184)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
I would not rely on the official MapBundleOperator, since David already said that it is not very well documented. I will answer this question based on my own AbstractMapStreamBundleOperator. I think that you are missing the counter numOfElements++; inside the processElement() method. It is also better to use generic types. Use this code:
public abstract class AbstractMapStreamBundleOperator<K, V, IN, OUT>
        extends AbstractUdfStreamOperator<OUT, MapBundleFunction<K, V, IN, OUT>>
        implements OneInputStreamOperator<IN, OUT>, BundleTriggerCallback {

    private static final long serialVersionUID = 1L;

    private final Map<K, V> bundle;
    private final BundleTrigger<IN> bundleTrigger;
    private transient TimestampedCollector<OUT> collector;
    private transient int numOfElements = 0;

    public AbstractMapStreamBundleOperator(MapBundleFunction<K, V, IN, OUT> function, BundleTrigger<IN> bundleTrigger) {
        super(function);
        chainingStrategy = ChainingStrategy.ALWAYS;
        this.bundle = new HashMap<>();
        this.bundleTrigger = checkNotNull(bundleTrigger, "bundleTrigger is null");
    }

    @Override
    public void open() throws Exception {
        super.open();
        numOfElements = 0;
        collector = new TimestampedCollector<>(output);
        bundleTrigger.registerCallback(this);
        // reset trigger
        bundleTrigger.reset();
    }

    @Override
    public void processElement(StreamRecord<IN> element) throws Exception {
        // get the key and value for the map bundle
        final IN input = element.getValue();
        final K bundleKey = getKey(input);
        final V bundleValue = this.bundle.get(bundleKey);
        // get a new value after adding this element to bundle
        final V newBundleValue = userFunction.addInput(bundleValue, input);
        // update to map bundle
        bundle.put(bundleKey, newBundleValue);
        numOfElements++;
        bundleTrigger.onElement(input);
    }

    protected abstract K getKey(final IN input) throws Exception;

    @Override
    public void finishBundle() throws Exception {
        if (!bundle.isEmpty()) {
            numOfElements = 0;
            userFunction.finishBundle(bundle, collector);
            bundle.clear();
        }
        bundleTrigger.reset();
    }
}
Then create the MapStreamBundleOperator like you already have. Use this code:
public class MapStreamBundleOperator<K, V, IN, OUT> extends AbstractMapStreamBundleOperator<K, V, IN, OUT> {

    private final KeySelector<IN, K> keySelector;

    public MapStreamBundleOperator(MapBundleFunction<K, V, IN, OUT> function, BundleTrigger<IN> bundleTrigger,
                                   KeySelector<IN, K> keySelector) {
        super(function, bundleTrigger);
        this.keySelector = keySelector;
    }

    @Override
    protected K getKey(IN input) throws Exception {
        return this.keySelector.getKey(input);
    }
}
The counter inside the trigger is what makes the bundle operator flush the events to the next phase. The CountBundleTrigger is shown below. Use this code; you will also need the BundleTriggerCallback.
public class CountBundleTrigger<T> implements BundleTrigger<T> {

    private final long maxCount;
    private transient BundleTriggerCallback callback;
    private transient long count = 0;

    public CountBundleTrigger(long maxCount) {
        Preconditions.checkArgument(maxCount > 0, "maxCount must be greater than 0");
        this.maxCount = maxCount;
    }

    @Override
    public void registerCallback(BundleTriggerCallback callback) {
        this.callback = Preconditions.checkNotNull(callback, "callback is null");
    }

    @Override
    public void onElement(T element) throws Exception {
        count++;
        if (count >= maxCount) {
            callback.finishBundle();
            reset();
        }
    }

    @Override
    public void reset() {
        count = 0;
    }

    @Override
    public String explain() {
        return "CountBundleTrigger with size " + maxCount;
    }
}
Then you have to create one of these triggers to pass to your operator. Here I am creating a bundle of 100 TaxiFare events. Note that this example uses another POJO (Tuple2<Long, TaxiFare>). I wrote a MapBundleTaxiFareImpl for it, but you can create your own UDF based on this one; a sketch is shown after the next snippet.
private OneInputStreamOperator<Tuple2<Long, TaxiFare>, Tuple2<Long, TaxiFare>> getPreAggOperator() {
    MapBundleFunction<Long, TaxiFare, Tuple2<Long, TaxiFare>, Tuple2<Long, TaxiFare>> myMapBundleFunction = new MapBundleTaxiFareImpl();
    CountBundleTrigger<Tuple2<Long, TaxiFare>> bundleTrigger = new CountBundleTrigger<Tuple2<Long, TaxiFare>>(100);
    return new MapStreamBundleOperator<>(myMapBundleFunction, bundleTrigger, keyBundleSelector);
}
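The MapBundleTaxiFareImpl itself is not shown above. A minimal sketch, assuming a TaxiFare POJO with public driverId and tip fields as in the Flink training exercises (the class body and the keyBundleSelector below are illustrative):

public class MapBundleTaxiFareImpl
        extends MapBundleFunction<Long, TaxiFare, Tuple2<Long, TaxiFare>, Tuple2<Long, TaxiFare>> {

    @Override
    public TaxiFare addInput(@Nullable TaxiFare value, Tuple2<Long, TaxiFare> input) throws Exception {
        if (value == null) {
            return input.f1;
        }
        // pre-aggregate within the bundle: sum the tips for the same driver
        value.tip += input.f1.tip;
        return value;
    }

    @Override
    public void finishBundle(Map<Long, TaxiFare> buffer, Collector<Tuple2<Long, TaxiFare>> out) throws Exception {
        for (Map.Entry<Long, TaxiFare> entry : buffer.entrySet()) {
            out.collect(Tuple2.of(entry.getKey(), entry.getValue()));
        }
    }
}

// the keyBundleSelector referenced in getPreAggOperator(), keyed by driver ID
KeySelector<Tuple2<Long, TaxiFare>, Long> keyBundleSelector = value -> value.f0;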
In the end you call this new operator somewhere using transform(), again with the Tuple2<Long, TaxiFare> POJO:
stream
    ...
    .transform("my-pre-agg",
            TypeInformation.of(new TypeHint<Tuple2<Long, TaxiFare>>(){}), getPreAggOperator())
    ...
I think that is all you need. Try to use these classes, and if anything is missing it is probably in the git repository that I linked. I hope you can make it work.

Apache Flink: Job throws stack overflow error

I'm trying to execute this simple job in Apache Flink.
public class StreamingJob {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        Properties inputProperties = new Properties();
        ObjectMapper mapper = new ObjectMapper();
        DataStream<String> eventStream = env
                .addSource(new FileSourceFunction("/path/to/file"));
        DataStream<ObjectNode> eventStreamObject = eventStream
                .map(x -> mapper.readValue(x, ObjectNode.class));
        DataStream<ObjectNode> eventStreamWithTime = eventStreamObject
                .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<ObjectNode>() {
                    @Override
                    public long extractAscendingTimestamp(ObjectNode element) {
                        String data = element.get("ts").asText();
                        if (data.endsWith("Z")) {
                            data = data.substring(0, data.length() - 1);
                        }
                        return LocalDateTime.parse(data).toEpochSecond(ZoneOffset.UTC);
                    }
                });
        eventStreamObject.print();
        env.execute("Local job");
    }
}
FileSourceFunction is a custom SourceFunction
public class FileSourceFunction implements SourceFunction<String> {

    private static final long serialVersionUID = 1L;

    private String fileName;
    private volatile boolean isRunning = true;

    public FileSourceFunction(String fileName) {
        this.fileName = fileName;
    }

    @Override
    public void run(SourceContext<String> ctx) throws Exception {
        try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
            try (Stream<String> stream = br.lines()) {
                Iterator<String> it = stream.iterator();
                while (isRunning && it.hasNext()) {
                    synchronized (ctx.getCheckpointLock()) {
                        ctx.collect(it.next());
                    }
                }
            }
        }
    }

    @Override
    public void cancel() {
        isRunning = false;
    }
}
When I run the job it throws a StackOverflowError. I'm using Apache Flink 1.8.1.

Apache Flink 1.3 table api rowtime strange behavior

The following code sample does not work in 1.3:
public class TumblingWindow {
    public static void main(String[] args) throws Exception {
        List<Content> data = new ArrayList<Content>();
        data.add(new Content(1L, "Hi"));
        data.add(new Content(2L, "Hallo"));
        data.add(new Content(3L, "Hello"));
        data.add(new Content(4L, "Hello"));
        data.add(new Content(7L, "Hello"));
        data.add(new Content(8L, "Hello world"));
        data.add(new Content(16L, "Hello world"));
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        final StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
        DataStream<Content> stream = env.fromCollection(data);
        DataStream<Content> stream2 = stream.assignTimestampsAndWatermarks(
                new BoundedOutOfOrdernessTimestampExtractor<Content>(Time.milliseconds(1)) {
                    private static final long serialVersionUID = 410512296011057717L;

                    @Override
                    public long extractTimestamp(Content element) {
                        return element.getRecordTime();
                    }
                });
        Table table = tableEnv.fromDataStream(stream2,
                "urlKey,httpGetMessageCount,httpPostMessageCount" + ",uplink,downlink,statusCode,statusCodeCount,rowtime.rowtime");
        table.window(Tumble.over("1.hours").on("rowtime").as("w")).groupBy("w, urlKey")
                .select("w.start,urlKey,uplink.sum,downlink.sum,httpGetMessageCount.sum,httpPostMessageCount.sum");
        env.execute();
    }

    public static class Content implements Serializable {
        private String urlKey;
        private long recordTime;
        // private String recordTimeStr;
        private long httpGetMessageCount;
        private long httpPostMessageCount;
        private long uplink;
        private long downlink;
        private long statusCode;
        private long statusCodeCount;

        public Content() {
            super();
        }

        public Content(long recordTime, String urlKey) {
            super();
            this.recordTime = recordTime;
            this.urlKey = urlKey;
        }

        public String getUrlKey() {
            return urlKey;
        }

        public void setUrlKey(String urlKey) {
            this.urlKey = urlKey;
        }

        public long getRecordTime() {
            return recordTime;
        }

        public void setRecordTime(long recordTime) {
            this.recordTime = recordTime;
        }

        public long getHttpGetMessageCount() {
            return httpGetMessageCount;
        }

        public void setHttpGetMessageCount(long httpGetMessageCount) {
            this.httpGetMessageCount = httpGetMessageCount;
        }

        public long getHttpPostMessageCount() {
            return httpPostMessageCount;
        }

        public void setHttpPostMessageCount(long httpPostMessageCount) {
            this.httpPostMessageCount = httpPostMessageCount;
        }

        public long getUplink() {
            return uplink;
        }

        public void setUplink(long uplink) {
            this.uplink = uplink;
        }

        public long getDownlink() {
            return downlink;
        }

        public void setDownlink(long downlink) {
            this.downlink = downlink;
        }

        public long getStatusCode() {
            return statusCode;
        }

        public void setStatusCode(long statusCode) {
            this.statusCode = statusCode;
        }

        public long getStatusCodeCount() {
            return statusCodeCount;
        }

        public void setStatusCodeCount(long statusCodeCount) {
            this.statusCodeCount = statusCodeCount;
        }
    }

    private class TimestampWithEqualWatermark implements AssignerWithPunctuatedWatermarks<Object[]> {
        private static final long serialVersionUID = 1L;

        @Override
        public long extractTimestamp(Object[] element, long previousElementTimestamp) {
            return (long) element[0];
        }

        @Override
        public Watermark checkAndGetNextWatermark(Object[] lastElement, long extractedTimestamp) {
            return new Watermark(extractedTimestamp);
        }
    }
}
It raises the following exception:
Exception in thread "main" org.apache.flink.table.api.TableException: The rowtime attribute can only be replace a field with a valid time type, such as Timestamp or Long.
at org.apache.flink.table.api.StreamTableEnvironment$$anonfun$validateAndExtractTimeAttributes$1.apply(StreamTableEnvironment.scala:450)
at org.apache.flink.table.api.StreamTableEnvironment$$anonfun$validateAndExtractTimeAttributes$1.apply(StreamTableEnvironment.scala:440)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
at org.apache.flink.table.api.StreamTableEnvironment.validateAndExtractTimeAttributes(StreamTableEnvironment.scala:440)
at org.apache.flink.table.api.StreamTableEnvironment.registerDataStreamInternal(StreamTableEnvironment.scala:401)
at org.apache.flink.table.api.java.StreamTableEnvironment.fromDataStream(StreamTableEnvironment.scala:88)
at com.taiwanmobile.cep.noc.TumblingWindow.main(TumblingWindow.java:53)
But if I delete statusCodeCount in fromDataStream, this sample runs successfully without an exception:
Table table = tableEnv.fromDataStream(stream2,
        "urlKey,httpGetMessageCount,httpPostMessageCount" + ",uplink,downlink,statusCode,rowtime.rowtime");
table.window(Tumble.over("1.hours").on("rowtime").as("w")).groupBy("w, urlKey")
        .select("w.start,urlKey,uplink.sum,downlink.sum,httpGetMessageCount.sum,httpPostMessageCount.sum");
Any suggestions?
This is a bug that is filed as FLINK-6881. As a workaround you could define your own StreamTableSource that implements DefinedRowtimeAttribute (see also this documentation draft). A table source also nicely hides the underlying DataStream API, which makes table programs more compact.
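A minimal sketch of such a table source, assuming the Flink 1.3 interfaces StreamTableSource and DefinedRowtimeAttribute (check the exact method set against your version; the class and field layout below are illustrative):

public class ContentTableSource implements StreamTableSource<Row>, DefinedRowtimeAttribute {

    @Override
    public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
        // build the stream of Rows here; in a real source you would assign
        // timestamps and watermarks as in the question's extractor
        return env.fromCollection(
                Arrays.asList(Row.of("Hi", 1L), Row.of("Hallo", 2L)),
                getReturnType());
    }

    @Override
    public TypeInformation<Row> getReturnType() {
        return new RowTypeInfo(
                new TypeInformation<?>[]{BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO},
                new String[]{"urlKey", "uplink"});
    }

    @Override
    public String getRowtimeAttribute() {
        // the event-time attribute exposed to table programs instead of "rowtime.rowtime"
        return "rowtime";
    }
}

The table can then be registered via tableEnv.registerTableSource(...) without wiring the rowtime attribute through fromDataStream().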

How to write a "Custom Source" on Apache Flink

I want to write a DataSource that produces a DataStream from tarantool-java (https://github.com/tarantool/tarantool-java).
Can anybody give me a guide on how to write a user-defined DataSource?
This is my code so far:
package tarantooljava.streaming.flink_connecter;

import org.apache.flink.configuration.Configuration;
import org.tarantool.TarantoolConnection16;
import org.tarantool.TarantoolConnection16Impl;
import splunk.test.TestSchema;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import static java.util.Objects.requireNonNull;

/**
 * Created by jaryzhen on 16/4/19.
 */
public class FlinkTarantoolJavaSpace<T> extends FlinkTarantoolJavaSpaceBase<T> {
    private ConsumerThread<T> consumerThread;

    public FlinkTarantoolJavaSpace(String ip, int port, String user, String pwd) throws IOException {
        FlinkTarantoolJavaSpace(ip, port, user, pwd, 11);
    }

    public List<T> FlinkTarantoolJavaSpace(String ip, int port, String user, String pwd, int a) throws IOException {
        requireNonNull(ip, "topics");
        TarantoolConnection16 con = new TarantoolConnection16Impl(ip, port);
        con.auth(user, pwd);
        final TestSchema schema = con.schema(new TestSchema());
        List select0 = null;
        for (int i = 0; i < 100; i = i + 2) {
            select0 = con.select(schema.tester.id, schema.tester.primary, Arrays.asList(i), 0, 30, 0);
            //System.out.println("select0:" + i + select0);
        }
        // System.out.println(a.size());
        // System.out.println(a.get(0));
        con.close();
        return select0;
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
    }

    @Override
    public void run(SourceContext<T> sourceContext) throws Exception {
        consumerThread = new ConsumerThread<>(this, sourceContext);
    }

    @Override
    public void cancel() {
        // set ourselves as not running
        boolean running = false;
        if (true) {
        } else {
            // the consumer thread is not running, so we have to interrupt our own thread
        }
    }

    @Override
    public void close() throws Exception {
        cancel();
        super.close();
    }

    // ------------------------------------------------------------------------
    //  Checkpoint and restore
    // ------------------------------------------------------------------------

    private static class ConsumerThread<T> extends Thread {
        private FlinkTarantoolJavaSpace<T> flinkConsumer;
        private SourceContext<T> sourceContext;
        private boolean running = true;

        public ConsumerThread(FlinkTarantoolJavaSpace<T> flinkConsumer, SourceContext<T> sourceContext) {
            this.sourceContext = sourceContext;
            this.flinkConsumer = flinkConsumer;
        }

        @Override
        public void run() {
        }

        /**
         * Try to shutdown the thread
         */
        public void shutdown() {
            this.running = false;
        }
    }
}
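A minimal user-defined source, as a sketch: the core contract is a run() loop that emits records through the SourceContext until cancel() flips a flag. The select logic below is a placeholder based on the snippet above (selectRecords() is a hypothetical helper, and the poll loop is illustrative):

public class TarantoolSource implements SourceFunction<List<?>> {

    private static final long serialVersionUID = 1L;

    private final String host;
    private final int port;
    private final String user;
    private final String pwd;
    private volatile boolean running = true;

    public TarantoolSource(String host, int port, String user, String pwd) {
        this.host = host;
        this.port = port;
        this.user = user;
        this.pwd = pwd;
    }

    @Override
    public void run(SourceContext<List<?>> ctx) throws Exception {
        // open the connection inside run() so it lives on the task, not the client
        TarantoolConnection16 con = new TarantoolConnection16Impl(host, port);
        try {
            con.auth(user, pwd);
            int key = 0;
            while (running) {
                List<?> records = selectRecords(con, key++);
                // emit under the checkpoint lock so emission and snapshots don't interleave
                synchronized (ctx.getCheckpointLock()) {
                    ctx.collect(records);
                }
                Thread.sleep(1000); // poll interval for the sketch
            }
        } finally {
            con.close();
        }
    }

    // hypothetical helper; replace with con.select(...) against your schema
    private List<?> selectRecords(TarantoolConnection16 con, int key) {
        return Arrays.asList(key);
    }

    @Override
    public void cancel() {
        running = false;
    }
}

The source is then wired in with env.addSource(new TarantoolSource(ip, port, user, pwd)).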
