I have the following simple Flink application running within my IDE. It takes a checkpoint every 5 seconds, and I would like the checkpoint data to be written to the directory file:///d:/applog/out/mycheckpoint/. However, after running the application for a while and then stopping it, I don't find anything under file:///d:/applog/out/mycheckpoint/.
The code is:
import java.util.Date
import io.github.streamingwithflink.util.DateUtil
import org.apache.flink.api.common.state.{ListState, ListStateDescriptor}
import org.apache.flink.api.scala._
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.runtime.state.{FunctionInitializationContext, FunctionSnapshotContext}
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
object SourceFunctionExample {
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(4)
env.getCheckpointConfig.setCheckpointInterval(5 * 1000)
env.getCheckpointConfig.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION)
env.setStateBackend(new FsStateBackend("file:///d:/applog/out/mycheckpoint/"))
val numbers: DataStream[Long] = env.addSource(new ReplayableCountSource)
numbers.print()
env.execute()
}
}
class ReplayableCountSource extends SourceFunction[Long] with CheckpointedFunction {
var isRunning: Boolean = true
var cnt: Long = _
var offsetState: ListState[Long] = _
override def run(ctx: SourceFunction.SourceContext[Long]): Unit = {
while (isRunning && cnt < Long.MaxValue) {
ctx.getCheckpointLock.synchronized {
// increment cnt
cnt += 1
ctx.collect(cnt)
}
Thread.sleep(200)
}
}
override def cancel(): Unit = isRunning = false
override def snapshotState(snapshotCtx: FunctionSnapshotContext): Unit = {
println("snapshotState is called at " + DateUtil.format(new Date) + s", cnt is ${cnt}")
// remove previous cnt
offsetState.clear()
// add current cnt
offsetState.add(cnt)
}
override def initializeState(initCtx: FunctionInitializationContext): Unit = {
// obtain operator list state to store the current cnt
val desc = new ListStateDescriptor[Long]("offset", classOf[Long])
offsetState = initCtx.getOperatorStateStore.getListState(desc)
// initialize cnt variable from the checkpoint
val it = offsetState.get()
cnt = if (null == it || !it.iterator().hasNext) {
-1L
} else {
it.iterator().next()
}
println("initializeState is called at " + DateUtil.format(new Date) + s", cnt is ${cnt}")
}
}
I tested the application on Windows and Linux and in both cases the checkpoint files were created as expected.
Note that the program keeps running if a checkpoint fails, for example due to some permission errors or invalid path.
Flink logs a WARN message with the exception that caused the checkpoint to fail.
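If you want such failures to be more visible, you can make the job fail instead of only logging the warning. A minimal sketch, assuming a Flink version whose CheckpointConfig exposes setTolerableCheckpointFailureNumber (older versions offer setFailOnCheckpointingErrors instead):
// Fail the job on the first failed checkpoint, so an unwritable or invalid
// checkpoint path surfaces immediately instead of only as a WARN log entry.
env.getCheckpointConfig.setTolerableCheckpointFailureNumber(0)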
I am using Flink 1.12.0 and have a data collection that I use to try out the event-time group window. The full code follows.
package org.example.sqlexploration
import java.sql.Timestamp
import java.text.SimpleDateFormat
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.table.api.{AnyWithOperations, FieldExpression}
import org.apache.flink.table.api.bridge.scala._
import org.apache.flink.types.Row
import org.example.sqlexploration.Implicits.String2Timestamp
case class MyStock(id: String, event_time: Timestamp, price: Int)
object Implicits {
implicit class String2Timestamp(strDate: String) {
def ts = {
val milli = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").parse(strDate).getTime
new Timestamp(milli)
}
}
}
object Main {
def main(args: Array[String]): Unit = {
val elements = Seq(
MyStock("id1", "2020-01-04 11:36:10".ts, 1),
MyStock("id1", "2020-01-04 11:36:15".ts, 2),
MyStock("id1", "2020-01-04 11:36:13".ts, 4),
MyStock("id1", "2020-01-04 11:36:18".ts, 8),
MyStock("id1", "2020-01-04 11:36:12".ts, 16)
)
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
val ds: DataStream[MyStock] = env.fromCollection(elements).assignTimestampsAndWatermarks(new AssignerWithPunctuatedWatermarks[MyStock] {
var max_seen = Long.MinValue
override def checkAndGetNextWatermark(stock: MyStock, l: Long): Watermark = {
val ts = stock.event_time.getTime
if (max_seen < ts) {
max_seen = ts
}
new Watermark(max_seen - 2000) //allow 2 seconds lateness
}
override def extractTimestamp(stock: MyStock, l: Long): Long = stock.event_time.getTime
})
val tenv = StreamTableEnvironment.create(env)
tenv.createTemporaryView("sourceView", ds, $"id", $"price", $"event_time".rowtime() as "rt")
val sql =
"""
select id,
sum(price) as total_price,
tumble_start(rt, interval '2' second) as proc_start,
tumble_end(rt, interval '2' second) as proc_end
from sourceView
group by id, tumble(rt, interval '2' second)
""".stripMargin(' ')
tenv.sqlQuery(sql).toAppendStream[Row].print()
env.execute()
}
}
In my application, I have set the parallelism to 1 and use an AssignerWithPunctuatedWatermarks implementation that allows 2 seconds of lateness. The tumbling event-time window uses a 2-second interval.
The result output is:
id1,1,2020-01-04T03:36:10,2020-01-04T03:36:12
id1,4,2020-01-04T03:36:12,2020-01-04T03:36:14
id1,2,2020-01-04T03:36:14,2020-01-04T03:36:16
id1,8,2020-01-04T03:36:18,2020-01-04T03:36:20
I don't understand why id1,4,2020-01-04T03:36:12,2020-01-04T03:36:14 is contained in the result.
The event that led to the creation of this window is MyStock("id1", "2020-01-04 11:36:13".ts, 4). It is late, because the watermark has already reached 2020-01-04 11:36:13. Isn't an event excluded when its event time equals the watermark?
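For reference, a small standalone sketch (not part of the job above) that replays the punctuated-watermark logic from checkAndGetNextWatermark for the five elements, using the same 2-second subtraction; it reproduces the 11:36:13 watermark mentioned above:
object WatermarkTrace extends App {
  // seconds past 11:36 for the five elements, in arrival order
  val eventSeconds = Seq(10, 15, 13, 18, 12)
  var maxSeen = Long.MinValue
  eventSeconds.foreach { s =>
    val ts = s * 1000L
    maxSeen = math.max(maxSeen, ts)
    val watermark = maxSeen - 2000 // allow 2 seconds lateness, as in the assigner
    println(f"element @ 11:36:$s%02d -> watermark 11:36:${watermark / 1000}%02d")
  }
}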
I am reading data from Kafka and trying to write it to the HDFS file system in ORC format. I used the reference below from the official website, but I can see that Flink writes exactly the same content for all the data, creates a large number of files, and all the files are 103 KB.
https://ci.apache.org/projects/flink/flink-docs-release-1.11/dev/connectors/streamfile_sink.html#orc-format
Please find my code below.
object BeaconBatchIngest extends StreamingBase {
val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
def getTopicConfig(configs: List[Config]): Map[String, String] = (for (config: Config <- configs) yield (config.getString("sourceTopic"), config.getString("destinationTopic"))).toMap
def setKafkaConfig():Unit ={
val kafkaParams = new Properties()
kafkaParams.setProperty("bootstrap.servers","")
kafkaParams.setProperty("zookeeper.connect","")
kafkaParams.setProperty("group.id", DEFAULT_KAFKA_GROUP_ID)
kafkaParams.setProperty("auto.offset.reset", "latest")
val kafka_consumer:FlinkKafkaConsumer[String] = new FlinkKafkaConsumer[String]("sourceTopics", new SimpleStringSchema(),kafkaParams)
kafka_consumer.setStartFromLatest()
val stream: DataStream[DataParse] = env.addSource(kafka_consumer).map(new temp)
val schema: String = "struct<_col0:string,_col1:bigint,_col2:string,_col3:string,_col4:string>"
val writerProperties = new Properties()
writerProperties.setProperty("orc.compress", "ZLIB")
val writerFactory = new OrcBulkWriterFactory(new PersonVectorizer(schema),writerProperties,new org.apache.hadoop.conf.Configuration);
val sink: StreamingFileSink[DataParse] = StreamingFileSink
.forBulkFormat(new Path("hdfs://warehousestore/hive/warehouse/metrics_test.db/upp_raw_prod/hour=1/"), writerFactory)
.build()
stream.addSink(sink)
}
def main(args: Array[String]): Unit = {
setKafkaConfig()
env.enableCheckpointing(5000)
env.execute("Kafka_Flink_HIVE")
}
}
class temp extends MapFunction[String,DataParse]{
override def map(record: String): DataParse = {
new DataParse(record)
}
}
class DataParse(data : String){
val parsedJason = parse(data)
val timestamp = compact(render(parsedJason \ "timestamp")).replaceAll("\"", "").toLong
val event = compact(render(parsedJason \ "event")).replaceAll("\"", "")
val source_id = compact(render(parsedJason \ "source_id")).replaceAll("\"", "")
val app = compact(render(parsedJason \ "app")).replaceAll("\"", "")
val json = data
}
class PersonVectorizer(schema: String) extends Vectorizer[DataParse](schema) {
override def vectorize(element: DataParse, batch: VectorizedRowBatch): Unit = {
val eventColVector = batch.cols(0).asInstanceOf[BytesColumnVector]
val timeColVector = batch.cols(1).asInstanceOf[LongColumnVector]
val sourceIdColVector = batch.cols(2).asInstanceOf[BytesColumnVector]
val appColVector = batch.cols(3).asInstanceOf[BytesColumnVector]
val jsonColVector = batch.cols(4).asInstanceOf[BytesColumnVector]
timeColVector.vector(batch.size + 1) = element.timestamp
eventColVector.setVal(batch.size + 1, element.event.getBytes(StandardCharsets.UTF_8))
sourceIdColVector.setVal(batch.size + 1, element.source_id.getBytes(StandardCharsets.UTF_8))
appColVector.setVal(batch.size + 1, element.app.getBytes(StandardCharsets.UTF_8))
jsonColVector.setVal(batch.size + 1, element.json.getBytes(StandardCharsets.UTF_8))
}
}
With bulk formats (such as ORC), the StreamingFileSink rolls over to a new file with every checkpoint. If you increase the checkpointing interval (currently 5 seconds), it won't write so many files.
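For example, a minimal sketch using the environment from the code above (60 seconds is just an arbitrary example value):
// With bulk formats the sink rolls a new part file on every checkpoint,
// so a longer checkpoint interval means fewer, larger files.
env.enableCheckpointing(60 * 1000)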
I am trying to make a link between a Python script and a simulation running on UnetSim.
I want to send messages from Python and receive them in UnetStack using a Python_Agent.groovy that I created and added to the container of each node in the simulation. I also want to do the reverse.
I used the fjage documentation (https://buildmedia.readthedocs.org/media/pdf/fjage/dev/fjage.pdf) to help me. The problem is that the gateway's Services class does not contain the PYTHON_AGENT service that I created. I can understand that, since the enum Services I defined does not modify the Services class that contains NODE_INFO, PHYSICAL, etc.
My question is then: how does the example in section 1.6.3 of the documentation work, and is it applicable to my case?
Here is my code:
PythonSocketExample.py
from unetpy import *
from fjagepy import *
import socket
node_address = '001'
host = socket.gethostname()
sock = UnetSocket(host, int(node_address) + 1100)
gw = sock.getGateway()
py_agent = gw.agentForService(Services.PYTHON_AGENT)
py_agent << DatagramReq(data = '$A001')
rsp = py_agent.receive()
print (rsp)
UnetSimulation.groovy
//! Simulation : Initialisation python
import org.arl.fjage.RealTimePlatform
import org.arl.fjage.*
import org.arl.unet.*
import org.arl.unet.phy.*
import org.arl.unet.sim.*
import org.arl.unet.sim.channels.*
import static org.arl.unet.Services.*
import static org.arl.unet.phy.Physical.*
//import java.net.ServerSocket
///////////////////////////////////////////////////////////////////////////////
// simulation settings
platform = RealTimePlatform // use real-time mode
///////////////////////////////////////////////////////////////////////////////
// channel and modem settings
channel = [
model: ProtocolChannelModel,
soundSpeed: 1500.mps,
communicationRange: 3.km,
interferenceRange: 3.km
]
modem.model = USMARTModem
modem.dataRate = [640.bps, 6400.bps]
modem.frameLength = [16.bytes, 64.bytes]
modem.powerLevel = [0.dB, -10.dB]
modem.headerLength = 0
modem.preambleDuration = 0
modem.txDelay = 0
///////////////////////////////////////////////////////////////////////////////
// nodes settings and geometry
def beacons = 2..4 // 3 anchors from 2 to 4
def sensors = 5..104 // 100 sensors from 5 to 104
def nodeLocation = [:]
def D = 4000.m // side of the simulation area
def L = 400.m // distance between two node
nodeLocation[1] = [D/2-L, D/2 -L, -5.m] //masterAnchor
nodeLocation[2] = [D/2+L, D/2 -L, -5.m]
nodeLocation[3] = [D/2, D/2+L, -5.m]
nodeLocation[4] = [D/2, D/2, -500.m]
sensors.each { myAddr ->
nodeLocation[myAddr] = [rnd(0, D), rnd(0, D), rnd(-480.m, -500.m)]
}
///////////////////////////////////////////////////////////////////////////////
// simulation details
simulate {
node '1', address: 1, location: nodeLocation[1], web: 8101, api: 1101, shell: true, stack: {
container -> container.add 'py_agent' + 1, new Python_Agent()
}
beacons.each { myAddr ->
def myNode = node("${myAddr}", address: myAddr, location: nodeLocation[myAddr], web: 8100 + myAddr , api: 1100 + myAddr,
stack: {container ->
container.add 'py_agent' + myAddr, new Python_Agent()})
}
sensors.each { myAddr ->
def myNode = node("${myAddr}", address: myAddr, location: nodeLocation[myAddr], web: 8100 + myAddr, api: 1100 + myAddr,
stack: {container ->
container.add 'py_agent' + myAddr, new Python_Agent()})
}
}
Python_Agent.groovy
import org.arl.fjage.*
import org.arl.unet.*
enum Services {
PYTHON_AGENT
}
class Python_Agent extends UnetAgent {
String fromNode;
String toNode;
String toClient;
def nodeInfo;
def myLocation;
def myAddress;
def phy;
void setup() {
register Services.PYTHON_AGENT
}
void startup() {
// TODO
nodeInfo = agentForService(Services.NODE_INFO)
myLocation = nodeInfo.location
myAddress = nodeInfo.address
println('pyAgent ' + myAddress + ' works')
}
void processMessage(Message msg) {
if (msg instanceof DatagramNtf /*&& msg.protocol == NODE_STATUS_PROTOCOL*/) {
println("Node "+ myAddress+ ' receiving ' + msg.text +' from ' + msg.from +" protocol is "+ msg.protocol)
toNode = phy.energy
}
}
}
The first error that I get is:
1 error
org.codehaus.groovy.control.MultipleCompilationErrorsException: startup failed:
C:\Users\mathi\OneDrive\Bureau\unet-3.0.0\FakeModem\python.groovy: 85: Enum constructor calls are only allowed inside the enum class
. At [85:50] @ line 85, column 50.
container.add 'py_agent' + 1, new Python
^
Then, if I comment out the enum part and modify the setup part, the simulation works:
void setup() {
register 'PYTHON_AGENT'
}
When I run PythonSocketExample.py, I get the error
Traceback (most recent call last):
File "PythonSocketExample.py", line 11, in <module>
py_agent = gw.agentForService(Services.PYTHON_AGENT)
AttributeError: type object 'Services' has no attribute 'PYTHON_AGENT'
The end of the log on UnetStack is here:
1582820223131 INFO Python_Agent/84#1903:println pyAgent 84 works
1582820223132 INFO Python_Agent/39#1633:println pyAgent 39 works
1582820415798 INFO org.arl.unet.sim.SimulationMasterContainer#48:connected Incoming connection tcp:///137.195.214.230:1101//137.195.214.230.62913
1582820415875 INFO org.arl.unet.sim.SimulationMasterContainer#2131:connectionClosed Connection tcp:///137.195.214.230:1101//137.195.214.230.62913 closed
Thank you for your help
EDIT
Thanks to your message and some research, I am now able to send and receive messages between UnetStack and Python by using MessageBehavior and GenericMessage.
I want my simulation to receive more than one message, but since my add new MessageBehavior is in startup() of my PythonAgent.groovy, I need as many add new MessageBehavior calls as messages that I send.
I tried putting it in processMessage(Message msg), but it seems this method does not recognize GenericMessage().
The question could be: how can I use MessageBehavior more than once?
Here is my code:
Python
ping_test.py
# Need to run the script two times. The first time, UnetStack doesn't get the message.
# I don't know where the bug comes from.
serport = serial.Serial()
## SET SELF ADDRESS
nodeID = '001'
nodeID_ping = '002'
command = b'$A' + nodeID.encode()
serport.write(command)
ack_msg = serport.readline()
print('ack_msg : ', ack_msg)
ping_command = b'$P' + nodeID_ping.encode()
serport.write(ping_command)
ack_msg = serport.readline()
print('ack_msg :', ack_msg)
rsp_msg = serport.readline()
print('rsp_msg :', rsp_msg)
FakeSerial_V2.py
from unetpy import *
import socket
import clientSocket
# a Serial class emulator
class Serial:
## init(): the constructor. Many of the arguments have default values
# and can be skipped when calling the constructor.
def __init__( self, port='5000', baudrate = 19200, timeout=1, write_timeout=1,
bytesize = 8, parity = 'N', stopbits = 1, xonxoff=0,
rtscts = 0):
self.last_instruction = ''
self.nodeID = ''
self.remote_nodeID = ''
self.command = ''
self._isOpen = True
self._receivedData = ''
self._data = 'It was the best of times.\nIt was the worst of times.\n'
self.phy = ''
self.pySocket = ''
## write()
# writes a string of characters to the Arduino
def write( self, string):
self.command = string.decode()
_type = None
print( 'FakeSerial got: ' + self.command)
# SET_ADDRESS
if (self.command[0:2] == '$A' and len(self.command) == 5):
_type = 'set_address'
self.nodeID = string[2:]
self.pySocket = clientSocket.clientSocket(self.nodeID) # initialize the clientSocket class
self.pySocket.sendData(_type) # need to fix the rsp Generic Message on UnetStack
self.last_instruction = 'SET_ADDRESS_INSTRUCTION'
# PING
elif (self.command[0:2] == '$P' and len(self.command) == 5):
_type = 'ping'
to_addr = self.command[2:]
# print(to_addr, type(to_addr))
self.pySocket.sendData(_type, to_addr)
self.last_instruction = "PING_INSTRUCTION"
else:
print("write FAILURE")
## readline()
# reads characters from the fake Arduino until a \n is found.
def readline( self ):
self._receivedData = self.pySocket.receiveData()
return self._receivedData
clientSocket.py
import socket
from unetpy import *
from fjagepy import *
class clientSocket:
def __init__(self, nodeID = '001'):
self.host = socket.gethostname()
self.nodeID = int(nodeID)
self.port = int(nodeID) + 1100
self.sock = UnetSocket(self.host, self.port)
self.gw = self.sock.getGateway()
self.pyagent = 'pyagent' + str(self.nodeID)
def sendData(self, _type, to_addr = '000' , data = 'None'):
IDreq = 1
# gmsg = GenericMessage(perf = Performative.REQUEST, recipient = pyagent)
# gmsg.IDreq = IDreq
# self.gw.send(gmsg)
IDreq = IDreq + 1
gmsg2 = GenericMessage(perf = Performative.REQUEST, recipient = self.pyagent)
gmsg2.type = _type
gmsg2.from_addr = self.nodeID
gmsg2.to_addr = int(to_addr)
gmsg2.data = data
gmsg2.IDreq = IDreq
self.gw.send(gmsg2)
IDreq = 0
def receiveData( self ):
rgmsg = self.gw.receive(GenericMessage, 4000)
print ('UnetStack state :', rgmsg.state)
# print ('rsp :', rgmsg.data)
# print('Ping time is', rgmsg.time_ping, 'ms')
return rgmsg.data
Groovy
sim1.groovy
import org.arl.fjage.RealTimePlatform
import org.arl.fjage.*
import org.arl.unet.*
import org.arl.unet.sim.channels.*
platform = RealTimePlatform // use real-time mode
///////////////////////////////////////////////////////////////////////////////
// channel and modem settings
channel = [
model: ProtocolChannelModel,
soundSpeed: 1500.mps,
communicationRange: 3.km,
interferenceRange: 3.km
]
modem.model = USMARTModem
modem.dataRate = [640.bps, 6400.bps]
modem.frameLength = [16.bytes, 64.bytes]
modem.powerLevel = [0.dB, -10.dB]
modem.headerLength = 0
modem.preambleDuration = 0
modem.txDelay = 0
simulate {
node '1', address: 1, web: 8101, api: 1101, stack: {
container -> container.add 'pyagent1', new PythonAgent()
}
node '2', address: 2,location: [500.m ,500.m, -500.m], web: 8102, api: 1102, stack: {
container -> container.add 'pyagent2', new PythonAgent()
}
}
PythonAgent.groovy
import org.arl.fjage.*
import org.arl.unet.*
import org.arl.unet.phy.RxFrameNtf
import org.arl.unet.phy.TxFrameNtf
class PythonAgent extends UnetAgent {
final static int PING_PROTOCOL = 10;
final static int NODE_STATUS_PROTOCOL = 11;
final static int BROADCAST_PROTOCOL = 12;
final static int UNICAST_PROTOCOL = 13;
final static int UNICAST_ACK_PROTOCOL = 14;
final static int TEST_MSG_PROTOCOL = 15;
final static int ECHO_PROTOCOL = 16;
final static int QUALITY_PROTOCOL = 17;
def nodeInfo;
def phy;
def myLocation;
def myAddress;
def IDreq = 0;
def time_ping = null;
def function_state = null;
def data_to_py = null;
void startup() {
println(agentID.name + ' running')
nodeInfo = agentForService Services.NODE_INFO
phy = agentForService Services.PHYSICAL
myLocation = nodeInfo.location
myAddress = nodeInfo.address
subscribe topic(phy)
add new MessageBehavior(GenericMessage, { req ->
println("In PythonAgent::MessageBehavior req ="+req)
if (req.performative) println("req.performative is " + req.performative)
else println("req.performative is null")
def ack = new GenericMessage(req, Performative.INFORM)
def rsp = new GenericMessage(req, Performative.INFORM)
println('IDreq = ' + req.IDreq)
if ((req.performative == Performative.REQUEST) && (req.IDreq == 2)) {
// IDreq = req.IDreq
// println('IDreq = ' + IDreq)
//log.info "Generic message request of type ${req.type}"
function_state = 'None';
data_to_py = 'None';
switch (req.type) {
case 'set_address':
println("Handling set_address")
ack.state = "Handling set_address"
ack.data = '#A' + corrected_address(myAddress);
send ack;
rsp.data = ack.data; break;
}
}
})
add new MessageBehavior(GenericMessage, { req ->
println("In PythonAgent::MessageBehavior req ="+req)
if (req.performative) println("req.performative is " + req.performative)
else println("req.performative is null")
def ack = new GenericMessage(req, Performative.INFORM)
def rsp = new GenericMessage(req, Performative.INFORM)
println('IDreq = ' + req.IDreq)
if ((req.performative == Performative.REQUEST) && (req.IDreq == 2)) {
// IDreq = req.IDreq
// println('IDreq = ' + IDreq)
//log.info "Generic message request of type ${req.type}"
function_state = 'None';
data_to_py = 'None';
switch (req.type) {
case 'set_address':
println("Handling set_address")
ack.state = "Handling set_address"
ack.data = '#A' + corrected_address(myAddress);
send ack;
rsp.data = ack.data; break;
case 'loc':
//println("Handling localisation request");
sendUPSBeacon(); break;
case 'ping':
println("Handling ping request");
ack.state = "Handling ping request"; ack.data = '$P' + corrected_address(req.to_addr);
send ack;
ping(req.to_addr);
rsp.time_ping = time_ping; break;
case 'exe':
//println("Handling exe request");
exe(); break;
case 'sense':
//println("Handling sense request");
sense(); break;
default: println "Unknown request";
}
//println "In USMARTBaseAnchorDaemon::MessageBehavior, just after exe"
rsp.state = function_state
rsp.data = data_to_py
println "In PythonAgent::MessageBehavior, rsp is " + rsp
send rsp
}
})
}
void ping(to_addr) {
println "Pinging ${to_addr} at ${nanoTime()}"
DatagramReq req = new DatagramReq(to: to_addr, protocol: PING_PROTOCOL)
phy << req
def txNtf = receive(TxFrameNtf, 10000) // TO-DO:check protocol
def rxNtf = receive({ it instanceof RxFrameNtf && it.from == req.to}, 10000)
if (txNtf && rxNtf && rxNtf.from == req.to) {
time_ping = (rxNtf.rxTime-txNtf.txTime)/1000 //in ms
println("Response from ${rxNtf.from}: ")
println("rxTime=${rxNtf.rxTime}")
println("txTime=${txNtf.txTime}")
println("Response from ${rxNtf.from}: time = ${time_ping}ms")
function_state = 'Ping processed'
data_to_py = "#R" + corrected_address(to_addr) + 'T' + rxNtf.data
}
else {
function_state = 'Ping Request timeout'
println (function_state)
}
}
@Override
void processMessage(Message msg) {
// pong
if (msg instanceof DatagramNtf && msg.protocol == PING_PROTOCOL) {
println("pong : Node "+ myAddress + ' from ' + msg.from +" protocol is "+ msg.protocol)
send new DatagramReq(recipient: msg.sender, to: msg.from, data: phy.energy as byte[], protocol: PING_PROTOCOL)
println ('processMessage energy : ' + phy.energy)
}
}
String corrected_address(address) {
address = address.toString()
if (address.size() == 1) address = '00' + address
if (address.size() == 2) address = '0' + address
return address
}
}
USMARTModem.groovy
import org.arl.fjage.Message
import org.arl.unet.sim.HalfDuplexModem
import org.arl.fjage.*
import org.arl.unet.*
import org.arl.unet.phy.*
import org.arl.unet.sim.*
import org.arl.unet.sim.channels.*
import static org.arl.unet.Services.*
import static org.arl.unet.phy.Physical.*
/*
Ptx= V*Itx //power consumed in transmission in watt
Prx = V*Irx //power consumed in receiving packets in watt
Etx = Math.floor(avgSent)*(Ptx*0.3675)
energyAll = (Math.floor(avgSent)*(Ptx*0.3675)) + (Math.floor(avgReceived)*(Prx*0.3675)) // total energy consumed for all the packets sent and received throughout the simulation
// EtxSubset = Math.floor(avgTxCountNs)*(Ptx*0.3675) // energy consumed in transmitiing 25% of packets in Joul
bytesDelivered = Math.floor(avgReceived)* modem.frameLength[1]
JPerBit = energyAll/(bytesDelivered * 8)
*/
//Duration of data packet in seconds = data packet size (in bits)/bandwidth (in bps) = (15*8)/50000 = 0.0024
class USMARTModem extends HalfDuplexModem {
static final def txPower = -17.dB
static final def acousticDataRate = 640.bps
static final def payLoadSize = 5.bytes
static final def headerDuration = (30+75+200)/1000 //in seconds --> in our case nanomodem v3 provides us with the header (in ms) to add to the actual payload in the frame length.. refer to the modem datasheet
static final def V = 5 // supply voltage in volt
static final def Itx = 0.3, Irx = 0.005, Iidle = 0.0025 //current in Am
float payLoadDuration = (payLoadSize*8)/acousticDataRate //in seconds
float dataPacketDuration = payLoadDuration +headerDuration //in seconds
float energy = 2000 //initial energy in Joule
float test = energy+1
float Ptx = V*Itx, Prx=V*Irx, Pidle = V*Iidle //power in watt
float totalEtx =0
float totalErx =0
float totalEidle =0
float totalEnergyConsumed =0
float Etx = Ptx * dataPacketDuration //Energy in Joul
float Erx = Prx * dataPacketDuration
float Eidle = Pidle * dataPacketDuration
// float power = 0.00001995262315 //in watt (-17 in db=10log(p/1mw) .. so p = 10to the power -1.7 = 0.00001995262315
// BigDecimal Ptx = (Math.pow(10.0,(txPower/10) ))/1000 //????
// BigDecimal Etx= Ptx *((frameLength[1]*8)/640) // This is consumed energy (in transmission) Etx = Ptx*time it takes to tramsnit tha packet
//float Etx =10
@Override
boolean send(Message m) {
if (m instanceof TxFrameNtf)
{
energy -= Etx// Remaining energy
totalEtx += Etx //total energy consumed in tx
}
if (m instanceof RxFrameNtf)
{
energy -= Erx // Remaining energy
totalErx += Erx //total energy consumed in rx
}
if(!busy)
{
energy-= Eidle //Remaining energy
totalEidle += Eidle //total energy consumed while Eidle
}
totalEnergyConsumed = totalEtx+totalErx+totalEidle
return super.send(m)
}
}
Sorry for the very, very long post... I think everything was necessary to understand the code.
A few problems in your original code:
You don't need to create a service, since you can address the agent by its name. This should be sufficient for your example here.
To process a request (DatagramReq from your Python code), you should override the processRequest() method in the agent.
Here's a simplified example based on your original code:
PythonAgent.groovy:
import org.arl.fjage.*
import org.arl.unet.*
class PythonAgent extends UnetAgent {
void startup() {
println('pyAgent running')
}
@Override
Message processRequest(Message msg) {
if (msg instanceof DatagramReq) {
println('Got a DatagramNtf')
// do whatever you want with the request
return new Message(msg, Performative.AGREE)
}
return null
}
}
sim1.groovy:
import org.arl.fjage.RealTimePlatform
platform = RealTimePlatform // use real-time mode
simulate {
node '1', address: 1, web: 8101, api: 1101, stack: {
container -> container.add 'pyagent', new PythonAgent()
}
}
and test1.py:
from unetpy import *
from fjagepy import *
sock = UnetSocket('localhost', 1101) # node 1's API port as per sim script
gw = sock.getGateway()
pyagent = gw.agent('pyagent') # agent name as per sim script
rsp = pyagent << DatagramReq()
print(rsp)
Thank you, I did not know I needed to @Override. I still have a question: how can I put data in my DatagramReq so that I can extract it in UnetStack?
I tried this as a first solution, looking at the Handbook, but it doesn't work.
PythonAgent.groovy
import org.arl.fjage.*
import org.arl.unet.*
class PythonAgent extends UnetAgent {
void startup() {
println('pyAgent running')
}
@Override
Message processRequest(Message msg) {
if (msg instanceof DatagramReq) {
println('Got a DatagramNtf')
println(msg.data)
// do whatever you want with the request
return new Message(msg, Performative.AGREE)
}
return null
}
}
test1.py
from unetpy import *
from fjagepy import *
sock = UnetSocket('localhost', 1101) # node 1's API port as per sim script
gw = sock.getGateway()
pyagent = gw.agent('pyagent') # agent name as per sim script
rsp1 = pyagent << DatagramReq( data = [42])
rsp2 = pyagent << DatagramReq( data = 'data_string')
print(rsp1, rsp2)
On the Python terminal I get Agree None. So I can transmit an array but not a string?
The log prints:
Incoming connection tcp:///127.0.0.1:1101//127.0.0.1.51208
1583166206032 INFO PythonAgent/1#2643:println Got a DatagramNtf
1583166206032 INFO PythonAgent/1#2643:println [B@4e3d88df
1583166206033 WARNING org.arl.fjage.remote.ConnectionHandler#2670:run Bad JSON request: java.lang.IllegalArgumentException: Illegal base64 character 5f in {"action": "send", "relay": true, "message": { "clazz": "org.arl.unet.DatagramReq", "data": {"msgID":"492ac9dd-c2bf-4c0c-9198-3b32fb416f33","perf":"REQUEST","recipient":"pyagent","sender":"PythonGW-c8e66e0f-b5d5-433b-bfa9-09be708ab4c9","inReplyTo":null,"data":"data_string"} }}
1583166207081 INFO org.arl.unet.sim.SimulationMasterContainer#2670:connectionClosed Connection tcp:///127.0.0.1:1101//127.0.0.1.51208 closed
[B@4e3d88df corresponds to [42], but I don't know how to decode it. In fact, I am more interested in sending strings than arrays. I have a lead about using a PDU, but how could it work with Python?
I am experimenting with how to propagate back-pressure correctly when I have ConnectedStreams as part of my computation graph. The problem: I have two sources, and one ingests data faster than the other; think of replaying some data where one source has rare events that we use to enrich the other source. These two sources are then connected in a stream that expects them to be at least somewhat synchronized, merges them together somehow (building tuples, enriching, ...), and returns a result.
With single-input streams it is fairly easy to implement backpressure: you simply spend a long time in the processElement function. With ConnectedStreams, my initial idea was to have some logic in each of the process functions that waits for the other stream to catch up. For example, I could have a buffer that is limited in time span (a span large enough to fit a watermark), and the function would not accept events that would push this span past the threshold. For example:
leftLock.aquire { nonEmptySignal =>
while (queueSpan() > capacity.toMillis && lastTs() < ctx.timestamp()) {
println("WAITING")
nonEmptySignal.await()
}
queueOp { queue =>
println(s"Left Event $value recieved ${Thread.currentThread()}")
queue.add(Left(value))
}
ctx.timerService().registerEventTimeTimer(value.ts)
}
The full code of my example is below (it is written with two locks, assuming access from two different threads, which is not the case, I think):
import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong}
import java.util.concurrent.locks.{Condition, ReentrantLock}
import scala.collection.JavaConverters._
import com.google.common.collect.MinMaxPriorityQueue
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.typeinfo.{TypeHint, TypeInformation}
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment
import org.apache.flink.streaming.api.functions.co.CoProcessFunction
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.util.Collector
import scala.collection.mutable
import scala.concurrent.duration._
trait Timestamped {
val ts: Long
}
case class StateObject(ts: Long, state: String) extends Timestamped
case class DataObject(ts: Long, data: String) extends Timestamped
case class StatefulDataObject(ts: Long, state: Option[String], data: String) extends Timestamped
class DataSource[A](factory: Long => A, rate: Int, speedUpFactor: Long = 0) extends RichSourceFunction[A] {
private val max = new AtomicLong()
private val isRunning = new AtomicBoolean(false)
private val speedUp = new AtomicLong(0)
private val WatermarkDelay = 5 seconds
override def cancel(): Unit = {
isRunning.set(false)
}
override def run(ctx: SourceFunction.SourceContext[A]): Unit = {
isRunning.set(true)
while (isRunning.get()) {
val time = System.currentTimeMillis() + speedUp.addAndGet(speedUpFactor)
val event = factory(time)
ctx.collectWithTimestamp(event, time)
println(s"Event $event sourced $speedUpFactor")
val watermark = time - WatermarkDelay.toMillis
if (max.get() < watermark) {
ctx.emitWatermark(new Watermark(time - WatermarkDelay.toMillis))
max.set(watermark)
}
Thread.sleep(rate)
}
}
}
class ConditionalOperator {
private val lock = new ReentrantLock()
private val signal: Condition = lock.newCondition()
def aquire[B](func: Condition => B): B = {
lock.lock()
try {
func(signal)
} finally {
lock.unlock()
}
}
}
class BlockingCoProcessFunction(capacity: FiniteDuration = 20 seconds)
extends CoProcessFunction[StateObject, DataObject, StatefulDataObject] {
private type MergedType = Either[StateObject, DataObject]
private lazy val leftLock = new ConditionalOperator()
private lazy val rightLock = new ConditionalOperator()
private var queueState: ValueState[MinMaxPriorityQueue[MergedType]] = _
private var dataState: ValueState[StateObject] = _
override def open(parameters: Configuration): Unit = {
super.open(parameters)
queueState = getRuntimeContext.getState(new ValueStateDescriptor[MinMaxPriorityQueue[MergedType]](
"event-queue",
TypeInformation.of(new TypeHint[MinMaxPriorityQueue[MergedType]]() {})
))
dataState = getRuntimeContext.getState(new ValueStateDescriptor[StateObject](
"event-state",
TypeInformation.of(new TypeHint[StateObject]() {})
))
}
override def processElement1(value: StateObject,
ctx: CoProcessFunction[StateObject, DataObject, StatefulDataObject]#Context,
out: Collector[StatefulDataObject]): Unit = {
leftLock.aquire { nonEmptySignal =>
while (queueSpan() > capacity.toMillis && lastTs() < ctx.timestamp()) {
println("WAITING")
nonEmptySignal.await()
}
queueOp { queue =>
println(s"Left Event $value recieved ${Thread.currentThread()}")
queue.add(Left(value))
}
ctx.timerService().registerEventTimeTimer(value.ts)
}
}
override def processElement2(value: DataObject,
ctx: CoProcessFunction[StateObject, DataObject, StatefulDataObject]#Context,
out: Collector[StatefulDataObject]): Unit = {
rightLock.aquire { nonEmptySignal =>
while (queueSpan() > capacity.toMillis && lastTs() < ctx.timestamp()) {
println("WAITING")
nonEmptySignal.await()
}
queueOp { queue =>
println(s"Right Event $value recieved ${Thread.currentThread()}")
queue.add(Right(value))
}
ctx.timerService().registerEventTimeTimer(value.ts)
}
}
override def onTimer(timestamp: Long,
ctx: CoProcessFunction[StateObject, DataObject, StatefulDataObject]#OnTimerContext,
out: Collector[StatefulDataObject]): Unit = {
println(s"Watermarked $timestamp")
leftLock.aquire { leftSignal =>
rightLock.aquire { rightSignal =>
queueOp { queue =>
while (Option(queue.peekFirst()).exists(x => timestampOf(x) <= timestamp)) {
queue.poll() match {
case Left(state) =>
dataState.update(state)
leftSignal.signal()
case Right(event) =>
println(s"Event $event emitted ${Thread.currentThread()}")
out.collect(
StatefulDataObject(
event.ts,
Option(dataState.value()).map(_.state),
event.data
)
)
rightSignal.signal()
}
}
}
}
}
}
private def queueOp[B](func: MinMaxPriorityQueue[MergedType] => B): B = queueState.synchronized {
val queue = Option(queueState.value()).
getOrElse(
MinMaxPriorityQueue.
orderedBy(Ordering.by((x: MergedType) => timestampOf(x))).create[MergedType]()
)
val result = func(queue)
queueState.update(queue)
result
}
private def timestampOf(data: MergedType): Long = data match {
case Left(y) =>
y.ts
case Right(y) =>
y.ts
}
private def queueSpan(): Long = {
queueOp { queue =>
val firstTs = Option(queue.peekFirst()).map(timestampOf).getOrElse(Long.MaxValue)
val lastTs = Option(queue.peekLast()).map(timestampOf).getOrElse(Long.MinValue)
println(s"Span: $firstTs - $lastTs = ${lastTs - firstTs}")
lastTs - firstTs
}
}
private def lastTs(): Long = {
queueOp { queue =>
Option(queue.peekLast()).map(timestampOf).getOrElse(Long.MinValue)
}
}
}
object BackpressureTest {
var data = new mutable.ArrayBuffer[DataObject]()
def main(args: Array[String]): Unit = {
val streamConfig = new Configuration()
val env = new StreamExecutionEnvironment(new LocalStreamEnvironment(streamConfig))
env.getConfig.disableSysoutLogging()
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
env.setParallelism(1)
val stateSource = env.addSource(new DataSource(ts => StateObject(ts, ts.toString), 1000))
val dataSource = env.addSource(new DataSource(ts => DataObject(ts, ts.toString), 100, 100))
stateSource.
connect(dataSource).
keyBy(_ => "", _ => "").
process(new BlockingCoProcessFunction()).
print()
env.execute()
}
}
The problem with connected streams is that it seems you cannot simply block in one of the process functions when its stream is too far ahead, since that blocks the other process function as well. On the other hand, if I simply accepted all events in this job, the process function would eventually run out of memory, since it would buffer the whole stream that is ahead.
So my question is: is it possible to propagate backpressure into each of the streams of ConnectedStreams separately, and if so, how? Or alternatively, is there any other nice way to deal with this issue? Possibly all the sources communicating somehow to keep them mostly at the same event time?
From my reading of the code in StreamTwoInputProcessor, it looks to me like the processInput() method is responsible for implementing the policy in question. Perhaps one could implement a variant that reads from whichever stream has the lower watermark, so long as it has unread input. Not sure what impact that would have overall, however.
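To illustrate the idea only (this is a hypothetical sketch, not Flink's actual StreamTwoInputProcessor API; all names below are invented), such a selection policy could look roughly like this:
case class InputStatus(watermark: Long, hasUnreadInput: Boolean)

// Pick which of the two inputs to read next: prefer the one with the lower
// watermark, falling back to whichever still has unread input.
def nextInputToRead(first: InputStatus, second: InputStatus): Option[Int] =
  (first.hasUnreadInput, second.hasUnreadInput) match {
    case (true, true)   => Some(if (first.watermark <= second.watermark) 1 else 2)
    case (true, false)  => Some(1)
    case (false, true)  => Some(2)
    case (false, false) => None // nothing readable right now
  }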
I'm trying to write a simple Akka Streams REST endpoint and a client for consuming this stream. But when I run the server and the client, the client is able to consume only part of the stream. I can't see any exception during execution.
Here are my server and client:
import akka.NotUsed
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.common.{EntityStreamingSupport, JsonEntityStreamingSupport}
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport
import akka.stream.{ActorAttributes, ActorMaterializer, Attributes, Supervision}
import akka.stream.scaladsl.{Flow, Source}
import akka.util.ByteString
import spray.json.DefaultJsonProtocol
import scala.io.StdIn
import scala.util.Random
object WebServer {
object Model {
case class Person(id: Int = Random.nextInt(), fName: String = Random.nextString(10), sName: String = Random.nextString(10))
}
object JsonProtocol extends SprayJsonSupport with DefaultJsonProtocol {
implicit val personFormat = jsonFormat(Model.Person.apply, "id", "firstName", "secondaryName")
}
def main(args: Array[String]) {
implicit val system = ActorSystem("my-system")
implicit val materializer = ActorMaterializer()
implicit val executionContext = system.dispatcher
val start = ByteString.empty
val sep = ByteString("\n")
val end = ByteString.empty
import JsonProtocol._
implicit val jsonStreamingSupport: JsonEntityStreamingSupport = EntityStreamingSupport.json()
.withFramingRenderer(Flow[ByteString].intersperse(start, sep, end))
.withParallelMarshalling(parallelism = 8, unordered = false)
val decider: Supervision.Decider = {
case ex: Throwable => {
println("Exception occurs")
ex.printStackTrace()
Supervision.Resume
}
}
val persons: Source[Model.Person, NotUsed] = Source.fromIterator(
() => (0 to 1000000).map(id => Model.Person(id = id)).iterator
)
.withAttributes(ActorAttributes.supervisionStrategy(decider))
.map(p => { println(p); p })
val route =
path("persons") {
get {
complete(persons)
}
}
val bindingFuture = Http().bindAndHandle(route, "localhost", 8080)
println(s"Server online at http://localhost:8080/\nPress RETURN to stop...")
StdIn.readLine()
bindingFuture
.flatMap(_.unbind())
.onComplete(_ => {
println("Stopping http server ...")
system.terminate()
})
}
}
and client:
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.{HttpRequest, Uri}
import akka.stream.{ActorAttributes, ActorMaterializer, Supervision}
import scala.util.{Failure, Success}
object WebClient {
def main(args: Array[String]): Unit = {
implicit val system = ActorSystem()
implicit val materializer = ActorMaterializer()
implicit val executionContext = system.dispatcher
val request = HttpRequest(uri = Uri("http://localhost:8080/persons"))
val response = Http().singleRequest(request)
val attributes = ActorAttributes.withSupervisionStrategy {
case ex: Throwable => {
println("Exception occurs")
ex.printStackTrace
Supervision.Resume
}
}
response.map(r => {
r.entity.dataBytes.withAttributes(attributes)
}).onComplete {
case Success(db) => db.map(bs => bs.utf8String).runForeach(println)
case Failure(ex) => ex.printStackTrace()
}
}
}
It works for 100, 1,000, and 10,000 persons, but does not work for more than 100,000.
It looks like there is some limit on the stream, but I can't find it.
The last record printed by the server on my local machine is (number 79101):
Person(79101,ⰷ瑐劲죗醂竜泲늎制䠸,䮳硝沢并⎗ᝨᫌꊭᐽ酡)
The last record on the client is (number 79048):
{"id":79048,"firstName":"췁頔䚐龫暀࡙頨捜昗㢵","secondaryName":"⏉ݾ袈庩컆◁ꄹ葪䑥Ϻ"}
Maybe somebody knows why this happens?
I found a solution. I have to explicitly add r.entity.withoutSizeLimit() on the client, and after that everything works as expected.
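For completeness, a minimal sketch of the changed response handling in the client above (only these lines differ from the original code):
response.map { r =>
  // withoutSizeLimit() lifts akka-http's default max-content-length check,
  // which would otherwise fail the entity stream once the limit is exceeded.
  r.entity.withoutSizeLimit().dataBytes.withAttributes(attributes)
}.onComplete {
  case Success(db) => db.map(_.utf8String).runForeach(println)
  case Failure(ex) => ex.printStackTrace()
}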