akka-http How to use MergeHub to throttle requests from client side - akka-stream

I use Source.queue to queue up HttpRequests and throttle it on the client side to download files from a remote server. I understand that Source.queue is not threadsafe and we need to use MergeHub to make it threadsafe. Following is the piece of code that uses Source.queue and uses cachedHostConnectionPool.
import java.io.File
import akka.actor.Actor
import akka.event.Logging
import akka.http.scaladsl.Http
import akka.http.scaladsl.client.RequestBuilding
import akka.http.scaladsl.model.{HttpResponse, HttpRequest, Uri}
import akka.stream._
import akka.stream.scaladsl._
import akka.util.ByteString
import com.typesafe.config.ConfigFactory
import scala.concurrent.{Promise, Future}
import scala.concurrent.duration._
import scala.util.{Failure, Success}
class HttpClient extends Actor with RequestBuilding {
implicit val system = context.system
val logger = Logging(system, this)
implicit lazy val materializer = ActorMaterializer()
val config = ConfigFactory.load()
val remoteHost = config.getString("pool.connection.host")
val remoteHostPort = config.getInt("pool.connection.port")
val queueSize = config.getInt("pool.queueSize")
val throttleSize = config.getInt("pool.throttle.numberOfRequests")
val throttleDuration = config.getInt("pool.throttle.duration")
import scala.concurrent.ExecutionContext.Implicits.global
val connectionPool = Http().cachedHostConnectionPool[Promise[HttpResponse]](host = remoteHost, port = remoteHostPort)
// Construct a Queue
val requestQueue =
Source.queue[(HttpRequest, Promise[HttpResponse])](queueSize, OverflowStrategy.backpressure)
.throttle(throttleSize, throttleDuration.seconds, 1, ThrottleMode.shaping)
.via(connectionPool)
.toMat(Sink.foreach({
case ((Success(resp), p)) => p.success(resp)
case ((Failure(error), p)) => p.failure(error)
}))(Keep.left)
.run()
// Convert Promise[HttpResponse] to Future[HttpResponse]
def queueRequest(request: HttpRequest): Future[HttpResponse] = {
val responsePromise = Promise[HttpResponse]()
requestQueue.offer(request -> responsePromise).flatMap {
case QueueOfferResult.Enqueued => responsePromise.future
case QueueOfferResult.Dropped => Future.failed(new RuntimeException("Queue overflowed. Try again later."))
case QueueOfferResult.Failure(ex) => Future.failed(ex)
case QueueOfferResult.QueueClosed => Future.failed(new RuntimeException("Queue was closed (pool shut down) while running the request. Try again later."))
}
}
def receive = {
case "download" =>
val uri = Uri(s"http://localhost:8080/file_csv.csv")
downloadFile(uri, new File("/tmp/compass_audience.csv"))
}
def downloadFile(uri: Uri, destinationFilePath: File) = {
def fileSink: Sink[ByteString, Future[IOResult]] =
Flow[ByteString].buffer(512, OverflowStrategy.backpressure)
.toMat(FileIO.toPath(destinationFilePath.toPath)) (Keep.right)
// Submit to queue and execute HttpRequest and write HttpResponse to file
Source.fromFuture(queueRequest(Get(uri)))
.flatMapConcat(_.entity.dataBytes)
.via(Framing.delimiter(ByteString("\n"), maximumFrameLength = 10000, allowTruncation = true))
.map(_.utf8String)
.map(d => s"$d\n")
.map(ByteString(_))
.runWith(fileSink)
}
}
However, when I use MergeHub, it returns Sink[(HttpRequest, Promise[HttpResponse]), NotUsed]. I need to extract the response.entity.dataBytes and write the response to a file using a filesink. I am not able figure out how to use MergeHub to achieve this. Any help will be appreciated.
val hub: Sink[(HttpRequest, Promise[HttpResponse]), NotUsed] =
MergeHub.source[(HttpRequest, Promise[HttpResponse])](perProducerBufferSize = queueSize)
.throttle(throttleSize, throttleDuration.seconds, 1, ThrottleMode.shaping)
.via(connectionPool)
.toMat(Sink.foreach({
case ((Success(resp), p)) => p.success(resp)
case ((Failure(error), p)) => p.failure(error)
}))(Keep.left)
.run()

Source.Queue is actually thread safe now. If you want to use MergeHub:
private lazy val poolFlow: Flow[(HttpRequest, Promise[HttpResponse]), (Try[HttpResponse], Promise[HttpResponse]), Http.HostConnectionPool] =
Http().cachedHostConnectionPool[Promise[HttpResponse]](host).tail.head, port, connectionPoolSettings)
val ServerSink =
poolFlow.toMat(Sink.foreach({
case ((Success(resp), p)) => p.success(resp)
case ((Failure(e), p)) => p.failure(e)
}))(Keep.left)
// Attach a MergeHub Source to the consumer. This will materialize to a
// corresponding Sink.
val runnableGraph: RunnableGraph[Sink[(HttpRequest, Promise[HttpResponse]), NotUsed]] =
MergeHub.source[(HttpRequest, Promise[HttpResponse])](perProducerBufferSize = 16).to(ServerSink)
val toConsumer: Sink[(HttpRequest, Promise[HttpResponse]), NotUsed] = runnableGraph.run()
protected[akkahttp] def executeRequest[T](httpRequest: HttpRequest, unmarshal: HttpResponse => Future[T]): Future[T] = {
val responsePromise = Promise[HttpResponse]()
Source.single((httpRequest -> responsePromise)).runWith(toConsumer)
responsePromise.future.flatMap(handleHttpResponse(_, unmarshal))
)
}
}

Related

Scala Akka RestartSource onFailuresWithBackoff not working complaining dead letters encountered

I have created a RetryFlow like below,following this article https://doc.akka.io/docs/akka/current/stream/operators/RetryFlow/withBackoff.html
private val httpFlow: Flow[HttpRequest, HttpResponse, Any] =
Http().outgoingConnection(host, port)
val retryFlow: Flow[HttpRequest, HttpResponse, Any] =
RetryFlow.withBackoff(minBackoff = 10.millis, maxBackoff = 5.seconds, randomFactor = 0d, maxRetries = 20, httpFlow)(
decideRetry = {
case (request: HttpRequest, response) =>
response.status match {
case StatusCodes.OK => Some(request)
case _ => None
}
})
And then processing flow like
val request = Post(Uri(path), some_json_payload)(
stringMarshaller(MediaTypes.`application/json`),
system.executionContext
)
httpClient.processRequest(request, retryFlow) map (...)
My http client looks like below, added a RestartSettings and restarting source on onFailuresWithBackoff
class HttpClient(implicit val system: ActorSystem[_]) {
implicit val materializer: Materializer = Materializer(system)
private def checkSuccess(response: HttpResponse, path: String): Either[Throwable, HttpResponse] =
Either.cond(
response.status == StatusCodes.OK,
response,
new Throwable(s"Could not GET $path, Service returned ${response.status.toString()}"),
)
private def httpRequest(
request: HttpRequest,
flow: Flow[HttpRequest, HttpResponse, Any],
): Future[Either[Throwable, HttpResponse]] =
Source.single(request).via(flow)
.runWith(Sink.head) map { response =>
checkSuccess(response, request.uri.path.toString())
}
private def unmarshallString(response: Either[Throwable, HttpResponse]): Future[Either[Throwable, String]] =
response.fold(
err => Future(err.asLeft[String]),
r => Unmarshal(r.entity).to[String].map(_.asRight[Throwable]),
)
def processRequest(request: HttpRequest, flow: Flow[HttpRequest, HttpResponse, Any]): Future[Enclosed[Json]] =
httpRequest(request, flow) flatMap unmarshallString map (_.flatMap(parse))
}
I am pretty new especially in akka , what I am missing here .

Is there any way to zip two or more streams with order of event time in Flink?

Suppose we have a stream of data with this format:
example of input data stream:
case class InputElement(key:String,objectType:String,value:Boolean)
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
val inputStream:DataSet[InputElement] = env.fromElements(
InputElement("k1","t1",true)
,InputElement("k2","t1",true)
,InputElement("k2","t2",true)
,InputElement("k1","t2",false)
,InputElement("k1","t2",true)
,InputElement("k1","t1",false)
,InputElement("k2","t2",false)
)
it is semantically equal to have these streams:
val inputStream_k1_t1 = env.fromElements(
InputElement("k1","t1",true),
InputElement("k1","t1",false)
)
val inputStream_k1_t2 = env.fromElements(
InputElement("k1","t2",false),
,InputElement("k1","t2",true)
)
val inputStream_k2_t1 = env.fromElements(
InputElement("k2","t1",true)
)
val inputStream_k2_t2 = env.fromElements(
InputElement("k2","t2",true),
InputElement("k2","t2",false)
)
I want to have an output type like this:
case class OutputElement(key:String,values:Map[String,Boolean])
expected output data stream for the example input data:
val expectedOutputStream = env.fromElements(
OutputElement("k1",Map( "t1"->true ,"t2"->false)),
OutputElement("k2",Map("t1"->true,"t2"->true)),
OutputElement("k1",Map("t1"->false,"t2"->true)),
OutputElement("k2",Map("t2"->false))
)
==========================================
edit 1:
after some considerations about the problem the subject of the question changed:
I want to have another input stream that shows which keys are subscribed to which object types:
case class SubscribeRule(strategy:String,patterns:Set[String])
val subscribeStream: DataStream[SubscribeRule] = env.fromElements(
SubscribeRule("s1",Set("p1","p2")),
SubscribeRule("s2",Set("p1","p2"))
)
now I want to have this output:
(the result stream does not emit any thing till all the subscribed objectType are received:
val expectedOutputStream = env.fromElements(
OutputElement("k1",Map( "t1"->true ,"t2"->false)),
OutputElement("k2",Map("t1"->true,"t2"->true)),
OutputElement("k1",Map("t1"->false,"t2"->true)),
// OutputElement("k2",Map("t2"->false)) # this element will emit when a k2-t1 input message recieved
)
App.scala:
import org.apache.flink.api.common.state.MapStateDescriptor
import org.apache.flink.api.scala.createTypeInformation
import org.apache.flink.streaming.api.datastream.BroadcastStream
import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, StreamExecutionEnvironment}
object App {
case class updateStateResult(updatedState:Map[String,List[Boolean]],output:Map[String,Boolean])
case class InputElement(key:String,objectType:String,passed:Boolean)
case class SubscribeRule(strategy:String,patterns:Set[String])
case class OutputElement(key:String,result:Map[String,Boolean])
def main(args: Array[String]): Unit = {
val env = StreamExecutionEnvironment.getExecutionEnvironment
// checkpoint every 10 seconds
val subscribeStream: DataStream[SubscribeRule] = env.fromElements(
SubscribeRule("s1",Set("p1","p2")),
SubscribeRule("s2",Set("p1","p2"))
)
val broadcastStateDescriptor =
new MapStateDescriptor[String, Set[String]]("subscribes", classOf[String], classOf[Set[String]])
val subscribeStreamBroadcast: BroadcastStream[SubscribeRule] =
subscribeStream.broadcast(broadcastStateDescriptor)
val inputStream = env.fromElements(
InputElement("s1","p1",true),
InputElement("s1","p2",true),
InputElement("s2","p1",false),
InputElement("s2","p2",true),
InputElement("s2","p2",false),
InputElement("s1","p1",false),
InputElement("s2","p1",true),
InputElement("s1","p2",true),
)
val expected = List(
OutputElement("s1",Map("p2"->true,"p1"->true)),
OutputElement("s2",Map("p2"->true,"p1"->false)),
OutputElement("s2",Map("p2"->false,"p1"->true)),
OutputElement("s1",Map("p2"->true,"p1"->false))
)
val keyedInputStream: KeyedStream[InputElement, String] = inputStream.keyBy(_.key)
val result = keyedInputStream
.connect(subscribeStreamBroadcast)
.process(new ZippingFunc())
result.print
env.execute("test stream")
}
}
ZippingFunc.scala
import App.{InputElement, OutputElement, SubscribeRule, updateStateResult}
import org.apache.flink.api.common.state.{ MapState, MapStateDescriptor, ReadOnlyBroadcastState}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction
import org.apache.flink.util.Collector
import java.util.{Map => JavaMap}
import scala.collection.JavaConverters.{iterableAsScalaIterableConverter, mapAsJavaMapConverter}
class ZippingFunc extends KeyedBroadcastProcessFunction[String, InputElement,SubscribeRule , OutputElement] {
private var localState: MapState[String,List[Boolean]] = _
private lazy val broadcastStateDesc =
new MapStateDescriptor[String, Set[String]]("subscribes", classOf[String], classOf[Set[String]])
override def open(parameters: Configuration) {
val localStateDesc: MapStateDescriptor[String,List[Boolean]] =
new MapStateDescriptor[String, List[Boolean]]("sourceMap1", classOf[String], classOf[List[Boolean]])
localState = getRuntimeContext.getMapState(localStateDesc)
}
def updateVar(objectType:String,value:Boolean): Option[Map[String, Boolean]] ={
val values = localState.get(objectType)
localState.put(objectType, value::values)
pickOutputs(localState.entries().asScala).map((ur: updateStateResult) => {
localState.putAll(ur.updatedState.asJava)
ur.output
})
}
def pickOutputs(entries: Iterable[JavaMap.Entry[String, List[Boolean]]]): Option[updateStateResult] = {
val mapped: Iterable[Option[(String, Boolean, List[Boolean])]] = entries.map(
(x: JavaMap.Entry[String, List[Boolean]]) => {
val key: String = x.getKey
val value: List[Boolean] = x.getValue
val head: Option[Boolean] = value.headOption
head.map(
h => {
(key, h, value.tail)
}
)
}
)
sequenceOption(mapped).map((x: List[(String, Boolean, List[Boolean])]) => {
updateStateResult(
x.map(y => (y._1, y._3)).toMap,
x.map(y => (y._1, y._2)).toMap
)
}
)
}
def sequenceOption[A](l:Iterable[Option[A]]): Option[List[A]] =
{
l.foldLeft[Option[List[A]]](Some(List.empty[A]))(
(acc: Option[List[A]], e: Option[A]) =>{
for {
xs <- acc
x <- e
} yield x :: xs
}
)
}
override def processElement(value: InputElement, ctx: KeyedBroadcastProcessFunction[String, InputElement, SubscribeRule, OutputElement]#ReadOnlyContext, out: Collector[OutputElement]): Unit = {
val bs: ReadOnlyBroadcastState[String, Set[String]] = ctx.getBroadcastState(broadcastStateDesc)
if(bs.contains(value.key)) {
val allPatterns: Set[String] = bs.get(value.key)
allPatterns.map((patternName: String) =>
if (!localState.contains(patternName))
localState.put(patternName, List.empty)
)
updateVar(value.objectType, value.passed)
.map((r: Map[String, Boolean]) =>
out.collect(OutputElement(value.key, r))
)
}
}
// )
override def processBroadcastElement(value: SubscribeRule, ctx: KeyedBroadcastProcessFunction[String, InputElement, SubscribeRule, OutputElement]#Context, out: Collector[OutputElement]): Unit = {
val bs = ctx.getBroadcastState(broadcastStateDesc)
bs.put(value.strategy,value.patterns)
}
}

Backpressure in connected flink streams

I am experimenting with how to propagate back-pressure correctly when I have ConnectedStreams as part of my computation graph. The problem is: I have two sources and one ingests data faster than the other, think we want to replay some data and one source has rare events that we use to enrich the other source. These two sources are then connected in a stream that expects them to be at least somewhat synchronized, merges them together somehow (making tuple, enriching, ...) and returns a result.
With single input streams its fairly easy to implement backpressure, you simply have to spend long time in the processElement function. With connectedstreams my initial idea was to have some logic in each of the processFunctions that waits for the other stream to catch up. For example I could have a buffer thats time-span limited (large enough span to fit a watermark) and the function would not accept events that would make this span pass a threshold. For example:
leftLock.aquire { nonEmptySignal =>
while (queueSpan() > capacity.toMillis && lastTs() < ctx.timestamp()) {
println("WAITING")
nonEmptySignal.await()
}
queueOp { queue =>
println(s"Left Event $value recieved ${Thread.currentThread()}")
queue.add(Left(value))
}
ctx.timerService().registerEventTimeTimer(value.ts)
}
Full code of my example is below (its written with two locks assuming access from two different threads, which is not the case - i think):
import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong}
import java.util.concurrent.locks.{Condition, ReentrantLock}
import scala.collection.JavaConverters._
import com.google.common.collect.MinMaxPriorityQueue
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.api.common.typeinfo.{TypeHint, TypeInformation}
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.api.scala._
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.environment.LocalStreamEnvironment
import org.apache.flink.streaming.api.functions.co.CoProcessFunction
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.api.watermark.Watermark
import org.apache.flink.util.Collector
import scala.collection.mutable
import scala.concurrent.duration._
trait Timestamped {
val ts: Long
}
case class StateObject(ts: Long, state: String) extends Timestamped
case class DataObject(ts: Long, data: String) extends Timestamped
case class StatefulDataObject(ts: Long, state: Option[String], data: String) extends Timestamped
class DataSource[A](factory: Long => A, rate: Int, speedUpFactor: Long = 0) extends RichSourceFunction[A] {
private val max = new AtomicLong()
private val isRunning = new AtomicBoolean(false)
private val speedUp = new AtomicLong(0)
private val WatermarkDelay = 5 seconds
override def cancel(): Unit = {
isRunning.set(false)
}
override def run(ctx: SourceFunction.SourceContext[A]): Unit = {
isRunning.set(true)
while (isRunning.get()) {
val time = System.currentTimeMillis() + speedUp.addAndGet(speedUpFactor)
val event = factory(time)
ctx.collectWithTimestamp(event, time)
println(s"Event $event sourced $speedUpFactor")
val watermark = time - WatermarkDelay.toMillis
if (max.get() < watermark) {
ctx.emitWatermark(new Watermark(time - WatermarkDelay.toMillis))
max.set(watermark)
}
Thread.sleep(rate)
}
}
}
class ConditionalOperator {
private val lock = new ReentrantLock()
private val signal: Condition = lock.newCondition()
def aquire[B](func: Condition => B): B = {
lock.lock()
try {
func(signal)
} finally {
lock.unlock()
}
}
}
class BlockingCoProcessFunction(capacity: FiniteDuration = 20 seconds)
extends CoProcessFunction[StateObject, DataObject, StatefulDataObject] {
private type MergedType = Either[StateObject, DataObject]
private lazy val leftLock = new ConditionalOperator()
private lazy val rightLock = new ConditionalOperator()
private var queueState: ValueState[MinMaxPriorityQueue[MergedType]] = _
private var dataState: ValueState[StateObject] = _
override def open(parameters: Configuration): Unit = {
super.open(parameters)
queueState = getRuntimeContext.getState(new ValueStateDescriptor[MinMaxPriorityQueue[MergedType]](
"event-queue",
TypeInformation.of(new TypeHint[MinMaxPriorityQueue[MergedType]]() {})
))
dataState = getRuntimeContext.getState(new ValueStateDescriptor[StateObject](
"event-state",
TypeInformation.of(new TypeHint[StateObject]() {})
))
}
override def processElement1(value: StateObject,
ctx: CoProcessFunction[StateObject, DataObject, StatefulDataObject]#Context,
out: Collector[StatefulDataObject]): Unit = {
leftLock.aquire { nonEmptySignal =>
while (queueSpan() > capacity.toMillis && lastTs() < ctx.timestamp()) {
println("WAITING")
nonEmptySignal.await()
}
queueOp { queue =>
println(s"Left Event $value recieved ${Thread.currentThread()}")
queue.add(Left(value))
}
ctx.timerService().registerEventTimeTimer(value.ts)
}
}
override def processElement2(value: DataObject,
ctx: CoProcessFunction[StateObject, DataObject, StatefulDataObject]#Context,
out: Collector[StatefulDataObject]): Unit = {
rightLock.aquire { nonEmptySignal =>
while (queueSpan() > capacity.toMillis && lastTs() < ctx.timestamp()) {
println("WAITING")
nonEmptySignal.await()
}
queueOp { queue =>
println(s"Right Event $value recieved ${Thread.currentThread()}")
queue.add(Right(value))
}
ctx.timerService().registerEventTimeTimer(value.ts)
}
}
override def onTimer(timestamp: Long,
ctx: CoProcessFunction[StateObject, DataObject, StatefulDataObject]#OnTimerContext,
out: Collector[StatefulDataObject]): Unit = {
println(s"Watermarked $timestamp")
leftLock.aquire { leftSignal =>
rightLock.aquire { rightSignal =>
queueOp { queue =>
while (Option(queue.peekFirst()).exists(x => timestampOf(x) <= timestamp)) {
queue.poll() match {
case Left(state) =>
dataState.update(state)
leftSignal.signal()
case Right(event) =>
println(s"Event $event emitted ${Thread.currentThread()}")
out.collect(
StatefulDataObject(
event.ts,
Option(dataState.value()).map(_.state),
event.data
)
)
rightSignal.signal()
}
}
}
}
}
}
private def queueOp[B](func: MinMaxPriorityQueue[MergedType] => B): B = queueState.synchronized {
val queue = Option(queueState.value()).
getOrElse(
MinMaxPriorityQueue.
orderedBy(Ordering.by((x: MergedType) => timestampOf(x))).create[MergedType]()
)
val result = func(queue)
queueState.update(queue)
result
}
private def timestampOf(data: MergedType): Long = data match {
case Left(y) =>
y.ts
case Right(y) =>
y.ts
}
private def queueSpan(): Long = {
queueOp { queue =>
val firstTs = Option(queue.peekFirst()).map(timestampOf).getOrElse(Long.MaxValue)
val lastTs = Option(queue.peekLast()).map(timestampOf).getOrElse(Long.MinValue)
println(s"Span: $firstTs - $lastTs = ${lastTs - firstTs}")
lastTs - firstTs
}
}
private def lastTs(): Long = {
queueOp { queue =>
Option(queue.peekLast()).map(timestampOf).getOrElse(Long.MinValue)
}
}
}
object BackpressureTest {
var data = new mutable.ArrayBuffer[DataObject]()
def main(args: Array[String]): Unit = {
val streamConfig = new Configuration()
val env = new StreamExecutionEnvironment(new LocalStreamEnvironment(streamConfig))
env.getConfig.disableSysoutLogging()
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
env.setParallelism(1)
val stateSource = env.addSource(new DataSource(ts => StateObject(ts, ts.toString), 1000))
val dataSource = env.addSource(new DataSource(ts => DataObject(ts, ts.toString), 100, 100))
stateSource.
connect(dataSource).
keyBy(_ => "", _ => "").
process(new BlockingCoProcessFunction()).
print()
env.execute()
}
}
The problem with connected streams is it seems you cant simply block in one of the processFunctions when its stream is too far ahead, since that blocks the other processFunction aswell. On the other hand if i simply accepted all events in this job eventually the process function would run out of memory. Since it would buffer the whole stream that is ahead.
So my question is: Is it possible to propagate backpressure into each of the streams in ConnectedStreams separately and if so, how? Or alternatively, is there any other nice way to deal with this issue? Possibly all the sources communicating somehow to keep them mostly at the same event-time?
From my reading of the code in StreamTwoInputProcessor, it looks to me like the processInput() method is responsible for implementing the policy in question. Perhaps one could implement a variant that reads from whichever stream has the lower watermark, so long as it has unread input. Not sure what impact that would have overall, however.

Akka http-client can't consume all stream of data from server

Im trying to write a simple akka stream rest endpoint and client for consuming this stream. But then i try to run server and client, client is able to consume only part of stream. I can't see any exception during execution.
Here are my server and client:
import akka.NotUsed
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.common.{EntityStreamingSupport, JsonEntityStreamingSupport}
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport
import akka.stream.{ActorAttributes, ActorMaterializer, Attributes, Supervision}
import akka.stream.scaladsl.{Flow, Source}
import akka.util.ByteString
import spray.json.DefaultJsonProtocol
import scala.io.StdIn
import scala.util.Random
object WebServer {
object Model {
case class Person(id: Int = Random.nextInt(), fName: String = Random.nextString(10), sName: String = Random.nextString(10))
}
object JsonProtocol extends SprayJsonSupport with DefaultJsonProtocol {
implicit val personFormat = jsonFormat(Model.Person.apply, "id", "firstName", "secondaryName")
}
def main(args: Array[String]) {
implicit val system = ActorSystem("my-system")
implicit val materializer = ActorMaterializer()
implicit val executionContext = system.dispatcher
val start = ByteString.empty
val sep = ByteString("\n")
val end = ByteString.empty
import JsonProtocol._
implicit val jsonStreamingSupport: JsonEntityStreamingSupport = EntityStreamingSupport.json()
.withFramingRenderer(Flow[ByteString].intersperse(start, sep, end))
.withParallelMarshalling(parallelism = 8, unordered = false)
val decider: Supervision.Decider = {
case ex: Throwable => {
println("Exception occurs")
ex.printStackTrace()
Supervision.Resume
}
}
val persons: Source[Model.Person, NotUsed] = Source.fromIterator(
() => (0 to 1000000).map(id => Model.Person(id = id)).iterator
)
.withAttributes(ActorAttributes.supervisionStrategy(decider))
.map(p => { println(p); p })
val route =
path("persons") {
get {
complete(persons)
}
}
val bindingFuture = Http().bindAndHandle(route, "localhost", 8080)
println(s"Server online at http://localhost:8080/\nPress RETURN to stop...")
StdIn.readLine()
bindingFuture
.flatMap(_.unbind())
.onComplete(_ => {
println("Stopping http server ...")
system.terminate()
})
}
}
and client:
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.{HttpRequest, Uri}
import akka.stream.{ActorAttributes, ActorMaterializer, Supervision}
import scala.util.{Failure, Success}
object WebClient {
def main(args: Array[String]): Unit = {
implicit val system = ActorSystem()
implicit val materializer = ActorMaterializer()
implicit val executionContext = system.dispatcher
val request = HttpRequest(uri = Uri("http://localhost:8080/persons"))
val response = Http().singleRequest(request)
val attributes = ActorAttributes.withSupervisionStrategy {
case ex: Throwable => {
println("Exception occurs")
ex.printStackTrace
Supervision.Resume
}
}
response.map(r => {
r.entity.dataBytes.withAttributes(attributes)
}).onComplete {
case Success(db) => db.map(bs => bs.utf8String).runForeach(println)
case Failure(ex) => ex.printStackTrace()
}
}
}
it works for 100, 1000, 10 000 persons but does not work for > 100 000'.
It looks like there is some limit for stream but i can't find it
Last record has been printed by server on my local machine is (with number 79101):
Person(79101,ⰷ瑐劲죗醂竜泲늎制䠸,䮳硝沢并⎗ᝨᫌꊭᐽ酡)
Last record on client is(with number 79048):
{"id":79048,"firstName":"췁頔䚐龫暀࡙頨捜昗㢵","secondaryName":"⏉ݾ袈庩컆◁ꄹ葪䑥Ϻ"}
Maybe somebody know why it happens?
I found a solution. I have to explicitly add r.entity.withoutSizeLimit() on client and after that all works as expected

Uploading file from Angular ng-file-upload to Play for Scala

I'm trying to upload a .csv file using ng-file-upload in the browser and Play for Scala 2.5.x at the backend.
If I use moveTo in Play the file is uploaded, but generated with headers and trailers instead of the plain file:
request.body.moveTo(new File("c:\\tmp\\uploaded\\filetest.csv"))
Is there a way to save only the data part?
Alternatively, I tried the following
def doUpload = Action(parse.multipartFormData) { request =>
println(request.contentType.get)
println(request.body)
request.body.file("file").map { file =>
val filename = file.filename
val contentType = file.contentType
file.ref.moveTo(new File("c:\\tmp\\uploaded\\filetest.csv"))
}
Ok("file uploaded at " + new java.util.Date())
}
But the map is empty. This is what I get in the first two println statements:
multipart/form-data
MultipartFormData(Map(),Vector(FilePart(file,hello2.csv,Some(application/vnd.ms-excel),TemporaryFile(C:\Users\BUSINE~1\AppData\Local\Temp\playtemp2236977636541678879\multipartBody2268668511935303172asTemporaryFile))),Vector())
Meaning that I am receiving something, however I cannot extract it. Any ideas?
A bit verbose maybe but it does what you want.
Note that I am on a Mac so you may have to change the /tmp/uploaded/ path in copyFile since it looks like you are on Windows.
package controllers
import java.io.File
import java.nio.file.attribute.PosixFilePermission._
import java.nio.file.attribute.PosixFilePermissions
import java.nio.file.{Files, Path}
import java.util
import javax.inject._
import akka.stream.IOResult
import akka.stream.scaladsl._
import akka.util.ByteString
import play.api._
import play.api.data.Form
import play.api.data.Forms._
import play.api.i18n.MessagesApi
import play.api.libs.streams._
import play.api.mvc.MultipartFormData.FilePart
import play.api.mvc._
import play.core.parsers.Multipart.FileInfo
import scala.concurrent.Future
import java.nio.file.StandardCopyOption._
import java.nio.file.Paths
case class FormData(filename: String)
// Type for multipart body parser
type FilePartHandler[A] = FileInfo => Accumulator[ByteString, FilePart[A]]
val form = Form(mapping("file" -> text)(FormData.apply)(FormData.unapply))
private def deleteTempFile(file: File) = Files.deleteIfExists(file.toPath)
// Copies temp file to your loc with provided name
private def copyFile(file: File, name: String) =
Files.copy(file.toPath(), Paths.get("/tmp/uploaded/", ("copy_" + name)), REPLACE_EXISTING)
// FilePartHandler which returns a File, rather than Play's TemporaryFile class
private def handleFilePartAsFile: FilePartHandler[File] = {
case FileInfo(partName, filename, contentType) =>
val attr = PosixFilePermissions.asFileAttribute(util.EnumSet.of(OWNER_READ, OWNER_WRITE))
val path: Path = Files.createTempFile("multipartBody", "tempFile", attr)
val file = path.toFile
val fileSink: Sink[ByteString, Future[IOResult]] = FileIO.toPath(file.toPath())
val accumulator: Accumulator[ByteString, IOResult] = Accumulator(fileSink)
accumulator.map {
case IOResult(count, status) =>
FilePart(partName, filename, contentType, file)
} (play.api.libs.concurrent.Execution.defaultContext)
}
// The action
def doUpload = Action(parse.multipartFormData(handleFilePartAsFile)) { implicit request =>
val fileOption = request.body.file("file").map {
case FilePart(key, filename, contentType, file) =>
val copy = copyFile(file, filename)
val deleted = deleteTempFile(file) // delete original uploaded file after we have
copy
}
Ok(s"Uploaded: ${fileOption}")
}

Resources