Apache Camel: multiple pollEnrich() calls and error on the move parameter - apache-camel

I'm using Apache Camel version 2.21.1 and I'm having trouble with pollEnrich when downloading a specific file from FTP. When I download two files with pollEnrich and the operation takes a few minutes (about 4 minutes), Camel fails while moving the first file into the .done directory.
I get this exception:
WARN o.a.camel.component.file.GenericFileOnCompletion - Error during commit. Exchange[ID-XXX16563-1529920620474-0-8]. Caused by: [org.apache.camel.component.file.GenericFileOperationFailedException - File operation failed: 250 CWD command successful.
Connection has been shutdown: javax.net.ssl.SSLException: java.net.SocketException: Connection reset by peer: socket write error. Code: 250]
org.apache.camel.component.file.GenericFileOperationFailedException: File operation failed: 250 CWD command successful.
Connection has been shutdown: javax.net.ssl.SSLException: java.net.SocketException: Connection reset by peer: socket write error. Code: 250
at org.apache.camel.component.file.remote.FtpOperations.buildDirectory(FtpOperations.java:352)
at org.apache.camel.component.file.strategy.GenericFileProcessStrategySupport.renameFile(GenericFileProcessStrategySupport.java:106)
at org.apache.camel.component.file.strategy.GenericFileRenameProcessStrategy.commit(GenericFileRenameProcessStrategy.java:111)
at org.apache.camel.component.file.GenericFileOnCompletion.processStrategyCommit(GenericFileOnCompletion.java:127)
at org.apache.camel.component.file.GenericFileOnCompletion.onCompletion(GenericFileOnCompletion.java:83)
at org.apache.camel.component.file.GenericFileOnCompletion.onComplete(GenericFileOnCompletion.java:57)
at org.apache.camel.util.UnitOfWorkHelper.doneSynchronizations(UnitOfWorkHelper.java:104)
at org.apache.camel.impl.DefaultUnitOfWork.done(DefaultUnitOfWork.java:243)
at org.apache.camel.util.UnitOfWorkHelper.doneUow(UnitOfWorkHelper.java:65)
at org.apache.camel.processor.CamelInternalProcessor$UnitOfWorkProcessorAdvice.after(CamelInternalProcessor.java:685)
at org.apache.camel.processor.CamelInternalProcessor$UnitOfWorkProcessorAdvice.after(CamelInternalProcessor.java:634)
at org.apache.camel.processor.CamelInternalProcessor$InternalCallback.done(CamelInternalProcessor.java:251)
at org.apache.camel.processor.Pipeline.process(Pipeline.java:127)
at org.apache.camel.processor.CamelInternalProcessor.process(CamelInternalProcessor.java:201)
at org.apache.camel.component.aws.sqs.SqsConsumer.processBatch(SqsConsumer.java:206)
at org.apache.camel.component.aws.sqs.SqsConsumer.poll(SqsConsumer.java:111)
at org.apache.camel.impl.ScheduledPollConsumer.doRun(ScheduledPollConsumer.java:174)
at org.apache.camel.impl.ScheduledPollConsumer.run(ScheduledPollConsumer.java:101)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: javax.net.ssl.SSLException: Connection has been shutdown: javax.net.ssl.SSLException: java.net.SocketException: Connection reset by peer: socket write error
at sun.security.ssl.SSLSocketImpl.checkEOF(SSLSocketImpl.java:1541)
at sun.security.ssl.SSLSocketImpl.checkWrite(SSLSocketImpl.java:1553)
at sun.security.ssl.AppOutputStream.write(AppOutputStream.java:71)
at sun.nio.cs.StreamEncoder.writeBytes(StreamEncoder.java:221)
at sun.nio.cs.StreamEncoder.implFlushBuffer(StreamEncoder.java:291)
at sun.nio.cs.StreamEncoder.implFlush(StreamEncoder.java:295)
at sun.nio.cs.StreamEncoder.flush(StreamEncoder.java:141)
at java.io.OutputStreamWriter.flush(OutputStreamWriter.java:229)
at java.io.BufferedWriter.flush(BufferedWriter.java:254)
at org.apache.commons.net.ftp.FTP.__send(FTP.java:545)
at org.apache.commons.net.ftp.FTP.sendCommand(FTP.java:519)
at org.apache.commons.net.ftp.FTPSClient.sendCommand(FTPSClient.java:568)
at org.apache.commons.net.ftp.FTP.sendCommand(FTP.java:648)
at org.apache.commons.net.ftp.FTP.sendCommand(FTP.java:622)
at org.apache.commons.net.ftp.FTP.pwd(FTP.java:1495)
at org.apache.commons.net.ftp.FTPClient.printWorkingDirectory(FTPClient.java:2738)
at org.apache.camel.component.file.remote.FtpOperations.buildDirectory(FtpOperations.java:329)
... 24 common frames omitted
Caused by: javax.net.ssl.SSLException: java.net.SocketException: Connection reset by peer: socket write error
at sun.security.ssl.Alerts.getSSLException(Alerts.java:208)
at sun.security.ssl.SSLSocketImpl.fatal(SSLSocketImpl.java:1949)
at sun.security.ssl.SSLSocketImpl.fatal(SSLSocketImpl.java:1906)
at sun.security.ssl.SSLSocketImpl.handleException(SSLSocketImpl.java:1870)
at sun.security.ssl.SSLSocketImpl.handleException(SSLSocketImpl.java:1815)
at sun.security.ssl.AppOutputStream.write(AppOutputStream.java:128)
at sun.nio.cs.StreamEncoder.writeBytes(StreamEncoder.java:221)
at sun.nio.cs.StreamEncoder.implFlushBuffer(StreamEncoder.java:291)
at sun.nio.cs.StreamEncoder.implFlush(StreamEncoder.java:295)
at sun.nio.cs.StreamEncoder.flush(StreamEncoder.java:141)
at java.io.OutputStreamWriter.flush(OutputStreamWriter.java:229)
at java.io.BufferedWriter.flush(BufferedWriter.java:254)
at org.apache.commons.net.ftp.FTP.__send(FTP.java:545)
at org.apache.commons.net.ftp.FTP.sendCommand(FTP.java:519)
at org.apache.commons.net.ftp.FTPSClient.sendCommand(FTPSClient.java:568)
at org.apache.commons.net.ftp.FTP.sendCommand(FTP.java:648)
at org.apache.commons.net.ftp.FTP.sendCommand(FTP.java:622)
at org.apache.commons.net.ftp.FTP.pwd(FTP.java:1495)
at org.apache.commons.net.ftp.FTPClient.printWorkingDirectory(FTPClient.java:2738)
at org.apache.camel.component.file.remote.FtpOperations.getCurrentDirectory(FtpOperations.java:821)
at org.apache.camel.component.file.remote.FtpOperations.deleteFile(FtpOperations.java:286)
at org.apache.camel.component.file.strategy.GenericFileProcessStrategySupport.renameFile(GenericFileProcessStrategySupport.java:100)
... 23 common frames omitted
Caused by: java.net.SocketException: Connection reset by peer: socket write error
at java.net.SocketOutputStream.socketWrite0(Native Method)
at java.net.SocketOutputStream.socketWrite(SocketOutputStream.java:109)
at java.net.SocketOutputStream.write(SocketOutputStream.java:153)
at sun.security.ssl.OutputRecord.writeBuffer(OutputRecord.java:431)
at sun.security.ssl.OutputRecord.write(OutputRecord.java:417)
at sun.security.ssl.SSLSocketImpl.writeRecordInternal(SSLSocketImpl.java:876)
at sun.security.ssl.SSLSocketImpl.writeRecord(SSLSocketImpl.java:847)
at sun.security.ssl.AppOutputStream.write(AppOutputStream.java:123)
... 39 common frames omitted
This is my route:
from(queueSource)
    .errorHandler(deadLetterChannel("direct:dead")
        .useOriginalMessage()
        .maximumRedeliveries(3)
        .redeliveryDelay(180000)
        .retryAttemptedLogLevel(LoggingLevel.WARN))
    .unmarshal().json(JsonLibrary.Jackson, MyClass.class)
    .process(setHeaderWithMyVariableProcessorClass)
    .choice()
        .when(header("DownloadFilename").isEqualTo(true))
            .pollEnrich()
                .simple("{{ftp.path}}"
                    + "&useList=false"
                    + "&consumer.bridgeErrorHandler=true"
                    + "&autoCreate=false"
                    + "&username={{ftp.username}}&password={{ftp.password}}"
                    + "&passiveMode=true"
                    + "&localWorkDirectory={{ftp.localWorkDirectory}}"
                    + "&maxMessagesPerPoll=1"
                    + "&disconnectOnBatchComplete=true"
                    + "&move={{ftp.fullFileDir}}/.done"
                    + "&soTimeout=600000"
                    + "&fileName=${body.getFileAssetName()}")
                .aggregationStrategy(new ApplyOldExchangeAggregationStrategy())
            .end()
    .end()
    .choice()
        .when(header("DownloadAttach0").isEqualTo(true))
            .pollEnrich()
                .simple("{{ftp.attachmentspath}}&.....&soTimeout=600000&fileName=${body.getAttachmentList()[0]}")
                .aggregationStrategy(new ApplyOldExchangeAggregationStrategy())
            .end()
    .end()
    .log("<<<<<< ${body} >>>>>>>")
    .end();
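The ApplyOldExchangeAggregationStrategy class is not shown in the question; here is a minimal sketch of what such a Camel 2.x strategy might look like, assuming its intent is to keep the original exchange and copy over only the polled body (the behavior is my assumption, not the author's code):

import org.apache.camel.Exchange;
import org.apache.camel.processor.aggregate.AggregationStrategy;

// Hypothetical sketch: keep the original exchange (headers, properties)
// and apply only the body polled by pollEnrich onto it.
public class ApplyOldExchangeAggregationStrategy implements AggregationStrategy {
    @Override
    public Exchange aggregate(Exchange oldExchange, Exchange newExchange) {
        if (newExchange == null) {
            // pollEnrich timed out or found no file: keep the original message
            return oldExchange;
        }
        oldExchange.getIn().setBody(newExchange.getIn().getBody());
        return oldExchange;
    }
}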

Related

Flink MySQL connector connection limit

I'm using the Flink MySQL connector with a single executor of 32 GB RAM, 16 vCPU with 32 slots. If I run a job with parallelism 32 (job parallelism 224) that does temporal lookup joins with 10 MySQL tables, it starts to fail after 2-3 successful runs with the error below.
org.apache.flink.runtime.JobException: Recovery is suppressed by NoRestartBackoffTimeStrategy
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.handleFailure(ExecutionFailureHandler.java:138)
at org.apache.flink.runtime.executiongraph.failover.flip1.ExecutionFailureHandler.getFailureHandlingResult(ExecutionFailureHandler.java:82)
at org.apache.flink.runtime.scheduler.DefaultScheduler.handleTaskFailure(DefaultScheduler.java:228)
at org.apache.flink.runtime.scheduler.DefaultScheduler.maybeHandleTaskFailure(DefaultScheduler.java:218)
at org.apache.flink.runtime.scheduler.DefaultScheduler.updateTaskExecutionStateInternal(DefaultScheduler.java:209)
at org.apache.flink.runtime.scheduler.SchedulerBase.updateTaskExecutionState(SchedulerBase.java:679)
at org.apache.flink.runtime.scheduler.SchedulerNG.updateTaskExecutionState(SchedulerNG.java:79)
at org.apache.flink.runtime.jobmaster.JobMaster.updateTaskExecutionState(JobMaster.java:444)
at sun.reflect.GeneratedMethodAccessor62.invoke(Unknown Source)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.lambda$handleRpcInvocation$1(AkkaRpcActor.java:316)
at org.apache.flink.runtime.concurrent.akka.ClassLoadingUtils.runWithContextClassLoader(ClassLoadingUtils.java:83)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcInvocation(AkkaRpcActor.java:314)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleRpcMessage(AkkaRpcActor.java:217)
at org.apache.flink.runtime.rpc.akka.FencedAkkaRpcActor.handleRpcMessage(FencedAkkaRpcActor.java:78)
at org.apache.flink.runtime.rpc.akka.AkkaRpcActor.handleMessage(AkkaRpcActor.java:163)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:24)
at akka.japi.pf.UnitCaseStatement.apply(CaseStatements.scala:20)
at scala.PartialFunction.applyOrElse(PartialFunction.scala:123)
at scala.PartialFunction.applyOrElse$(PartialFunction.scala:122)
at akka.japi.pf.UnitCaseStatement.applyOrElse(CaseStatements.scala:20)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:171)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)
at scala.PartialFunction$OrElse.applyOrElse(PartialFunction.scala:172)
at akka.actor.Actor.aroundReceive(Actor.scala:537)
at akka.actor.Actor.aroundReceive$(Actor.scala:535)
at akka.actor.AbstractActor.aroundReceive(AbstractActor.scala:220)
at akka.actor.ActorCell.receiveMessage(ActorCell.scala:580)
at akka.actor.ActorCell.invoke(ActorCell.scala:548)
at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:270)
at akka.dispatch.Mailbox.run(Mailbox.scala:231)
at akka.dispatch.Mailbox.exec(Mailbox.scala:243)
at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:157)
Caused by: java.lang.IllegalArgumentException: open() failed.
at org.apache.flink.connector.jdbc.table.JdbcRowDataLookupFunction.open(JdbcRowDataLookupFunction.java:138)
at LookupFunction$55178.open(Unknown Source)
at org.apache.flink.api.common.functions.util.FunctionUtils.openFunction(FunctionUtils.java:34)
at org.apache.flink.table.runtime.operators.join.lookup.LookupJoinRunner.open(LookupJoinRunner.java:67)
at org.apache.flink.table.runtime.operators.join.lookup.LookupJoinWithCalcRunner.open(LookupJoinWithCalcRunner.java:51)
at org.apache.flink.api.common.functions.util.FunctionUtils.openFunction(FunctionUtils.java:34)
at org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator.open(AbstractUdfStreamOperator.java:100)
at org.apache.flink.streaming.api.operators.ProcessOperator.open(ProcessOperator.java:56)
at org.apache.flink.streaming.runtime.tasks.RegularOperatorChain.initializeStateAndOpenOperators(RegularOperatorChain.java:110)
at org.apache.flink.streaming.runtime.tasks.StreamTask.restoreGates(StreamTask.java:711)
at org.apache.flink.streaming.runtime.tasks.StreamTaskActionExecutor$1.call(StreamTaskActionExecutor.java:55)
at org.apache.flink.streaming.runtime.tasks.StreamTask.restoreInternal(StreamTask.java:687)
at org.apache.flink.streaming.runtime.tasks.StreamTask.restore(StreamTask.java:654)
at org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:958)
at org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:927)
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:766)
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:575)
at java.lang.Thread.run(Thread.java:748)
Caused by: com.mysql.jdbc.exceptions.jdbc4.CommunicationsException: Communications link failure
The last packet sent successfully to the server was 0 milliseconds ago. The driver has not received any packets from the server.
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at com.mysql.jdbc.Util.handleNewInstance(Util.java:403)
at com.mysql.jdbc.SQLError.createCommunicationsException(SQLError.java:990)
at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:335)
at com.mysql.jdbc.ConnectionImpl.coreConnect(ConnectionImpl.java:2187)
at com.mysql.jdbc.ConnectionImpl.connectOneTryOnly(ConnectionImpl.java:2220)
at com.mysql.jdbc.ConnectionImpl.createNewIO(ConnectionImpl.java:2015)
at com.mysql.jdbc.ConnectionImpl.<init>(ConnectionImpl.java:768)
at com.mysql.jdbc.JDBC4Connection.<init>(JDBC4Connection.java:47)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at com.mysql.jdbc.Util.handleNewInstance(Util.java:403)
at com.mysql.jdbc.ConnectionImpl.getInstance(ConnectionImpl.java:385)
at com.mysql.jdbc.NonRegisteringDriver.connect(NonRegisteringDriver.java:323)
at org.apache.flink.connector.jdbc.internal.connection.SimpleJdbcConnectionProvider.getOrEstablishConnection(SimpleJdbcConnectionProvider.java:121)
at org.apache.flink.connector.jdbc.table.JdbcRowDataLookupFunction.establishConnectionAndStatement(JdbcRowDataLookupFunction.java:211)
at org.apache.flink.connector.jdbc.table.JdbcRowDataLookupFunction.open(JdbcRowDataLookupFunction.java:129)
... 17 more
Caused by: java.net.SocketException: Too many open files
at java.net.Socket.createImpl(Socket.java:478)
at java.net.Socket.getImpl(Socket.java:538)
at java.net.Socket.setTcpNoDelay(Socket.java:998)
at com.mysql.jdbc.StandardSocketFactory.configureSocket(StandardSocketFactory.java:132)
at com.mysql.jdbc.StandardSocketFactory.connect(StandardSocketFactory.java:203)
at com.mysql.jdbc.MysqlIO.<init>(MysqlIO.java:299)
... 32 more
I did some debugging: the process list on MySQL shows roughly 2 × (total job parallelism) connections, i.e. 448 connections from the Task Manager IP. The output of lsof | grep mysql-cj- | wc -l on the task manager also went from 3k up to 12k. But after cancelling the job, sometimes this number doesn't go down. Am I missing something?
The error is mainly because too many connections are hitting MySQL at the same time. Here are several optimization ideas for reference:
Consider reducing the total concurrency of the tasks.
By default, the lookup cache is not enabled. You can enable it by setting both lookup.cache.max-rows and lookup.cache.ttl; refer to https://nightlies.apache.org/flink/flink-docs-release-1.14/docs/connectors/table/jdbc/
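As a hedged illustration, enabling the cache on a JDBC lookup table could look like the sketch below. The table name, columns, and connection details are placeholder assumptions, and the cache values are illustrative, not recommendations; only the two lookup.cache.* options come from the linked docs.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class LookupCacheExample {
    public static void main(String[] args) {
        TableEnvironment tableEnv = TableEnvironment.create(
                EnvironmentSettings.newInstance().inStreamingMode().build());

        // Hypothetical JDBC dimension table used for temporal lookup joins.
        tableEnv.executeSql(
                "CREATE TABLE dim_table (" +
                "  id BIGINT," +
                "  name STRING" +
                ") WITH (" +
                "  'connector' = 'jdbc'," +
                "  'url' = 'jdbc:mysql://host:3306/db'," +
                "  'table-name' = 'dim_table'," +
                "  'lookup.cache.max-rows' = '10000'," + // cap rows held in cache
                "  'lookup.cache.ttl' = '10min'" +       // expire cached entries
                ")");
    }
}

With the cache enabled, repeated lookups hit the in-memory cache instead of opening a new query each time, which should also reduce connection pressure on MySQL.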

Flink job failed, Caused by: java.io.IOException: The rpc invocation size exceeds the maximum akka framesize

My Flink job failed. The error information is as follows:
2020-12-02 09:37:27
java.util.concurrent.CompletionException: java.lang.reflect.UndeclaredThrowableException
at java.util.concurrent.CompletableFuture.encodeThrowable(CompletableFuture.java:273)
at java.util.concurrent.CompletableFuture.completeThrowable(CompletableFuture.java:280)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1592)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.reflect.UndeclaredThrowableException
at com.sun.proxy.$Proxy41.submitTask(Unknown Source)
at org.apache.flink.runtime.jobmaster.RpcTaskManagerGateway.submitTask(RpcTaskManagerGateway.java:77)
at org.apache.flink.runtime.executiongraph.Execution.lambda$deploy$9(Execution.java:735)
at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1590)
... 7 more
Caused by: java.io.IOException: The rpc invocation size exceeds the maximum akka framesize.
at org.apache.flink.runtime.rpc.akka.AkkaInvocationHandler.createRpcInvocationMessage(AkkaInvocationHandler.java:270)
at org.apache.flink.runtime.rpc.akka.AkkaInvocationHandler.invokeRpc(AkkaInvocationHandler.java:200)
at org.apache.flink.runtime.rpc.akka.AkkaInvocationHandler.invoke(AkkaInvocationHandler.java:129)
... 11 more
The logic of this job is simple: it consumes data from Kafka and saves it to ClickHouse.
Start command:
flink run -m yarn-cluster -p 2 -ys 2 -yjm 2048 -ytm 2048 -ynm xx --class xx /data/flink/lib/xx.jar -name --input --groupId xx --bootstrapServers xx:9092 --CheckpointInterval 60000 --CheckpointTimeout 600000 --clientId xx
Why is that? Thanks.
The exception means the payload of the message (the JobManager submitting a task to the TaskManager) exceeds the maximum size. Try increasing the maximum by adding akka.framesize to flink-conf.yaml.
The default is 10485760b (10 MB); set a bigger value. You will probably need to restart the JobManager/TaskManager or the Flink cluster.
Doc: https://ci.apache.org/projects/flink/flink-docs-release-1.12/deployment/config.html#akka-framesize
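For example, a minimal flink-conf.yaml snippet could look like the following; the 20 MB value is purely illustrative, not a recommendation:

# flink-conf.yaml (illustrative value; assumes 20 MB is enough for your task payloads)
akka.framesize: 20971520b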

Nutch giving a Shuffle error while indexing to Solr

The Nutch crawler successfully indexed documents up to a particular point. Then it stopped abruptly and I don't know the reason. I am posting the logs; can anyone tell me the cause?
java.lang.Exception: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in localfetcher#1
at org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:462)
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:529)
Caused by: org.apache.hadoop.mapreduce.task.reduce.Shuffle$ShuffleError: error in shuffle in localfetcher#1
at org.apache.hadoop.mapreduce.task.reduce.Shuffle.run(Shuffle.java:134)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:376)
at org.apache.hadoop.mapred.LocalJobRunner$Job$ReduceTaskRunnable.run(LocalJobRunner.java:319)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.OutOfMemoryError: Java heap space
at org.apache.hadoop.io.BoundedByteArrayOutputStream.<init>(BoundedByteArrayOutputStream.java:56)
at org.apache.hadoop.io.BoundedByteArrayOutputStream.<init>(BoundedByteArrayOutputStream.java:46)
at org.apache.hadoop.mapreduce.task.reduce.InMemoryMapOutput.<init>(InMemoryMapOutput.java:63)
at org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.unconditionalReserve(MergeManagerImpl.java:309)
at org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.reserve(MergeManagerImpl.java:299)
at org.apache.hadoop.mapreduce.task.reduce.LocalFetcher.copyMapOutput(LocalFetcher.java:134)
at org.apache.hadoop.mapreduce.task.reduce.LocalFetcher.doCopy(LocalFetcher.java:102)
at org.apache.hadoop.mapreduce.task.reduce.LocalFetcher.run(LocalFetcher.java:85)
2018-08-30 03:15:54,758 ERROR indexer.IndexingJob - Indexer: java.io.IOException: Job failed!
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:873)
at org.apache.nutch.indexer.IndexingJob.index(IndexingJob.java:147)
at org.apache.nutch.indexer.IndexingJob.run(IndexingJob.java:230)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.nutch.indexer.IndexingJob.main(IndexingJob.java:239)
Caused by: java.lang.OutOfMemoryError: Java heap space
It's a memory error.
Try adjusting the heap settings in solr.in.sh:
SOLR_JAVA_MEM="-Xms512m -Xmx5120m"
This worked for me.

camel ftp in recursive manner results in ftp error code: 226

I have a very simple FTP route that should recursively download files from a URL. It is very important that I get it running today.
There are no proxies, no extra authentication, no firewall. However, it only downloads the first file and then the socket is closed. I have experimented with the different timeouts, but they did not solve the issue. Either error code 226 is returned if I don't use any timeout or extra config option, or, if I use the commented-out options, error code 221 is returned. The 226 does not seem to be an error, as it just indicates that the server completed the transfer. I copied the stack traces below. I would appreciate any responses; thanks in advance.
I am using pollEnrich in the route because I have to start it from a timer.
The code:
[...]
from("direct:ftp").routeId("ftp")
.log("### FTP is in progress ")
.pollEnrich().simple(ConfigData.getConfigData().getFtpUrl()
+ "?binary=true&"
+ "recursive=true"
/* + "soTimeout=300000&"
+ "stepwise=true&"
+ "ignoreFileNotFoundOrPermissionError=true&"
+ "ftpClient.dataTimeout=30000&"
+ "disconnect=true&"
+ "consumer.delay=1000" */
)
.to("file:modelFiles")
.end();
[...]
Update 1
I removed pollEnrich() and without it, the route works fine. However, I then cannot start it from another route, e.g. from a timer, so this is a quick hack to get FTP running. I also copy the code here: I used an idempotent consumer, so that only files that are not yet on disk are downloaded. It may be useful to other people too. For more information on the idempotent consumer, see this example (Apache Camel ftp consumer loads the same files again and again).
Any further comments?
from(ConfigData.getConfigData().getFtpUrl()
        + "?binary=true&"
        + "recursive=true&"
        + "passiveMode=true&"
        + "ftpClient.bufferSize=10000000&"
        + "localWorkDirectory=" + ConfigData.getConfigData().getLocalTmpDirectory())
    // skip files already seen, tracked in a file-based idempotent repository
    .idempotentConsumer(header("CamelFileName"),
        FileIdempotentRepository.fileIdempotentRepository(new File("data", "repo.dat")))
    .to("file:modelFiles")
    .log("Downloaded file ${file:name} complete.")
    .end();
Stack Trace
2016-03-19 15:27:53,921 [WARN|org.apache.camel.component.file.remote.FtpConsumer|MarkerIgnoringBase] Error processing file RemoteFile[2009-03-25/BioModels_Database-r13-sbml_files.tar.gz] due to File operation failed: null socket closed. Code: 221. Caused by: [org.apache.camel.component.file.GenericFileOperationFailedException - File operation failed: null socket closed. Code: 221]
org.apache.camel.component.file.GenericFileOperationFailedException: File operation failed: null socket closed. Code: 221
[...]
Caused by: java.net.SocketException: socket closed
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(Unknown Source)
at java.net.SocketInputStream.read(Unknown Source)
at sun.nio.cs.StreamDecoder.readBytes(Unknown Source)
at sun.nio.cs.StreamDecoder.implRead(Unknown Source)
at sun.nio.cs.StreamDecoder.read(Unknown Source)
at java.io.InputStreamReader.read(Unknown Source)
at java.io.BufferedReader.fill(Unknown Source)
at java.io.BufferedReader.read(Unknown Source)
at org.apache.commons.net.io.CRLFLineReader.readLine(CRLFLineReader.java:58)
at org.apache.commons.net.ftp.FTP.__getReply(FTP.java:314)
at org.apache.commons.net.ftp.FTP.__getReply(FTP.java:294)
at org.apache.commons.net.ftp.FTP.sendCommand(FTP.java:483)
at org.apache.commons.net.ftp.FTP.sendCommand(FTP.java:608)
at org.apache.commons.net.ftp.FTP.cwd(FTP.java:828)
at org.apache.commons.net.ftp.FTPClient.changeWorkingDirectory(FTPClient.java:1128)
at org.apache.camel.component.file.remote.FtpOperations.doChangeDirectory(FtpOperations.java:769)
2016-03-19 12:42:16,068 [WARN|org.apache.camel.component.file.remote.FtpConsumer|MarkerIgnoringBase] Error processing file RemoteFile[2008-12-03/BioModels_Database-r12-sbml_files.tar.gz] due to File operation failed: null Socket Closed. Code: 226. Caused by: [org.apache.camel.component.file.GenericFileOperationFailedException - File operation failed: null Socket Closed. Code: 226]
[...]
Caused by: java.net.SocketException: Socket Closed
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.read(Unknown Source)
at java.net.SocketInputStream.read(Unknown Source)
at sun.nio.cs.StreamDecoder.readBytes(Unknown Source)
at sun.nio.cs.StreamDecoder.implRead(Unknown Source)
at sun.nio.cs.StreamDecoder.read(Unknown Source)
at java.io.InputStreamReader.read(Unknown Source)
at java.io.BufferedReader.fill(Unknown Source)
at java.io.BufferedReader.read(Unknown Source)
at org.apache.commons.net.io.CRLFLineReader.readLine(CRLFLineReader.java:58)
at org.apache.commons.net.ftp.FTP.__getReply(FTP.java:314)
at org.apache.commons.net.ftp.FTP.__getReply(FTP.java:294)
at org.apache.commons.net.ftp.FTP.getReply(FTP.java:692)
at org.apache.commons.net.ftp.FTPClient.completePendingCommand(FTPClient.java:1813)
at org.apache.commons.net.ftp.FTPClient._retrieveFile(FTPClient.java:1885)
at org.apache.commons.net.ftp.FTPClient.retrieveFile(FTPClient.java:1845)
at org.apache.camel.component.file.remote.FtpOperations.retrieveFileToStreamInBody(FtpOperations.java:367)
Solution:
As added in Update 1, I do not use pollEnrich(). The route cannot be started from a timer now, but it works, so I will close the question. I really like the idea of the idempotent consumer (unrelated to the original question).
Don't forget that pollEnrich discards the oldExchange and keeps the newExchange.
So you don't want to do from("ftp").pollEnrich("timer"), but rather from("timer").pollEnrich("ftp").
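A minimal sketch of that pattern (the timer period, FTP URI, and target directory are placeholder assumptions):

from("timer:ftpPoll?period=60000")              // trigger the poll every 60 s
    .pollEnrich("ftp://user@host/inbox"         // hypothetical FTP endpoint
        + "?binary=true&recursive=true&passiveMode=true")
    .to("file:modelFiles");                     // save downloads locally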
See the Solution at the end of the question.

Solr performance issue

I am working on Solr 4.2.1 with Jetty, and we are facing some performance issues as well as a heap memory overflow issue.
While searching for the actual cause of these exceptions, I ran a load test with different Solr queries. After a few minutes I got the errors below:
WARN:oejs.Response:Committed before 500 {msg=Software caused connection abort: socket write
Caused by: java.net.SocketException: Software caused connection abort: socket write error
SEVERE: null:org.eclipse.jetty.io.EofException
I also tried setting maxIdleTime to 300000 milliseconds, but I still get the same error.
Any ideas on how to tackle this? Please help.
Thanks,
Mayur
Stack Trace:
SEVERE: null:org.eclipse.jetty.io.EofException
at org.eclipse.jetty.http.HttpGenerator.flushBuffer(HttpGenerator.java:914)
at org.eclipse.jetty.http.AbstractGenerator.blockForOutput(AbstractGenerator.java:507)
at org.eclipse.jetty.server.HttpOutput.write(HttpOutput.java:147)
at org.eclipse.jetty.server.HttpOutput.write(HttpOutput.java:107)
at sun.nio.cs.StreamEncoder.writeBytes(Unknown Source)
at sun.nio.cs.StreamEncoder.implWrite(Unknown Source)
at sun.nio.cs.StreamEncoder.write(Unknown Source)
at java.io.OutputStreamWriter.write(Unknown Source)
at org.apache.solr.util.FastWriter.flush(FastWriter.java:141)
at org.apache.solr.util.FastWriter.write(FastWriter.java:55)
at org.apache.solr.response.XMLWriter.writePrim(XMLWriter.java:356)
at org.apache.solr.response.XMLWriter.writeStr(XMLWriter.java:295)
at org.apache.solr.schema.StrField.write(StrField.java:67)
at org.apache.solr.response.TextResponseWriter.writeVal(TextResponseWriter.java:130)
at org.apache.solr.response.XMLWriter.writeSolrDocument(XMLWriter.java:199)
at org.apache.solr.response.TextResponseWriter.writeDocuments(TextResponseWriter.java:275)
at org.apache.solr.response.TextResponseWriter.writeVal(TextResponseWriter.java:172)
at org.apache.solr.response.XMLWriter.writeResponse(XMLWriter.java:111)
at org.apache.solr.response.XMLResponseWriter.write(XMLResponseWriter.java:39)
at org.apache.solr.servlet.SolrDispatchFilter.writeResponse(SolrDispatchFilter.java:627)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:358)
at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:141)
at org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1307)
at org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:453)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:137)
at org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:560)
at org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:231)
at org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1072)
at org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:382)
at org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:193)
at org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1006)
at org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:135)
at org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:255)
at org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:154)
at org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:116)
at org.eclipse.jetty.server.Server.handle(Server.java:365)
at org.eclipse.jetty.server.AbstractHttpConnection.handleRequest(AbstractHttpConnection.java:485)
at org.eclipse.jetty.server.BlockingHttpConnection.handleRequest(BlockingHttpConnection.java:53)
at org.eclipse.jetty.server.AbstractHttpConnection.headerComplete(AbstractHttpConnection.java:926)
at org.eclipse.jetty.server.AbstractHttpConnection$RequestHandler.headerComplete(AbstractHttpConnection.java:988)
at org.eclipse.jetty.http.HttpParser.parseNext(HttpParser.java:635)
at org.eclipse.jetty.http.HttpParser.parseAvailable(HttpParser.java:235)
at org.eclipse.jetty.server.BlockingHttpConnection.handle(BlockingHttpConnection.java:72)
at org.eclipse.jetty.server.bio.SocketConnector$ConnectorEndPoint.run(SocketConnector.java:264)
at org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:608)
at org.eclipse.jetty.util.thread.QueuedThreadPool$3.run(QueuedThreadPool.java:543)
at java.lang.Thread.run(Unknown Source)
Caused by: java.net.SocketException: Software caused connection abort: socket write error
at java.net.SocketOutputStream.socketWrite0(Native Method)
at java.net.SocketOutputStream.socketWrite(Unknown Source)
at java.net.SocketOutputStream.write(Unknown Source)
at org.eclipse.jetty.io.ByteArrayBuffer.writeTo(ByteArrayBuffer.java:359)
at org.eclipse.jetty.io.bio.StreamEndPoint.flush(StreamEndPoint.java:164)
at org.eclipse.jetty.io.bio.StreamEndPoint.flush(StreamEndPoint.java:194)
at org.eclipse.jetty.http.HttpGenerator.flushBuffer(HttpGenerator.java:838)
... 46 more
