You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
When indexing 70MM Parquet datasets with Spark-SolrJ using the configuration below, the job throws this error --
"java.lang.NoClassDefFoundError: org/apache/commons/httpclient/NoHttpResponseException+details"
Any help with the configuration on the Solr side is appreciated.
val numBatchSize = "5000"
val numCoalSize = 100
var writeToSolrOpts = Map("zkhost" -> zkHost, "collection" -> solrCollection, "batch_size" -> numBatchSize)
data.coalesce(numCoalSize).write.format("solr").options(writeToSolrOpts).mode(SaveMode.Overwrite).save
commit(zkHost, solrCollection)
java.lang.NoClassDefFoundError: org/apache/commons/httpclient/NoHttpResponseException
at com.lucidworks.spark.util.SolrSupport$.shouldRetry(SolrSupport.scala:422)
at com.lucidworks.spark.util.SolrSupport$.sendBatchToSolr(SolrSupport.scala:388)
at com.lucidworks.spark.util.SolrSupport$.sendBatchToSolrWithRetry(SolrSupport.scala:348)
at com.lucidworks.spark.util.SolrSupport$$anonfun$indexDocs$1.apply(SolrSupport.scala:327)
at com.lucidworks.spark.util.SolrSupport$$anonfun$indexDocs$1.apply(SolrSupport.scala:316)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:935)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:935)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
The text was updated successfully, but these errors were encountered:
Hi --
When indexing 70MM Parquet datasets with Spark-SolrJ using the configuration below, the job throws this error --
"java.lang.NoClassDefFoundError: org/apache/commons/httpclient/NoHttpResponseException+details"
Any help with the configuration on the Solr side is appreciated.
val numBatchSize = "5000"
val numCoalSize = 100
var writeToSolrOpts = Map("zkhost" -> zkHost, "collection" -> solrCollection, "batch_size" -> numBatchSize)
data.coalesce(numCoalSize).write.format("solr").options(writeToSolrOpts).mode(SaveMode.Overwrite).save
commit(zkHost, solrCollection)
java.lang.NoClassDefFoundError: org/apache/commons/httpclient/NoHttpResponseException
at com.lucidworks.spark.util.SolrSupport$.shouldRetry(SolrSupport.scala:422)
at com.lucidworks.spark.util.SolrSupport$.sendBatchToSolr(SolrSupport.scala:388)
at com.lucidworks.spark.util.SolrSupport$.sendBatchToSolrWithRetry(SolrSupport.scala:348)
at com.lucidworks.spark.util.SolrSupport$$anonfun$indexDocs$1.apply(SolrSupport.scala:327)
at com.lucidworks.spark.util.SolrSupport$$anonfun$indexDocs$1.apply(SolrSupport.scala:316)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:935)
at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$28.apply(RDD.scala:935)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2121)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:121)
at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
The text was updated successfully, but these errors were encountered: