Java - Error while importing data from MongoDB to HDFS
I'm trying to import the documents of a collection in MongoDB into HDFS through a MapReduce job. I am using the old API. This is the driver code:
package my.pac; import org.apache.hadoop.conf.configured; import org.apache.hadoop.fs.path; import org.apache.hadoop.io.text; import org.apache.hadoop.mapred.fileoutputformat; import org.apache.hadoop.mapred.jobclient; import org.apache.hadoop.mapred.jobconf; import org.apache.hadoop.mapred.textoutputformat; import org.apache.hadoop.util.tool; import org.apache.hadoop.util.toolrunner; import com.mongodb.hadoop.mapred.mongoinputformat; import com.mongodb.hadoop.util.mongoconfigutil; public class importdriver extends configured implements tool { public static void main(string[] args) throws exception { int exitcode = toolrunner.run(new importdriver(), args); system.exit(exitcode); } @override public int run(string[] args) throws exception { jobconf conf = new jobconf(); mongoconfigutil.setinputuri(conf,"mongodb://127.0.0.1:27017/sampledb.shows"); conf.setjarbyclass(importdriver.class); conf.addresource(new path("/usr/lib/hadoop/hadoop-1.2.1/conf/core-site.xml")); conf.addresource(new path("/usr/lib/hadoop/hadoop-1.2.1/conf/hdfs-site.xml")); fileoutputformat.setoutputpath(conf, new path(args[0])); conf.setinputformat(mongoinputformat.class); conf.setoutputformat(textoutputformat.class); conf.setmapperclass(importmapper.class); conf.setmapoutputkeyclass(text.class); conf.setmapoutputkeyclass(text.class); jobclient.runjob(conf); return 0; } }
This is the mapper code:
package my.pac; import java.io.ioexception; import org.apache.hadoop.io.text; import org.apache.hadoop.mapred.mapreducebase; import org.apache.hadoop.mapred.mapper; import org.apache.hadoop.mapred.outputcollector; import org.apache.hadoop.mapred.reporter; import org.bson.bsonobject; import com.mongodb.hadoop.io.bsonwritable; public class importmapper extends mapreducebase implements mapper<bsonwritable, bsonwritable, text, text>{ @override public void map(bsonwritable key, bsonwritable value, outputcollector<text, text> o, reporter arg3) throws ioexception { string val = ((bsonobject) value).get("_id").tostring(); system.out.println(val); o.collect( new text(val), new text(val)); } }
I am using:
- ubuntu-14.0
- hadoop-1.2.1
- mongodb-3.0.4
I have added the following jars:
- mongo-2.9.3.jar
- mongo-hadoop-core-1.3.0.jar
- mongo-java-driver-2.13.2.jar
When I run this, I get the following error:
java.lang.exception: java.lang.classcastexception: com.mongodb.hadoop.io.bsonwritable cannot be cast to org.bson.bsonobject at org.apache.hadoop.mapred.localjobrunner$job.run(localjobrunner.java:354) caused by: java.lang.classcastexception: com.mongodb.hadoop.io.bsonwritable cannot be cast to org.bson.bsonobject at my.pac.importmapper.map(importmapper.java:18) at my.pac.importmapper.map(importmapper.java:1) at org.apache.hadoop.mapred.maprunner.run(maprunner.java:50) at org.apache.hadoop.mapred.maptask.runoldmapper(maptask.java:430) at org.apache.hadoop.mapred.maptask.run(maptask.java:366) at org.apache.hadoop.mapred.localjobrunner$job$maptaskrunnable.run(localjobrunner.java:223) at java.util.concurrent.executors$runnableadapter.call(executors.java:511) at java.util.concurrent.futuretask.run(futuretask.java:266) at java.util.concurrent.threadpoolexecutor.runworker(threadpoolexecutor.java:1142) at java.util.concurrent.threadpoolexecutor$worker.run(threadpoolexecutor.java:617) at java.lang.thread.run(thread.java:745)
How can I rectify this?
You may have an outdated driver in the classpath that's causing a conflict in the read preference settings.
See the link below for a similar issue: https://jira.mongodb.org/browse/java-849
If that doesn't help, https://jira.talendforge.org/browse/tbd-1002 suggests you may need to re-run MongoDB or use a separate connection.
Comments
Post a Comment