关于hadoop的问题“java.lang.RuntimeException: java.lang.ClassNotFoundException:”

Question

提问by hao

Here's my source code

这是我的源代码

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class PageRank {

public static final String MAGIC_STRING = ">>>>";
boolean overwrite = true;

PageRank(boolean overwrite){
    this.overwrite = overwrite;
}
public static class TextPair implements WritableComparable<TextPair>{
    Text x;
    int ordering;

    public TextPair(){
        x = new Text();
        ordering = 1;
    }

    public void setText(Text t, int o){
        x = t;
        ordering = o;
    }

    public void setText(String t, int o){
        x.set(t);
        ordering = o;
    }


    public void readFields(DataInput in) throws IOException {
        x.readFields(in);
        ordering = in.readInt();
    }


    public void write(DataOutput out) throws IOException {
        x.write(out);
        out.writeInt(ordering);
    }


    public int hashCode() {
        return x.hashCode();
    }


    public int compareTo(TextPair o) {
        int x = this.x.compareTo(o.x);
        if(x==0)
            return ordering-o.ordering;
        else
            return x;
    }
}

public static class MapperA extends Mapper<LongWritable, Text, TextPair, Text> {

private Text word = new Text();
Text title = new Text();
Text link = new Text();
TextPair textpair = new TextPair();

boolean start=false;
String currentTitle="";
private Pattern linkPattern = Pattern.compile("\[\[\s*(.+?)\s*\]\]");
private Pattern titlePattern = Pattern.compile("<title>\s*(.+?)\s*</title>");
private Pattern pagePattern = Pattern.compile("&ltpage&gt\s*(.+?)\s*&lt/page&gt");


public void map(LongWritable key, Text value,  Context context) throws IOException, InterruptedException {
    String line = value.toString();
    int startPage=line.lastIndexOf("<title>");  

    if(startPage<0)
    {           
        Matcher matcher = linkPattern.matcher(line);                
        int n = 0;
        title.set(currentTitle);
        while(matcher.find()){
            textpair.setText(matcher.group(1), 1);
            context.write(textpair, title);
        }
        link.set(MAGIC_STRING);     
        textpair.setText(title.toString(), 0);
        context.write(textpair, link);
    } 
    else
    {           
        String result=line.trim();
        Matcher titleMatcher = titlePattern.matcher(result);            
        if(titleMatcher.find()){
            currentTitle = titleMatcher.group(1);
        }
        else
        {
            currentTitle=result;
        }               
        }    
   }
    } 

   public static class ReducerA extends Reducer<TextPair, Text, Text, Text>{
    Text aw = new Text();
    boolean valid = false;
    String last = "";

    public void run(Context context) throws IOException, InterruptedException {
        setup(context);
        while (context.nextKeyValue()) {
            TextPair key = context.getCurrentKey();
            Text value = context.getCurrentValue();
            if(key.ordering==0){
                last = key.x.toString();
            }
            else if(key.x.toString().equals(last)){
                context.write(key.x, value);
            }
        }
        cleanup(context);
         }
               }

  public static class MapperB extends Mapper<Text, Text, Text, Text>{
Text t = new Text();        
public void map(Text key, Text value, Context context) throws InterruptedException, IOException{
    context.write(value, key);
}
 }

   public static class ReducerB extends Reducer<Text, Text, Text, PageRankRecord>{
    ArrayList<String> q = new ArrayList<String>();

    public void reduce(Text key, Iterable<Text> values, Context context)throws InterruptedException, IOException{
        q.clear();
        for(Text value:values){
            q.add(value.toString());
        }

        PageRankRecord prr = new PageRankRecord();
        prr.setPageRank(1.0);

        if(q.size()>0){
            String[] a = new String[q.size()];
            q.toArray(a);

            prr.setlinks(a);
        }
        context.write(key, prr);
    }
}

public boolean roundA(Configuration conf, String inputPath, String outputPath, boolean overwrite) throws IOException, InterruptedException, ClassNotFoundException{
    if(FileSystem.get(conf).exists(new Path(outputPath))){
        if(overwrite){
            FileSystem.get(conf).delete(new Path(outputPath), true);
            System.err.println("The target file is dirty, overwriting!");
        }
        else
            return true;
    }

    Job job = new Job(conf, "closure graph build round A");

    //job.setJarByClass(GraphBuilder.class);
    job.setMapperClass(MapperA.class);
    //job.setCombinerClass(RankCombiner.class);
    job.setReducerClass(ReducerA.class);

    job.setMapOutputKeyClass(TextPair.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setNumReduceTasks(30);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true);
}

public boolean roundB(Configuration conf, String inputPath, String outputPath) throws IOException, InterruptedException, ClassNotFoundException{
    if(FileSystem.get(conf).exists(new Path(outputPath))){
        if(overwrite){
            FileSystem.get(conf).delete(new Path(outputPath), true);
            System.err.println("The target file is dirty, overwriting!");
        }
        else
            return true;
    }

    Job job = new Job(conf, "closure graph build round B");

    //job.setJarByClass(PageRank.class);
    job.setMapperClass(MapperB.class);
    //job.setCombinerClass(RankCombiner.class);
    job.setReducerClass(ReducerB.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(PageRankRecord.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setNumReduceTasks(30);

    SequenceFileInputFormat.addInputPath(job, new Path(inputPath));
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true);
}

public boolean build(Configuration conf, String inputPath, String outputPath) throws IOException, InterruptedException, ClassNotFoundException{

    System.err.println(inputPath);
    if(roundA(conf, inputPath, "cgb", true)){           
        return roundB(conf, "cgb", outputPath);
    }
    else
        return false;
}   

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException{
    Configuration conf = new Configuration();       
            //PageRanking.banner("ClosureGraphBuilder");
    PageRank cgb = new PageRank(true);
    cgb.build(conf, args[0], args[1]);
}


 }

Here's how i compile and run

这是我编译和运行的方式

javac -classpath hadoop-0.20.1-core.jar -d pagerank_classes PageRank.java PageRankRecord.java

jar -cvf pagerank.jar -C pagerank_classes/ .

bin/hadoop jar pagerank.jar PageRank pagerank result

but I am getting the following errors:

但我收到以下错误：

 INFO mapred.JobClient: Task Id : attempt_201001012025_0009_m_000001_0, Status : FAILED
java.lang.RuntimeException: java.lang.ClassNotFoundException: PageRank$MapperA

Can someone tell me whats wrong

谁能告诉我怎么了

Thanks

谢谢

Answer 1

回答by Sami A. Haija

If you are using the 0.2.0 hadoop (want to use the non-deprecated classes), you can do:

如果您使用的是 0.2.0 hadoop（想使用未弃用的类），您可以执行以下操作：

public int run(String[] args) throws Exception {
    Job job = new Job();
    job.setJarByClass(YourMapReduceClass.class);  // <-- omitting this causes above error

    job.setMapperClass(MyMapper.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.waitForCompletion(true);
    return 0;
}

Answer 2

回答by ZoFreX

Did "PageRank$MapperA.class" end up inside that jar file? It should be in the same place as "PageRank.class".

“PageRank$MapperA.class”是否最终出现在那个 jar 文件中？它应该与“PageRank.class”在同一个地方。

Answer 3

回答by Victor

Try to add "--libjars pagerank.jar". Mapper and reducer are running across machines, thus you need to distribute your jar to every machine. "--libjars" helps to do that.

尝试添加“--libjars pagerank.jar”。Mapper 和 reducer 跨机器运行，因此您需要将 jar 分发到每台机器。“--libjars”有助于做到这一点。

Answer 4

回答by sebcore

For the HADOOP_CLASSPATH you should specify the folder where the JAR file is located...

对于 HADOOP_CLASSPATH，您应该指定 JAR 文件所在的文件夹...

If you want to understand how the classpath works: http://download.oracle.com/javase/6/docs/technotes/tools/windows/classpath.html

如果您想了解类路径的工作原理：http: //download.oracle.com/javase/6/docs/technotes/tools/windows/classpath.html

Answer 5

回答by krishnan

If you are using ECLIPSE for generating jar then use "Extract generated libraries into generated JAR" option.

如果您使用 ECLIPSE 生成 jar，请使用“将生成的库提取到生成的 JAR”选项。

Answer 6

回答by ??V??? Rā????

Though MapReduce program is parallel processing. Mapper, Combiner and Reducer class has sequence flow. Have to wait for completing each flow depends on other class so need job.waitForCompletion(true);But It must to set input and output path before starting Mapper, Combiner and Reducer class. Reference

虽然 MapReduce 程序是并行处理的。Mapper、Combiner 和 Reducer 类具有序列流。必须等待完成每个流依赖于其他类所以需要job.waitForCompletion(true);但它必须在启动 Mapper、Combiner 和 Reducer 类之前设置输入和输出路径。参考

Solution for this already answered in https://stackoverflow.com/a/38145962/3452185

https://stackoverflow.com/a/38145962/3452185已经回答了这个问题的解决方案

Answer 7

回答by Ravi

I guess you should change your HADOOP_CLASSPATHvariable, so that it points to the jar file.

我想您应该更改HADOOP_CLASSPATH变量，使其指向 jar 文件。

e.g. HADOOP_CLASSPATH=<what ever the path>/PageRank.jaror something like that.

例如HADOOP_CLASSPATH=<what ever the path>/PageRank.jar或类似的东西。

关于hadoop的问题“java.lang.RuntimeException: java.lang.ClassNotFoundException:”

提问by hao

回答by Sami A. Haija

回答by ZoFreX

回答by Victor

回答by sebcore

回答by krishnan

回答by ??V??? Rā????

回答by Ravi

相关推荐

最近更新

标签

关于hadoop的问题“java.lang.RuntimeException: java.lang.ClassNotFoundException:”

提问by hao

回答by Sami A. Haija

回答by ZoFreX

回答by Victor

回答by sebcore

回答by krishnan

回答by ??V??? Rā????

回答by Ravi

相关推荐

System.currentTimeMillis () (Java) 的时间错误

java 在 Ant 脚本中将命令行参数传递给 JAR

如何在 C++ 中编写具有多个数据字段的类 Java 枚举类？

java 更改默认 JLabel 字体

相关推荐

最近更新

标签