http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
see:
https://wuzhuti.cn/2662.html
http://zhongyaonan.com/hadoop-tutorial/setting-up-hadoop-2-6-on-mac-osx-yosemite.html
http://danqingdani.blog.163.com/blog/static/18609419520155655152609
http://kiritor.github.io/2016/04/26/Hadoop-dev-install/
#安装 jdk,下载 hadoop2.7.3
+++++++++++++++++++++++
cd /usr/local
tar xvf hadoop-2.7.3.tar.gz
vi /usr/local/hadoop-2.7.3/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_77.jdk/Contents/Home
export HADOOP_PREFIX=/usr/local/hadoop-2.7.3
##编辑配置文件
++++++++++++++++++
-
etc/hadoop/core-site.xml:
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop-2.7.3/data/tmp</value>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
etc/hadoop/hdfs-site.xml:
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/usr/local/hadoop-2.7.3/data/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/usr/local/hadoop-2.7.3/data/data</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
etc/hadoop/mapred-site.xml:
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>localhost:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>localhost:19888</value>
    </property>
</configuration>
etc/hadoop/yarn-site.xml:
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
3.Execution
+++++++++++++++++++++++++++++
$ /usr/local/hadoop-2.7.3/bin/hdfs namenode -format
启动 NameNode 和 DataNode 的守护进程。
$ sbin/start-dfs.sh
启动 ResourceManager 和 NodeManager 的守护进程。
$ sbin/start-yarn.sh
browse http://localhost:50070/
$ bin/hdfs dfs -mkdir /user
$ bin/hdfs dfs -mkdir /user/lisangang
授权
/usr/local/hadoop-2.7.3/bin/hadoop fs -chown -R lisangang /home
/usr/local/hadoop-2.7.3/data
拷贝一些文件到 input 目录:
/usr/local/hadoop-2.7.3/bin/hdfs dfs -put etc/hadoop /user/lisangang
运行样例:
/usr/local/hadoop-2.7.3/bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep /user/lisangang output 'dfs[a-z.]+'
在 localhost:50070 中的 Utilities 标签下找到/user/root 目录,下载 part-r-00000 文件,
localhost:hadoop-2.7.3 root# /usr/local/hadoop-2.7.3/bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar pi 2 5
Number of Maps = 2
Samples per Map = 5
16/10/11 11:56:22 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Wrote input for Map #0
Wrote input for Map #1
http://localhost:8088 Cluster Status 这个接口非常有用
http://localhost:50070/ HDFS status
http://localhost:50090 secondaryNamenode
pom 工程
++++++++++++++++++++++++++++++++
mvn archetype:generate -DgroupId=com.benzuo -DartifactId=hadoop_study -DarchetypeArtifactId=maven-archetype-quickstart -DinteractiveMode=false
编辑 pom.xml 增加依赖。
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.benzuo</groupId>
<artifactId>hadoop_study</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>hadoop_study</name>
<url>http://maven.apache.org</url>
<properties>
<hadoop-common.version>2.7.3</hadoop-common.version>
<hadoop-hdfs.version>2.7.3</hadoop-hdfs.version>
<hadoop-client.version>2.7.3</hadoop-client.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop-common.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop-hdfs.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop-client.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>${basedir}/src/main/resources</directory>
</resource>
<resource>
<directory>${basedir}/src/main/resources-${environment}</directory>
</resource>
</resources>
<pluginManagement>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<encoding>UTF-8</encoding>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-resources-plugin</artifactId>
<version>2.5</version>
<configuration>
<encoding>UTF-8</encoding>
<overwrite>true</overwrite>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>
<profiles>
<profile>
<id>test</id>
<properties>
<environment>test</environment>
</properties>
</profile>
<profile>
<id>product</id>
<properties>
<environment>product</environment>
</properties>
</profile>
</profiles>
</project>
执行一个 wordcount 例子
package com.benzuo;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class NameCount {
public static class MyMapper extends Mapper<Object, Text, Text, IntWritable>{
private final static IntWritable one = new IntWritable(1);
private Text name= new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
int idx = value.toString().indexOf(" ");
if (idx > 0) {
String e = value.toString().substring(0, idx);
name.set(e);
context.write(name, one);
}
}
}
public static class MyReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
if (otherArgs.length < 2) {
System.err.println("Usage: NameCount <in> <out>");
System.exit(2);
}
Job job = Job.getInstance(conf, "name count");
job.setJarByClass(NameCount.class);
job.setMapperClass(MyMapper.class);
job.setCombinerClass(MyReducer.class);
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
+++++++++++
mvn clean package
将文本放入到 /home/lisangang/data 下
/usr/local/hadoop-2.7.3/bin/hadoop fs -mkdir /home/
/usr/local/hadoop-2.7.3/bin/hadoop fs -mkdir /home/lisangang
/usr/local/hadoop-2.7.3/bin/hadoop fs -mkdir /home/lisangang/data
/usr/local/hadoop-2.7.3/bin/hadoop fs -mkdir /home/lisangang/data/input
/usr/local/hadoop-2.7.3/bin/hadoop fs -chown -R lisangang /home
拷贝一些文件到 input 目录:
/usr/local/hadoop-2.7.3/bin/hdfs dfs -put /usr/local/hadoop-2.7.3/etc/hadoop /home/lisangang/data/input
通过 maven 将打成的 jar 放在 hadoop 安装目录:share/hadoop/mapreduce/下.
sudo cp target/hadoop_study-1.0-SNAPSHOT.jar /usr/local/hadoop-2.7.3/share/hadoop/mapreduce/
/usr/local/hadoop-2.7.3/bin/hadoop jar /usr/local/hadoop-2.7.3/share/hadoop/mapreduce/hadoop_study-1.0-SNAPSHOT.jar com.benzuo.NameCount /home/lisangang/data/input/hadoop /home/lisangang/data/output
开启 debug 模式
export HADOOP_ROOT_LOGGER=DEBUG,console
查看 job
/usr/local/hadoop-2.7.3/bin/hadoop job -list
杀 job
/usr/local/hadoop-2.7.3/bin/hadoop job -kill job_1476238795797_0001
欢迎来到这里!
我们正在构建一个小众社区,大家在这里相互信任,以平等 • 自由 • 奔放的价值观进行分享交流。最终,希望大家能够找到与自己志同道合的伙伴,共同成长。
注册 关于