http://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.7.3/hadoop-2.7.3.tar.gz
see:
https://wuzhuti.cn/2662.html
http://zhongyaonan.com/hadoop-tutorial/setting-up-hadoop-2-6-on-mac-osx-yosemite.html
http://danqingdani.blog.163.com/blog/static/18609419520155655152609
http://kiritor.github.io/2016/04/26/Hadoop-dev-install/
#安装 jdk,下载 hadoop2.7.3
+++++++++++++++++++++++**
cd /usr/local
tar xvf hadoop-2.7.3.tar.gz
vi /usr/local/hadoop-2.7.3/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.8.0_77.jdk/Contents/Home
export HADOOP_PREFIX=/usr/local/hadoop-2.7.3
##编辑配置文件
++++++++++++++++++
-
etc/hadoop/core-site.xml:
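<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop-2.7.3/data/tmp</value>
  </property>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
-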
etc/hadoop/hdfs-site.xml:
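<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/hadoop-2.7.3/data/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/hadoop-2.7.3/data/data</value>
  </property>
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>
</configuration>
-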
etc/hadoop/mapred-site.xml:
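<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>localhost:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>localhost:19888</value>
  </property>
</configuration>
-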
etc/hadoop/yarn-site.xml:
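<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>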
3.Execution
+++++++++++++++++++++++++++++
$ /usr/local/hadoop-2.7.3/bin/hdfs namenode -format
启动 NameNode 和 DataNode 的守护进程。
$ sbin/start-dfs.sh
启动 ResourceManager 和 NodeManager 的守护进程。
$ sbin/start-yarn.sh
在浏览器中打开 http://localhost:50070/
$ bin/hdfs dfs -mkdir /user
$ bin/hdfs dfs -mkdir /user/lisangang
授权
/usr/local/hadoop-2.7.3/bin/hadoop fs -chown -R lisangang /home
/usr/local/hadoop-2.7.3/data
拷贝一些文件到 input 目录:
/usr/local/hadoop-2.7.3/bin/hdfs dfs -put etc/hadoop /user/lisangang
运行样例:
/usr/local/hadoop-2.7.3/bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep /user/lisangang output 'dfs[a-z.]+'
在 localhost:50070 中的 Utilities 标签下找到/user/root 目录,下载 part-r-00000 文件,
localhost:hadoop-2.7.3 root# /usr/local/hadoop-2.7.3/bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar pi 2 5
Number of Maps = 2
Samples per Map = 5
16/10/11 11:56:22 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Wrote input for Map #0
Wrote input for Map #1
http://localhost:8088 Cluster Status 这个接口非常有用
http://localhost:50070/ HDFS status
http://localhost:50090 secondaryNamenode
pom 工程
++++++++++++++++++++++++++++++++
mvn archetype:generate -DgroupId=com.benzuo -DartifactId=hadoop_study -DarchetypeArtifactId=maven-archetype-quickstart -DinteractiveMode=false
编辑 pom.xml 增加依赖。
<!-- Maven build descriptor for the hadoop_study example project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates -->
  <groupId>com.benzuo</groupId>
  <artifactId>hadoop_study</artifactId>
  <packaging>jar</packaging>
  <version>1.0-SNAPSHOT</version>
  <name>hadoop_study</name>
  <url>http://maven.apache.org</url>

  <!-- Versions of the Hadoop artifacts referenced by the dependencies below -->
  <properties>
    <hadoop-common.version>2.7.3</hadoop-common.version>
    <hadoop-hdfs.version>2.7.3</hadoop-hdfs.version>
    <hadoop-client.version>2.7.3</hadoop-client.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <version>${hadoop-common.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-hdfs</artifactId>
      <version>${hadoop-hdfs.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop-client.version}</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <build>
    <resources>
      <!-- Resources shared by every environment -->
      <resource>
        <directory>${basedir}/src/main/resources</directory>
      </resource>
      <!-- Per-environment resources; the "environment" property is set by the profiles below -->
      <resource>
        <directory>${basedir}/src/main/resources-${environment}</directory>
      </resource>
    </resources>
    <pluginManagement>
      <plugins>
        <!-- Compile as Java 1.7 with UTF-8 sources -->
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-compiler-plugin</artifactId>
          <version>2.3.2</version>
          <configuration>
            <encoding>UTF-8</encoding>
            <source>1.7</source>
            <target>1.7</target>
          </configuration>
        </plugin>
        <!-- Copy resources as UTF-8, overwriting existing files -->
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-resources-plugin</artifactId>
          <version>2.5</version>
          <configuration>
            <encoding>UTF-8</encoding>
            <overwrite>true</overwrite>
          </configuration>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>

  <!-- Each profile sets the "environment" property used in the resource directory above -->
  <profiles>
    <profile>
      <id>test</id>
      <properties>
        <environment>test</environment>
      </properties>
    </profile>
    <profile>
      <id>product</id>
      <properties>
        <environment>product</environment>
      </properties>
    </profile>
  </profiles>
</project>
执行一个 wordcount 例子
package com.benzuo;
import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class NameCount { public static class MyMapper extends Mapper<Object, Text, Text, IntWritable>{ private final static IntWritable one = new IntWritable(1); private Text name= new Text(); public void map(Object key, Text value, Context context) throws IOException, InterruptedException { int idx = value.toString().indexOf(" "); if (idx > 0) { String e = value.toString().substring(0, idx); name.set(e); context.write(name, one); } } } public static class MyReducer extends Reducer<Text,IntWritable,Text,IntWritable> { private IntWritable result = new IntWritable(); public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable val : values) { sum += val.get(); } result.set(sum); context.write(key, result); } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: NameCount <in> <out>"); System.exit(2); } Job job = Job.getInstance(conf, "name count"); job.setJarByClass(NameCount.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); 
System.exit(job.waitForCompletion(true) ? 0 : 1); } }
+++++++++++
mvn clean package
将文本放入到 /home/lisangang/data 下
/usr/local/hadoop-2.7.3/bin/hadoop fs -mkdir /home/
/usr/local/hadoop-2.7.3/bin/hadoop fs -mkdir /home/lisangang
/usr/local/hadoop-2.7.3/bin/hadoop fs -mkdir /home/lisangang/data
/usr/local/hadoop-2.7.3/bin/hadoop fs -mkdir /home/lisangang/data/input
/usr/local/hadoop-2.7.3/bin/hadoop fs -chown -R lisangang /home
拷贝一些文件到 input 目录:
/usr/local/hadoop-2.7.3/bin/hdfs dfs -put /usr/local/hadoop-2.7.3/etc/hadoop /home/lisangang/data/input
通过 maven 将打成的 jar 放在 hadoop 安装目录:share/hadoop/mapreduce/下.
sudo cp target/hadoop_study-1.0-SNAPSHOT.jar /usr/local/hadoop-2.7.3/share/hadoop/mapreduce/
/usr/local/hadoop-2.7.3/bin/hadoop jar /usr/local/hadoop-2.7.3/share/hadoop/mapreduce/hadoop_study-1.0-SNAPSHOT.jar com.benzuo.NameCount /home/lisangang/data/input/hadoop /home/lisangang/data/output
开启 debug 模式
export HADOOP_ROOT_LOGGER=DEBUG,console
查看 job
/usr/local/hadoop-2.7.3/bin/hadoop job -list
杀 job
/usr/local/hadoop-2.7.3/bin/hadoop job -kill job_1476238795797_0001
欢迎来到这里!
我们正在构建一个小众社区,大家在这里相互信任,以平等 • 自由 • 奔放的价值观进行分享交流。最终,希望大家能够找到与自己志同道合的伙伴,共同成长。
注册 关于