Size: 3342
Comment:
|
Size: 3464
Comment:
|
Deletions are marked like this. | Additions are marked like this. |
Line 15: | Line 15: |
{{{ | {{{#!highlight bash |
Line 20: | Line 20: |
{{{ | {{{#!highlight xml |
Line 32: | Line 32: |
{{{ | {{{#!highlight xml |
Line 52: | Line 52: |
{{{ | {{{#!highlight xml |
Line 64: | Line 64: |
{{{ | {{{#!highlight xml |
Line 83: | Line 83: |
{{{ | {{{#!highlight bash |
Line 88: | Line 88: |
{{{ | {{{#!highlight bash |
Line 94: | Line 94: |
{{{ | {{{#!highlight bash |
Line 115: | Line 115: |
Apache Hadoop
The Apache Hadoop project develops open-source software for reliable, scalable, distributed computing.
The Apache Hadoop software library is a framework that allows for the distributed processing of large data sets across clusters of computers using simple programming models. It is designed to scale up from single servers to thousands of machines, each offering local computation and storage.
Install
~/tmp/hadoop-3.3.1/etc/hadoop/hadoop-env.sh
1 export JAVA_HOME=/home/vitor/jdk-11.0.10+9
~/tmp/hadoop-3.3.1/etc/hadoop/core-site.xml
~/tmp/hadoop-3.3.1/etc/hadoop/hdfs-site.xml
1 <?xml version="1.0" encoding="UTF-8"?>
2 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
3 <configuration>
4 <property>
5 <name>dfs.namenode.name.dir</name>
6 <value>/tmp/nameNode</value>
7 </property>
8 <property>
9 <name>dfs.datanode.data.dir</name>
10 <value>/tmp/dataNode</value>
11 </property>
12 <property>
13 <name>dfs.replication</name>
14 <value>2</value>
15 </property>
16 </configuration>
~/tmp/hadoop-3.3.1/etc/hadoop/mapred-site.xml
~/tmp/hadoop-3.3.1/etc/hadoop/yarn-site.xml
1 <?xml version="1.0"?>
2 <configuration>
3 <property>
4 <name>yarn.acl.enable</name>
5 <value>false</value>
6 </property>
7 <property>
8 <name>yarn.resourcemanager.hostname</name>
9 <value>master</value>
10 </property>
11 <property>
12 <name>yarn.nodemanager.aux-services</name>
13 <value>mapreduce_shuffle</value>
14 </property>
15 </configuration>
~/tmp/hadoop-3.3.1/etc/hadoop/workers
1 localhost
~/.bashrc
Set up HDFS
1 ls $HADOOP_HOME/bin/
2 hdfs dfsadmin -report
3 # 2021-07-23 14:07:17,010 WARN fs.FileSystem: Failed to initialize fileystem hdfs://master:9000:
4 # java.lang.IllegalArgumentException: java.net.UnknownHostException: master
5 # report: java.net.UnknownHostException: master
6 # add 127.0.0.1 master to /etc/hosts
7 hdfs namenode -format master
8 hdfs --daemon start namenode
9 hdfs --daemon start datanode
10 yarn --daemon start resourcemanager
11 yarn --daemon start nodemanager
12 yarn --daemon start proxyserver
13 mapred --daemon start historyserver
14 hdfs dfsadmin -report
15 # http://localhost:9870/
16 # http://localhost:9870/dfshealth.html#tab-overview
17 # http://localhost:9870/explorer.html#
18 # http://localhost:8088/
19 # http://localhost:8088/cluster
20 # http://localhost:19888/
21 hadoop fs -ls /
22 hadoop fs -ls /tmp
23 hadoop fs -mkdir /test
24 hadoop fs -ls /