Installing a Hadoop Single-Node Cluster – Compiling from Source

OS Version: Ubuntu 14.04.2 Server

Java Version: 1.7.0_79

Hadoop Version: 2.6.0 (compiled from source)

1. Install Java
sudo apt-get install default-jdk

java -version
java version "1.7.0_79"
OpenJDK Runtime Environment (IcedTea 2.5.5) (7u79-2.5.5-0ubuntu0.14.04.2)
OpenJDK 64-Bit Server VM (build 24.79-b02, mixed mode)

cd ~user1
vi .bashrc
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
export PATH=$JAVA_HOME/bin:$PATH
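
Reload the file and confirm the variable took effect:
source ~/.bashrc
echo $JAVA_HOME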

2. >>>Download and install protobuf and the other tools required for compilation
curl -# -O https://protobuf.googlecode.com/files/protobuf-2.5.0.tar.gz
gunzip protobuf-2.5.0.tar.gz
tar -xvf protobuf-2.5.0.tar
cd protobuf-2.5.0/
sudo ./configure --prefix=/usr
sudo make
sudo make install
cd java
mvn install
mvn package
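
Hadoop 2.6.0 requires protobuf 2.5.0 exactly, so confirm the installed version before moving on:
protoc --version
libprotoc 2.5.0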

sudo apt-get install -y gcc g++ make maven cmake zlib1g zlib1g-dev libcurl4-openssl-dev
Note: zlib1g and zlib1g-dev were already installed, so the following is sufficient:
sudo apt-get install -y gcc g++ make maven cmake libcurl4-openssl-dev

3. >>>Download the hadoop 2.6.0 source from an Apache hadoop mirror site
wget http://mirror.nus.edu.sg/apache/hadoop/common/stable/hadoop-2.6.0-src.tar.gz
sudo gunzip hadoop-2.6.0-src.tar.gz
sudo tar -xvf hadoop-2.6.0-src.tar
cd hadoop-2.6.0-src/

4. >>>Compile the source
cd /home/user1/hadoop-2.6.0-src/
mvn clean install -DskipTests
cd hadoop-mapreduce-project/
export Platform=x64
mvn clean install assembly:assembly -Pnative
cd ..
mvn package -Pdist,native -DskipTests=true -Dtar

This will create the binaries and a tar file under:
/home/user1/hadoop-2.6.0-src/hadoop-dist/target/hadoop-2.6.0/

Set the hadoop path by symlinking the build output to /usr/local/hadoop, then add it to the system PATH:
sudo ln -s /home/user1/hadoop-2.6.0-src/hadoop-dist/target/hadoop-2.6.0 /usr/local/hadoop
sudo vi /etc/environment
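
For example, the PATH line in /etc/environment can be extended with the hadoop bin and sbin directories so every login shell finds the commands (the exact value below is an assumption based on the symlink above):
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/hadoop/bin:/usr/local/hadoop/sbin"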

5. >>>Configure Hadoop
>>>cd to the hadoop root folder
user1@Master:~$ cd /usr/local/hadoop
user1@Master:/usr/local/hadoop$ ls
bin  conf  etc  include  lib  libexec  LICENSE.txt  NOTICE.txt  README.txt  sbin  share

>>>Create the folder /app/hadoop/tmp to hold the hadoop metadata
user1@Master:/usr/local/hadoop/conf$ sudo mkdir -p /app/hadoop/tmp
user1@Master:/usr/local/hadoop/conf$ sudo chown user1 -R /app
user1@Master:/usr/local/hadoop/conf$ ls -ld /app
drwxr-xr-x 3 user1 root 4096 Jun 30 00:34 /app

>>>Configure hadoop by creating the following configuration files
user1@Master:/usr/local/hadoop$ cd conf/
user1@Master:/usr/local/hadoop/conf$ vi core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/app/hadoop/tmp</value>
<description>A base for other temporary directories.</description>
</property>

<property>
<name>fs.default.name</name>
<value>hdfs://master:54310/</value>
</property>
</configuration>

user1@Master:/usr/local/hadoop/conf$ vi mapred-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>master:54311</value>
<description>The host and port that the MapReduce job tracker runs
at. If "local", then jobs are run in-process as a single map
and reduce task.
</description>
</property>
</configuration>
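
Note: mapred.job.tracker is a legacy MRv1 setting. On Hadoop 2.x, MapReduce jobs are submitted to YARN only when mapreduce.framework.name is set to yarn in mapred-site.xml; otherwise jobs run in the local runner.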

user1@Master:/usr/local/hadoop/conf$ vi hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
<description>Default block replication.
The actual number of replications can be specified when the file is created.
The default of 3 is used if replication is not specified.
</description>
</property>
</configuration>

user1@Master:/usr/local/hadoop/conf$ vi yarn-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.dispatcher.exit-on-error</name>
<value>true</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/user</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*
</value>
</property>

<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.web-proxy.address</name>
<value>master:8034</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
</configuration>

user1@Master:/usr/local/hadoop/conf$ vi capacity-scheduler.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.1</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>default</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
<value>1</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.state</name>
<value>RUNNING</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.node-locality-delay</name>
<value>-1</value>
</property>
</configuration>

user1@Master:/usr/local/hadoop/conf$ vi hadoop-env.sh

export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=/usr/local/hadoop/conf
export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true"
export HADOOP_COMMON_HOME=/usr/local/hadoop
export HADOOP_HDFS_HOME=/usr/local/hadoop
export HADOOP_MAPRED_HOME=/usr/local/hadoop
export HADOOP_YARN_HOME=/usr/local/hadoop
export YARN_CONF_DIR=/usr/local/hadoop/conf

user1@Master:/usr/local/hadoop/conf$ cp hadoop-env.sh yarn-env.sh

>>>We are building a single-node hadoop cluster, so the master node itself acts as the data node; declare it in the slaves file.

user1@Master:/usr/local/hadoop/conf$ hostname -f
Master
user1@Master:/usr/local/hadoop/conf$ vi slaves
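
For this single-node setup the slaves file contains just the master host itself (matching the hostname -f output above):
Master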

6. >>>Format the HDFS file system

user1@Master:/usr/local/hadoop/conf$ hdfs namenode -format
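
After a successful format, the namenode metadata lands under the hadoop.tmp.dir configured earlier (dfs/name is the Hadoop 2.6 default subdirectory), which can be spot-checked with:
ls /app/hadoop/tmp/dfs/name/current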

7. >>>Start the hadoop services and list them with jps
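
Start the HDFS and YARN daemons with the sbin scripts (paths assume the /usr/local/hadoop symlink created above):
/usr/local/hadoop/sbin/start-dfs.sh
/usr/local/hadoop/sbin/start-yarn.sh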

user1@Master:~$ jps
2475 NodeManager
1875 NameNode
2550 Jps
2208 SecondaryNameNode
2028 DataNode
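
Once all daemons are up, the ResourceManager web UI should be reachable at http://master:8088 (per yarn.resourcemanager.webapp.address above) and the NameNode UI at the Hadoop 2.6 default, http://master:50070.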
