The final Hadoop assignment involves Hive. Deploying everything locally means installing a lot of components, which is a hassle, so I practiced setting up the environment on a server instead.
1. Installing MySQL
```sh
sudo apt-get update
sudo apt-get install mysql-server
mysql_secure_installation
sudo systemctl start mysql
```
The last command starts the MySQL service. Next, set the root password from the MySQL shell:
```sql
ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY '123qwe12';
```
This sets the root password to 123qwe12.
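As a quick sanity check (my addition, not part of the original steps), confirm the server accepts the new credentials:

```sh
# Enter the password set above when prompted
mysql -u root -p -e "SELECT VERSION();"
```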
2. Hadoop Environment
2.1 Java
```sh
sudo apt install openjdk-8-jdk -y
```
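To confirm the JDK landed correctly (a quick check of my own, not in the original):

```sh
java -version   # should report openjdk version "1.8.0_..."
```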
2.2 Creating a Hadoop User
```sh
sudo adduser hadoop
sudo usermod -aG sudo hadoop   # grant sudo privileges
su - hadoop
```
2.3 Configuring SSH
Hadoop's start scripts log in to localhost over SSH, so set up passwordless login:
```sh
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
ssh localhost   # should now log in without a password prompt
```
2.4 Downloading Hadoop
```sh
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz
tar xzf hadoop-3.2.1.tar.gz
```
2.5 Setting Environment Variables
Append the following to your shell profile (e.g. ~/.bashrc):
```sh
# Hadoop related options
export HADOOP_HOME=/home/hadoop/hadoop-3.2.1
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
```
Reload the profile to apply the new variables (e.g. `source ~/.bashrc`).
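If the variables took effect, the Hadoop binaries are now on the PATH (a quick check of my own):

```sh
hadoop version   # should print Hadoop 3.2.1
```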
2.6 Setting the JDK Path for Hadoop
```sh
sudo nano $HADOOP_HOME/etc/hadoop/hadoop-env.sh
```
Add the following line:
```sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
```
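If you are not sure where the JDK lives on your machine, one way to locate it (my addition; assumes `javac` is on the PATH):

```sh
# Resolve the javac symlink and strip the /bin/javac suffix
readlink -f "$(which javac)" | sed 's:/bin/javac::'
```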
2.7 Editing the XML Configuration Files
2.7.1 core-site.xml
```sh
sudo nano $HADOOP_HOME/etc/hadoop/core-site.xml
```
Insert the following:
```xml
<configuration>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/tmpdata</value>
  </property>
  <property>
    <!-- fs.defaultFS supersedes the deprecated fs.default.name -->
    <name>fs.defaultFS</name>
    <value>hdfs://127.0.0.1:9000</value>
  </property>
</configuration>
```
2.7.2 hdfs-site.xml
```sh
sudo nano $HADOOP_HOME/etc/hadoop/hdfs-site.xml
```
Insert the following:
```xml
<configuration>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/home/hadoop/dfsdata/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/home/hadoop/dfsdata/datanode</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
```
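Optionally pre-create the local directories referenced in these two files (an extra step of my own; HDFS can also create them on first use):

```sh
mkdir -p /home/hadoop/tmpdata \
         /home/hadoop/dfsdata/namenode \
         /home/hadoop/dfsdata/datanode
```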
2.7.3 mapred-site.xml
```sh
sudo nano $HADOOP_HOME/etc/hadoop/mapred-site.xml
```
Insert the following:
```xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
```
2.7.4 yarn-site.xml
```sh
sudo nano $HADOOP_HOME/etc/hadoop/yarn-site.xml
```
Insert the following:
```xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>127.0.0.1</value>
  </property>
  <property>
    <name>yarn.acl.enable</name>
    <value>0</value>
  </property>
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
</configuration>
```
2.8 Starting the Cluster
```sh
cd hadoop-3.2.1
bin/hdfs namenode -format   # first run only: initialize the namenode
sbin/start-dfs.sh
sbin/start-yarn.sh          # YARN is configured above, so start it as well
```
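To check that the daemons came up (my addition), `jps` from the JDK lists the running Java processes; the NameNode web UI should also be reachable at http://localhost:9870:

```sh
jps
# Expected (PIDs will vary): NameNode, DataNode, SecondaryNameNode,
# ResourceManager, NodeManager, Jps
```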
3. Installing Hive
3.1 Download and Extract
```sh
wget https://mirrors.bfsu.edu.cn/apache/hive/hive-3.1.2/apache-hive-3.1.2-bin.tar.gz
tar -xzvf apache-hive-3.1.2-bin.tar.gz
```
3.2 Setting Environment Variables
Append the following to the shell profile:
```sh
export HIVE_HOME=/home/hadoop/apache-hive-3.1.2-bin
export PATH=$HIVE_HOME/bin:$PATH
```
Reload the profile (e.g. `source ~/.bashrc`).
3.3 Creating HDFS Directories
Hive keeps table data under /user/hive/warehouse and uses /tmp for scratch space, so create both in HDFS:
```sh
hadoop fs -mkdir /tmp
hadoop fs -mkdir /user
hadoop fs -mkdir /user/hive
hadoop fs -mkdir /user/hive/warehouse
hadoop fs -chmod g+w /tmp
hadoop fs -chmod g+w /user/hive/warehouse
```
3.4 Initializing the Hive Metastore
```sh
cd $HIVE_HOME
bin/schematool -dbType derby -initSchema
```
On success the output should end with a `schemaTool completed` message.
3.5 Starting Hive
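The original leaves this step blank. With $HIVE_HOME/bin on the PATH from step 3.2, a minimal smoke test might look like this (my sketch, assuming the Derby metastore initialized above):

```sh
hive -e "SHOW DATABASES;"   # should list at least the default database
```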
4. Installing Sqoop
4.1 Download and Extract
```sh
wget https://mirrors.tuna.tsinghua.edu.cn/apache/sqoop/1.4.7/sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
tar -xvf sqoop-1.4.7.bin__hadoop-2.6.0.tar.gz
```
4.2 Setting Environment Variables
Append the following to the shell profile:
```sh
export SQOOP_HOME=/home/hadoop/sqoop-1.4.7.bin__hadoop-2.6.0
export PATH=$PATH:$SQOOP_HOME/bin
```
Reload the profile (e.g. `source ~/.bashrc`).
4.3 Configuring Sqoop
```sh
cd $SQOOP_HOME/conf
mv sqoop-env-template.sh sqoop-env.sh
nano sqoop-env.sh
```
Write the following into it:
```sh
export HADOOP_COMMON_HOME=/home/hadoop/hadoop-3.2.1
export HADOOP_MAPRED_HOME=/home/hadoop/hadoop-3.2.1
export HIVE_HOME=/home/hadoop/apache-hive-3.1.2-bin
```
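At this point `sqoop version` should run (a check of my own); it usually prints warnings about optional variables such as HBASE_HOME being unset, which are safe to ignore in this setup:

```sh
sqoop version
```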
4.4 Installing the MySQL Connector
```sh
wget https://dev.mysql.com/get/Downloads/Connector-J/mysql-connector-java-8.0.20.zip
unzip mysql-connector-java-8.0.20.zip
cp mysql-connector-java-8.0.20/mysql-connector-java-8.0.20.jar $SQOOP_HOME/lib
wget http://mirrors.tuna.tsinghua.edu.cn/apache//commons/lang/binaries/commons-lang-2.6-bin.zip
unzip commons-lang-2.6-bin.zip
cp commons-lang-2.6/commons-lang-2.6.jar $SQOOP_HOME/lib
```
4.5 Testing the MySQL Connection
```sh
# -P prompts interactively for the MySQL password
sqoop list-databases \
  --connect jdbc:mysql://localhost/ \
  --username root -P
```
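With the connection working, a typical import into Hive might look like the sketch below; the database `testdb` and table `orders` are hypothetical placeholders, not from the original:

```sh
# Hypothetical import: copy MySQL table testdb.orders into Hive.
# -m 1 uses a single map task, so no split-by column is needed.
sqoop import \
  --connect jdbc:mysql://localhost/testdb \
  --username root -P \
  --table orders \
  --hive-import \
  -m 1
```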
The setup process is still a bit fiddly, and I ran into a few bugs while configuring Hive and Sqoop, but solutions for all of them turned up online. The Hadoop course is finally over; here's hoping for a good grade!