A-Tune (mirror of https://gitee.com/openeuler/A-Tune.git)

Commit: spark auto-deployment and tuning
@@ -1,73 +0,0 @@ (deleted file: the previous README.md)

## Spark Automated Deployment and HiBench Benchmark Performance Testing

### **Workflow**

I. **Spark automated deployment**

1. Install gcc, make, curl, wget, samba, and git
2. Disable the firewall and start nmbd.service
3. Configure passwordless SSH login to the local machine
4. Install Java and configure the Java environment
5. Install Hadoop and configure the Hadoop environment; format the namenode and start HDFS and YARN
6. Install Spark and configure the Spark environment; start the Spark master and worker daemons

II. **HiBench automated deployment**

1. Install Python 2
2. Install Maven, configure the Maven environment variables, and point the Maven repository at a domestic mirror
3. Download, build, and configure HiBench for the Spark benchmark

III. **Run the benchmark**

1. Preparation
2. Run the test

## Getting Started

**Prerequisites**

Put this directory, with all of its files and subdirectories, into a directory on your host, then add an `ip hostname` entry to `/etc/hosts`. Use `ip addr` to look up the host IP and the `hostname` command to look up the host name, for example:

(screenshot removed)

(screenshot removed)

In that case `/etc/hosts` should get the entry `192.168.70.129 spark`; add the corresponding information for your own machine.

The automated deployment downloads a large number of files. If the installation fails because of network problems, you can configure a proxy:

```bash
# To configure a proxy for git, run:
git config --global http.proxy http://ip:port
git config --global https.proxy http://ip:port

# To set a system-wide proxy, add the following to ~/.bashrc:
export http_proxy=http://ip:port
export https_proxy=http://ip:port
# Make the environment variables take effect immediately
source ~/.bashrc

# Note: replace the ip and port above with your own proxy address
```

### **Spark Automated Deployment**

Change to the directory containing the scripts and run `chmod u+x ./install_spark.sh` to make the script executable, then run `./install_spark.sh`. You may be prompted for the administrator password. When the terminal prints `Spark deployment success.`, the deployment has succeeded. Run `source ~/.bashrc` and then `jps`; you should see the following daemons running:

(screenshot of the running daemons removed)

If it does not succeed, check the install_spark.log file in this directory to see which step failed.

### HiBench Automated Deployment

Change to the directory containing the scripts and run `chmod u+x ./install_hibench.sh` to make the script executable, then run `./install_hibench.sh`. You may be prompted for the administrator password. When the terminal prints `Hibench init success`, the deployment has succeeded.

If it does not succeed, check the install_hibench.log file in this directory to see which step failed.

### Run the Benchmark

Change to the directory containing the scripts and run `chmod u+x ./benchmark.sh` to make the script executable, then run `./benchmark.sh` and wait for the output.

The benchmark results can be viewed in `HiBench/report/hibench.report` by running `cat HiBench/report/hibench.report`.
@@ -1,21 +0,0 @@ (deleted file: benchmark.sh)
#!/bin/bash
source ~/.bashrc

# benchmark
cd HiBench
bin/workloads/micro/wordcount/prepare/prepare.sh
if [ $? -eq 0 ]; then
    echo "------------ HiBench prepare success ------------" >>./hibench.log
else
    echo "------------ HiBench prepare failed ------------" >>./hibench.log
    exit
fi
bin/workloads/micro/wordcount/spark/run.sh
if [ $? -eq 0 ]; then
    echo "------------ HiBench benchmark success ------------" >>./hibench.log
else
    echo "------------ HiBench benchmark failed ------------" >>./hibench.log
    exit
fi
cat report/hibench.report
cd ..
@@ -1,24 +0,0 @@ (deleted file: conf/core-site.xml)
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>
@@ -1,15 +0,0 @@ (deleted file: conf/hadoop.conf)
# Hadoop home
hibench.hadoop.home    /home/lionel/Tools/hadoop-3.2.4

# The path of hadoop executable
hibench.hadoop.executable    ${hibench.hadoop.home}/bin/hadoop

# Hadoop configraution directory
hibench.hadoop.configure.dir    ${hibench.hadoop.home}/etc/hadoop

# The root HDFS path to store HiBench data
hibench.hdfs.master    hdfs://localhost:9000


# Hadoop release provider. Supported value: apache
hibench.hadoop.release    apache
@@ -1,24 +0,0 @@ (deleted file: conf/hdfs-site.xml)
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>
@@ -1,28 +0,0 @@ (deleted file: conf/mapred-site.xml)
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.application.classpath</name>
        <value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
    </property>
</configuration>
@@ -1,277 +0,0 @@ (deleted file: conf/settings.xml, the Maven settings template)
<?xml version="1.0" encoding="UTF-8"?>

<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements.  See the NOTICE file
distributed with this work for additional information
regarding copyright ownership.  The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License.  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied.  See the License for the
specific language governing permissions and limitations
under the License.
-->

<!--
 | This is the configuration file for Maven. It can be specified at two levels:
 |
 |  1. User Level. This settings.xml file provides configuration for a single user,
 |                 and is normally provided in ${user.home}/.m2/settings.xml.
 |
 |                 NOTE: This location can be overridden with the CLI option:
 |
 |                 -s /path/to/user/settings.xml
 |
 |  2. Global Level. This settings.xml file provides configuration for all Maven
 |                 users on a machine (assuming they're all using the same Maven
 |                 installation). It's normally provided in
 |                 ${maven.conf}/settings.xml.
 |
 |                 NOTE: This location can be overridden with the CLI option:
 |
 |                 -gs /path/to/global/settings.xml
 |
 | The sections in this sample file are intended to give you a running start at
 | getting the most out of your Maven installation. Where appropriate, the default
 | values (values used when the setting is not specified) are provided.
 |
 |-->
<settings xmlns="http://maven.apache.org/SETTINGS/1.2.0"
          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.2.0 https://maven.apache.org/xsd/settings-1.2.0.xsd">
  <!-- localRepository
   | The path to the local repository maven will use to store artifacts.
   |
   | Default: ${user.home}/.m2/repository
  <localRepository>/path/to/local/repo</localRepository>
  -->

  <!-- interactiveMode
   | This will determine whether maven prompts you when it needs input. If set to false,
   | maven will use a sensible default value, perhaps based on some other setting, for
   | the parameter in question.
   |
   | Default: true
  <interactiveMode>true</interactiveMode>
  -->

  <!-- offline
   | Determines whether maven should attempt to connect to the network when executing a build.
   | This will have an effect on artifact downloads, artifact deployment, and others.
   |
   | Default: false
  <offline>false</offline>
  -->

  <!-- pluginGroups
   | This is a list of additional group identifiers that will be searched when resolving plugins by their prefix, i.e.
   | when invoking a command line like "mvn prefix:goal". Maven will automatically add the group identifiers
   | "org.apache.maven.plugins" and "org.codehaus.mojo" if these are not already contained in the list.
   |-->
  <pluginGroups>
    <!-- pluginGroup
     | Specifies a further group identifier to use for plugin lookup.
    <pluginGroup>com.your.plugins</pluginGroup>
    -->
  </pluginGroups>

  <!-- proxies
   | This is a list of proxies which can be used on this machine to connect to the network.
   | Unless otherwise specified (by system property or command-line switch), the first proxy
   | specification in this list marked as active will be used.
   |-->
  <proxies>
    <!-- proxy
     | Specification for one proxy, to be used in connecting to the network.
     |
    <proxy>
      <id>optional</id>
      <active>true</active>
      <protocol>http</protocol>
      <username>proxyuser</username>
      <password>proxypass</password>
      <host>proxy.host.net</host>
      <port>80</port>
      <nonProxyHosts>local.net|some.host.com</nonProxyHosts>
    </proxy>
    -->
    <!-- <proxy>
      <id>clash</id>
      <active>true</active>
      <protocol>http</protocol>
      <host>127.0.0.1</host>
      <port>7890</port>
    </proxy> -->
  </proxies>

  <!-- servers
   | This is a list of authentication profiles, keyed by the server-id used within the system.
   | Authentication profiles can be used whenever maven must make a connection to a remote server.
   |-->
  <servers>
    <!-- server
     | Specifies the authentication information to use when connecting to a particular server, identified by
     | a unique name within the system (referred to by the 'id' attribute below).
     |
     | NOTE: You should either specify username/password OR privateKey/passphrase, since these pairings are
     |       used together.
     |
    <server>
      <id>deploymentRepo</id>
      <username>repouser</username>
      <password>repopwd</password>
    </server>
    -->

    <!-- Another sample, using keys to authenticate.
    <server>
      <id>siteServer</id>
      <privateKey>/path/to/private/key</privateKey>
      <passphrase>optional; leave empty if not used.</passphrase>
    </server>
    -->
  </servers>

  <!-- mirrors
   | This is a list of mirrors to be used in downloading artifacts from remote repositories.
   |
   | It works like this: a POM may declare a repository to use in resolving certain artifacts.
   | However, this repository may have problems with heavy traffic at times, so people have mirrored
   | it to several places.
   |
   | That repository definition will have a unique id, so we can create a mirror reference for that
   | repository, to be used as an alternate download site. The mirror site will be the preferred
   | server for that repository.
   |-->
  <mirrors>
    <!-- mirror
     | Specifies a repository mirror site to use instead of a given repository. The repository that
     | this mirror serves has an ID that matches the mirrorOf element of this mirror. IDs are used
     | for inheritance and direct lookup purposes, and must be unique across the set of mirrors.
     |
    <mirror>
      <id>mirrorId</id>
      <mirrorOf>repositoryId</mirrorOf>
      <name>Human Readable Name for this Mirror.</name>
      <url>http://my.repository.com/repo/path</url>
    </mirror>
    -->
    <!-- <mirror>
      <id>maven-default-http-blocker</id>
      <mirrorOf>external:http:*</mirrorOf>
      <name>Pseudo repository to mirror external repositories initially using HTTP.</name>
      <url>http://0.0.0.0/</url>
      <blocked>true</blocked>
    </mirror> -->
    <mirror>
      <id>aliyunmaven</id>
      <mirrorOf>*</mirrorOf>
      <name>阿里云公共仓库</name>
      <url>https://maven.aliyun.com/repository/public</url>
    </mirror>
  </mirrors>

  <!-- profiles
   | This is a list of profiles which can be activated in a variety of ways, and which can modify
   | the build process. Profiles provided in the settings.xml are intended to provide local machine-
   | specific paths and repository locations which allow the build to work in the local environment.
   |
   | For example, if you have an integration testing plugin - like cactus - that needs to know where
   | your Tomcat instance is installed, you can provide a variable here such that the variable is
   | dereferenced during the build process to configure the cactus plugin.
   |
   | As noted above, profiles can be activated in a variety of ways. One way - the activeProfiles
   | section of this document (settings.xml) - will be discussed later. Another way essentially
   | relies on the detection of a system property, either matching a particular value for the property,
   | or merely testing its existence. Profiles can also be activated by JDK version prefix, where a
   | value of '1.4' might activate a profile when the build is executed on a JDK version of '1.4.2_07'.
   | Finally, the list of active profiles can be specified directly from the command line.
   |
   | NOTE: For profiles defined in the settings.xml, you are restricted to specifying only artifact
   |       repositories, plugin repositories, and free-form properties to be used as configuration
   |       variables for plugins in the POM.
   |
   |-->
  <profiles>
    <!-- profile
     | Specifies a set of introductions to the build process, to be activated using one or more of the
     | mechanisms described above. For inheritance purposes, and to activate profiles via <activatedProfiles/>
     | or the command line, profiles have to have an ID that is unique.
     |
     | An encouraged best practice for profile identification is to use a consistent naming convention
     | for profiles, such as 'env-dev', 'env-test', 'env-production', 'user-jdcasey', 'user-brett', etc.
     | This will make it more intuitive to understand what the set of introduced profiles is attempting
     | to accomplish, particularly when you only have a list of profile id's for debug.
     |
     | This profile example uses the JDK version to trigger activation, and provides a JDK-specific repo.
    <profile>
      <id>jdk-1.4</id>

      <activation>
        <jdk>1.4</jdk>
      </activation>

      <repositories>
        <repository>
          <id>jdk14</id>
          <name>Repository for JDK 1.4 builds</name>
          <url>http://www.myhost.com/maven/jdk14</url>
          <layout>default</layout>
          <snapshotPolicy>always</snapshotPolicy>
        </repository>
      </repositories>
    </profile>
    -->

    <!--
     | Here is another profile, activated by the system property 'target-env' with a value of 'dev',
     | which provides a specific path to the Tomcat instance. To use this, your plugin configuration
     | might hypothetically look like:
     |
     | ...
     | <plugin>
     |   <groupId>org.myco.myplugins</groupId>
     |   <artifactId>myplugin</artifactId>
     |
     |   <configuration>
     |     <tomcatLocation>${tomcatPath}</tomcatLocation>
     |   </configuration>
     | </plugin>
     | ...
     |
     | NOTE: If you just wanted to inject this configuration whenever someone set 'target-env' to
     |       anything, you could just leave off the <value/> inside the activation-property.
     |
    <profile>
      <id>env-dev</id>

      <activation>
        <property>
          <name>target-env</name>
          <value>dev</value>
        </property>
      </activation>

      <properties>
        <tomcatPath>/path/to/tomcat/instance</tomcatPath>
      </properties>
    </profile>
    -->

    <!-- activeProfiles
     | List of profiles that are active for all builds.
     |
    <activeProfiles>
      <activeProfile>alwaysActiveProfile</activeProfile>
      <activeProfile>anotherAlwaysActiveProfile</activeProfile>
    </activeProfiles>
    -->
  </profiles>
</settings>
@@ -1,45 +0,0 @@ (deleted file: conf/spark.conf)
# Spark home
hibench.spark.home    /home/lionel/Tools/spark-3.1.3-bin-hadoop3.2

# Spark master
# standalone mode: spark://xxx:7077
# YARN mode: yarn-client
hibench.spark.master    spark://localhost:7077

# executor number and cores when running on Yarn
hibench.yarn.executor.num    2
hibench.yarn.executor.cores    4

# executor and driver memory in standalone & YARN mode
spark.executor.memory    4g
spark.driver.memory    4g

# set spark parallelism property according to hibench's parallelism value
spark.default.parallelism    ${hibench.default.map.parallelism}

# set spark sql's default shuffle partitions according to hibench's parallelism value
spark.sql.shuffle.partitions    ${hibench.default.shuffle.parallelism}

#======================================================
# Spark Streaming
#======================================================
# Spark streaming Batchnterval in millisecond (default 100)
hibench.streambench.spark.batchInterval    100

# Number of nodes that will receive kafka input (default: 4)
hibench.streambench.spark.receiverNumber    4

# Indicate RDD storage level. (default: 2)
# 0 = StorageLevel.MEMORY_ONLY
# 1 = StorageLevel.MEMORY_AND_DISK_SER
# other = StorageLevel.MEMORY_AND_DISK_SER_2
hibench.streambench.spark.storageLevel    2

# indicate whether to test the write ahead log new feature (default: false)
hibench.streambench.spark.enableWAL    false

# if testWAL is true, this path to store stream context in hdfs shall be specified. If false, it can be empty (default: /var/tmp)
hibench.streambench.spark.checkpointPath    /var/tmp

# whether to use direct approach or not (dafault: true)
hibench.streambench.spark.useDirectMode    true
@@ -1,26 +0,0 @@ (deleted file: conf/yarn-site.xml)
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value>
    </property>
</configuration>
examples/tuning/spark/spark_auto_deployment_and_tuning/README.md (new file, 143 lines)
@@ -0,0 +1,143 @@
## Spark Automated Deployment and HiBench Benchmark Performance Testing

### **Workflow**

I. **Spark automated deployment**

1. Install gcc, make, curl, wget, samba, and git
2. Disable the firewall and start nmbd.service
3. Configure passwordless SSH login to the local machine
4. Install Java and configure the Java environment
5. Install Hadoop and configure the Hadoop environment; format the namenode and start HDFS and YARN
6. Install Spark and configure the Spark environment; start the Spark master and worker daemons

II. **HiBench automated deployment**

1. Install Python 2
2. Install Maven, configure the Maven environment variables, and point the Maven repository at a domestic mirror
3. Download, build, and configure HiBench for the Spark benchmark

III. **Run the benchmark**

1. Preparation
2. Run the test

## Getting Started

**Prerequisites**

- Put this directory, with all of its files and subdirectories, into a directory on your host. Use `ip addr` to look up the host IP and the `hostname` command to look up the host name, then add an `ip hostname` entry to `/etc/hosts`, for example: `192.168.70.129 spark`
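One way to append that entry automatically (a hedged convenience helper, not part of the original README; verify the detected IP against `ip addr` before trusting it):

```bash
# Append "<first local IP> <hostname>" to /etc/hosts (hypothetical helper)
echo "$(hostname -I | awk '{print $1}') $(hostname)" | sudo tee -a /etc/hosts
```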
- Disable the firewall:

```bash
systemctl stop firewalld
```

- Update the system and install the required dependencies:

```bash
dnf update -y
dnf install gcc make curl wget samba git atune atune-engine -y
```

- Start the services:

```bash
systemctl start nmb
systemctl start atuned
systemctl start atune-engine
```

**PS**: atuned and atune-engine may fail to start. If so, set `rest_tls` and `engine_tls` to false in `/etc/atuned/atuned.cnf` and set `network` to your own network interface; also set `engine_tls` to false in `/etc/atuned/engine.cnf`.
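Those edits can be scripted. A minimal sketch, assuming the stock key names above and an interface called ens33 (substitute the NIC shown by `ip addr`):

```bash
# Hypothetical: relax TLS and pin the NIC in the A-Tune config files
sed -i 's/^rest_tls.*/rest_tls = false/'     /etc/atuned/atuned.cnf
sed -i 's/^engine_tls.*/engine_tls = false/' /etc/atuned/atuned.cnf
sed -i 's/^network.*/network = ens33/'       /etc/atuned/atuned.cnf
sed -i 's/^engine_tls.*/engine_tls = false/' /etc/atuned/engine.cnf
systemctl restart atuned atune-engine
```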
- Configure passwordless SSH login:

```bash
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >>~/.ssh/authorized_keys
```

The automated deployment downloads a large number of files. If the installation fails because of network problems, you can configure a proxy:

```bash
# To configure a proxy for git, run:
git config --global http.proxy http://ip:port
git config --global https.proxy http://ip:port

# To set a system-wide proxy, add the following to ~/.bashrc:
export http_proxy=http://ip:port
export https_proxy=http://ip:port
# Make the environment variables take effect immediately
source ~/.bashrc

# Note: replace the ip and port above with your own proxy address
```

### **Spark Automated Deployment**

Change to the directory containing the scripts and run `chmod u+x ./install_spark.sh` to make the script executable, then run `./install_spark.sh`. You may be prompted for the administrator password. When the terminal prints `Spark deployment success.`, the deployment has succeeded. Run `source ~/.bashrc` and then `jps`; the running daemons should be: `NameNode, NodeManager, SecondaryNameNode, ResourceManager, DataNode, Master, Worker`
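A couple of optional sanity checks beyond `jps` (hedged suggestions, not part of the original scripts; they assume the installer put the Hadoop binaries on your PATH and that the default ports are in use):

```bash
# HDFS namenode answers and reports capacity
hdfs dfsadmin -report | head -n 10
# The standalone Spark master web UI listens on 8080 by default
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:8080
```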
If it does not succeed, check the install_spark.log file in this directory to see which step failed.

### HiBench Automated Deployment

Change to the directory containing the scripts and run `chmod u+x ./install_hibench.sh` to make the script executable, then run `./install_hibench.sh`. You may be prompted for the administrator password. When the terminal prints `Hibench init success`, the deployment has succeeded.

If it does not succeed, check the install_hibench.log file in this directory to see which step failed.

### Run a Basic Benchmark

Change to the directory containing the scripts:

```bash
sh HiBench/bin/workloads/sql/join/prepare/prepare.sh
sh HiBench/bin/workloads/sql/join/spark/run.sh
# Results
cat HiBench/report/hibench.log
```
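For reference, stock HiBench also appends a one-line summary per run to `HiBench/report/hibench.report`, typically with these columns:

```
Type  Date  Time  Input_data_size  Duration(s)  Throughput(bytes/s)  Throughput/node
```

The duration/throughput pairs quoted in the tuning log later in this commit have the same shape as this summary.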
## A-Tune HiBench Performance Tuning

**Example machine configuration**

- Virtualization: VMware Workstation 17
- OS: openEuler 22.03 SP1
- CPU: AMD Ryzen 7 4800H with Radeon Graphics (VM with 2 CPUs, 4 cores each)
- Memory: 8G
- Disk: 128G

**Spark tuning parameters** (a sketch of how a set-style script could apply one of them follows this list):

- num_executors: number of executors (2~4)
- executor_core: cores per executor (2~4)
- executor_memory: executor memory (1g~4g)
- driver_memory: driver memory (1g~2g)
- default_parallelism: default parallelism (10~50)
- storageLevel: default RDD storage level (0~2)
- shuffle_partition: number of shuffle partitions (1~4)
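These knobs correspond to HiBench's `conf/spark.conf` properties shown elsewhere in this commit (for example `spark.executor.memory` and `hibench.yarn.executor.num`). A minimal sketch of a set-style helper, assuming the tuning script rewrites that file; the real logic lives in spark_hibench.sh, which this README does not show:

```bash
# Hypothetical: apply one executor-memory candidate by editing HiBench's spark.conf
set_executor_memory() {
  local mem_gb="$1"
  sed -i "s/^spark.executor.memory.*/spark.executor.memory    ${mem_gb}g/" HiBench/conf/spark.conf
}
set_executor_memory 4
```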
**The HDFS data scale is set to huge (see HiBench's instructions on adjusting the test data scale); the equivalent manual edit is shown below.**
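The installer already pins this: the same substitution appears in the install_hibench.sh diff later in this commit. To change the scale manually, the equivalent edit is:

```bash
# Set HiBench's scale profile to huge (same sed the installer runs)
sed -i "s|hibench.scale.profile.*|hibench.scale.profile\thuge|g" HiBench/conf/hibench.conf
```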
### Start the Test

First, generate the test data: `sh HiBench/bin/workloads/sql/join/prepare/prepare.sh`

Copy `spark_hibench_server.yaml` to `/etc/atuned/tuning`:

```bash
cp spark_hibench_server.yaml /etc/atuned/tuning
# Note: every get and set path configured in spark_hibench_server.yaml must point to the location of spark_hibench.sh
```
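If the shipped yaml carries a common placeholder path, one hedged way to rewrite every get/set entry in bulk (this assumes a placeholder of the form shown; adjust the pattern to whatever the file actually contains):

```bash
# Hypothetical: point the get/set entries at this directory's spark_hibench.sh
sed -i "s|/path/to/spark_hibench.sh|$(pwd)/spark_hibench.sh|g" /etc/atuned/tuning/spark_hibench_server.yaml
```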
Start the performance tuning:

```bash
atune-adm tuning --project spark_hibench --detail ./spark_hibench_client.yaml
```
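If you later want to fall back to the pre-tuning configuration, the tuning subcommand has a restore mode (hedged: confirm the flag with `atune-adm tuning --help` on your build):

```bash
atune-adm tuning --restore --project spark_hibench
```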
**Notice: the results of this example run are saved in the atune_spark_bench.log file in this directory.**
@@ -0,0 +1,211 @@ (new file: atune_spark_bench.log)
[root@spark sparkbench]# atune-adm tuning --project spark_hibench --detail ./spark_hibench_client.yaml
Start to benchmark baseline...
1.Loading its corresponding tuning project: spark_hibench
2.Start to tuning the system......
Current Tuning Progress......(1/50)
Used time: 1m38s, Total Time: 1m38s, Best Performance: (duration=48.58,throughput=39149753.00), Performance Improvement Rate: 0.06%
The 1th recommand parameters is: num_executors=3,executor_core=3,executor_memory=1,driver_memory=1,default_parallelism=13,storageLevel=0,shuffle_partition=2
The 1th evaluation value: (duration=48.58,throughput=39149753.00)(0.06%)
Current Tuning Progress......(2/50)
Used time: 2m23s, Total Time: 2m23s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 2th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=33,storageLevel=0,shuffle_partition=4
The 2th evaluation value: (duration=44.55,throughput=42695879.00)(9.11%)
Current Tuning Progress......(3/50)
Used time: 3m10s, Total Time: 3m10s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 3th recommand parameters is: num_executors=2,executor_core=3,executor_memory=2,driver_memory=2,default_parallelism=13,storageLevel=0,shuffle_partition=3
The 3th evaluation value: (duration=47.06,throughput=40417640.00)(3.29%)
Current Tuning Progress......(4/50)
Used time: 3m56s, Total Time: 3m56s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 4th recommand parameters is: num_executors=4,executor_core=3,executor_memory=3,driver_memory=2,default_parallelism=38,storageLevel=0,shuffle_partition=1
The 4th evaluation value: (duration=45.69,throughput=41629603.00)(6.39%)
Current Tuning Progress......(5/50)
Used time: 4m50s, Total Time: 4m50s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 5th recommand parameters is: num_executors=2,executor_core=4,executor_memory=1,driver_memory=1,default_parallelism=39,storageLevel=1,shuffle_partition=3
The 5th evaluation value: (duration=53.73,throughput=35400039.00)(-10.53%)
Current Tuning Progress......(6/50)
Used time: 5m40s, Total Time: 5m40s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 6th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=35,storageLevel=1,shuffle_partition=3
The 6th evaluation value: (duration=49.34,throughput=38548304.00)(-1.50%)
Current Tuning Progress......(7/50)
Used time: 6m27s, Total Time: 6m27s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 7th recommand parameters is: num_executors=3,executor_core=4,executor_memory=1,driver_memory=1,default_parallelism=38,storageLevel=0,shuffle_partition=2
The 7th evaluation value: (duration=47.10,throughput=40383314.00)(3.21%)
Current Tuning Progress......(8/50)
Used time: 7m12s, Total Time: 7m12s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 8th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1
The 8th evaluation value: (duration=45.06,throughput=42213541.00)(7.88%)
Current Tuning Progress......(9/50)
Used time: 8m0s, Total Time: 8m0s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 9th recommand parameters is: num_executors=3,executor_core=2,executor_memory=3,driver_memory=2,default_parallelism=12,storageLevel=0,shuffle_partition=3
The 9th evaluation value: (duration=46.89,throughput=40560721.00)(3.67%)
Current Tuning Progress......(10/50)
Used time: 8m50s, Total Time: 8m50s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 10th recommand parameters is: num_executors=3,executor_core=2,executor_memory=3,driver_memory=2,default_parallelism=30,storageLevel=1,shuffle_partition=3
The 10th evaluation value: (duration=50.58,throughput=37604012.00)(-4.05%)
Current Tuning Progress......(11/50)
Used time: 9m41s, Total Time: 9m41s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 11th recommand parameters is: num_executors=2,executor_core=4,executor_memory=4,driver_memory=1,default_parallelism=40,storageLevel=0,shuffle_partition=1
The 11th evaluation value: (duration=49.54,throughput=38390354.00)(-1.92%)
Current Tuning Progress......(12/50)
Used time: 10m32s, Total Time: 10m32s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 12th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=33,storageLevel=0,shuffle_partition=4
The 12th evaluation value: (duration=49.37,throughput=38528782.00)(-1.56%)
Current Tuning Progress......(13/50)
Used time: 11m23s, Total Time: 11m23s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 13th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=33,storageLevel=0,shuffle_partition=4
The 13th evaluation value: (duration=50.74,throughput=37484693.00)(-4.38%)
Current Tuning Progress......(14/50)
Used time: 12m13s, Total Time: 12m13s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 14th recommand parameters is: num_executors=2,executor_core=2,executor_memory=4,driver_memory=1,default_parallelism=14,storageLevel=0,shuffle_partition=1
The 14th evaluation value: (duration=48.31,throughput=39370993.00)(0.62%)
Current Tuning Progress......(15/50)
Used time: 13m5s, Total Time: 13m5s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 15th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=2,default_parallelism=37,storageLevel=0,shuffle_partition=1
The 15th evaluation value: (duration=51.56,throughput=36887828.00)(-6.07%)
Current Tuning Progress......(16/50)
Used time: 13m54s, Total Time: 13m54s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 16th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=2,default_parallelism=17,storageLevel=1,shuffle_partition=2
The 16th evaluation value: (duration=48.44,throughput=39264519.00)(0.35%)
Current Tuning Progress......(17/50)
Used time: 14m44s, Total Time: 14m44s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 17th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=3
The 17th evaluation value: (duration=47.74,throughput=39836907.00)(1.82%)
Current Tuning Progress......(18/50)
Used time: 15m32s, Total Time: 15m32s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 18th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=2,default_parallelism=16,storageLevel=0,shuffle_partition=1
The 18th evaluation value: (duration=47.72,throughput=39861119.00)(1.87%)
Current Tuning Progress......(19/50)
Used time: 16m22s, Total Time: 16m22s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 19th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=17,storageLevel=0,shuffle_partition=1
The 19th evaluation value: (duration=48.78,throughput=38990843.00)(-0.35%)
Current Tuning Progress......(20/50)
Used time: 17m11s, Total Time: 17m11s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 20th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=4
The 20th evaluation value: (duration=48.30,throughput=39382406.00)(0.64%)
Current Tuning Progress......(21/50)
Used time: 17m58s, Total Time: 17m58s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 21th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1
The 21th evaluation value: (duration=46.49,throughput=40913210.00)(4.56%)
Current Tuning Progress......(22/50)
Used time: 18m49s, Total Time: 18m49s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 22th recommand parameters is: num_executors=4,executor_core=3,executor_memory=3,driver_memory=2,default_parallelism=38,storageLevel=0,shuffle_partition=1
The 22th evaluation value: (duration=49.63,throughput=38324601.00)(-2.10%)
Current Tuning Progress......(23/50)
Used time: 19m34s, Total Time: 19m34s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 23th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2
The 23th evaluation value: (duration=44.78,throughput=42473723.00)(8.55%)
Current Tuning Progress......(24/50)
Used time: 20m21s, Total Time: 20m21s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 24th recommand parameters is: num_executors=2,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2
The 24th evaluation value: (duration=45.56,throughput=41747477.00)(6.69%)
Current Tuning Progress......(25/50)
Used time: 21m8s, Total Time: 21m8s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 25th recommand parameters is: num_executors=4,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2
The 25th evaluation value: (duration=46.73,throughput=40697850.00)(4.02%)
Current Tuning Progress......(26/50)
Used time: 21m54s, Total Time: 21m54s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 26th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1
The 26th evaluation value: (duration=44.84,throughput=42415943.00)(8.41%)
Current Tuning Progress......(27/50)
Used time: 22m40s, Total Time: 22m40s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11%
The 27th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2
The 27th evaluation value: (duration=44.76,throughput=42496499.00)(8.60%)
Current Tuning Progress......(28/50)
Used time: 23m25s, Total Time: 23m25s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53%
The 28th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=3
The 28th evaluation value: (duration=44.38,throughput=42861370.00)(9.53%)
Current Tuning Progress......(29/50)
Used time: 24m11s, Total Time: 24m11s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53%
The 29th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=3
The 29th evaluation value: (duration=45.40,throughput=41891840.00)(7.07%)
Current Tuning Progress......(30/50)
Used time: 25m8s, Total Time: 25m8s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53%
The 30th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2
The 30th evaluation value: (duration=56.03,throughput=33942595.00)(-15.27%)
Current Tuning Progress......(31/50)
Used time: 26m2s, Total Time: 26m2s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53%
The 31th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=13,storageLevel=0,shuffle_partition=1
The 31th evaluation value: (duration=53.17,throughput=35774241.00)(-9.38%)
Current Tuning Progress......(32/50)
Used time: 26m56s, Total Time: 26m56s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53%
The 32th recommand parameters is: num_executors=2,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2
The 32th evaluation value: (duration=52.67,throughput=36111132.00)(-8.35%)
Current Tuning Progress......(33/50)
Used time: 27m49s, Total Time: 27m49s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53%
The 33th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=3
The 33th evaluation value: (duration=50.66,throughput=37540182.00)(-4.22%)
Current Tuning Progress......(34/50)
Used time: 28m39s, Total Time: 28m39s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53%
The 34th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=3
The 34th evaluation value: (duration=49.44,throughput=38474225.00)(-1.71%)
Current Tuning Progress......(35/50)
Used time: 29m26s, Total Time: 29m26s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53%
The 35th recommand parameters is: num_executors=3,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=12,storageLevel=1,shuffle_partition=3
The 35th evaluation value: (duration=45.28,throughput=42003783.00)(7.35%)
Current Tuning Progress......(36/50)
Used time: 30m8s, Total Time: 30m8s, Best Performance: (duration=41.91,throughput=45385575.00), Performance Improvement Rate: 15.99%
The 36th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1
The 36th evaluation value: (duration=41.91,throughput=45385575.00)(15.99%)
Current Tuning Progress......(37/50)
Used time: 30m51s, Total Time: 30m51s, Best Performance: (duration=41.81,throughput=45494135.00), Performance Improvement Rate: 16.26%
The 37th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1
The 37th evaluation value: (duration=41.81,throughput=45494135.00)(16.26%)
Current Tuning Progress......(38/50)
Used time: 31m33s, Total Time: 31m33s, Best Performance: (duration=41.35,throughput=45999161.00), Performance Improvement Rate: 17.56%
The 38th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1
The 38th evaluation value: (duration=41.35,throughput=45999161.00)(17.56%)
Current Tuning Progress......(39/50)
Used time: 32m15s, Total Time: 32m15s, Best Performance: (duration=41.35,throughput=45999161.00), Performance Improvement Rate: 17.56%
The 39th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=19,storageLevel=0,shuffle_partition=1
The 39th evaluation value: (duration=41.48,throughput=45858308.00)(17.19%)
Current Tuning Progress......(40/50)
Used time: 32m58s, Total Time: 32m58s, Best Performance: (duration=41.35,throughput=45999161.00), Performance Improvement Rate: 17.56%
The 40th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=40,storageLevel=0,shuffle_partition=1
The 40th evaluation value: (duration=41.81,throughput=45486519.00)(16.26%)
Current Tuning Progress......(41/50)
Used time: 33m41s, Total Time: 33m41s, Best Performance: (duration=41.35,throughput=45999161.00), Performance Improvement Rate: 17.56%
The 41th recommand parameters is: num_executors=4,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=4
The 41th evaluation value: (duration=41.36,throughput=45983592.00)(17.53%)
Current Tuning Progress......(42/50)
Used time: 34m23s, Total Time: 34m23s, Best Performance: (duration=41.35,throughput=45999161.00), Performance Improvement Rate: 17.56%
The 42th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=18,storageLevel=0,shuffle_partition=3
The 42th evaluation value: (duration=41.52,throughput=45808606.00)(17.08%)
Current Tuning Progress......(43/50)
Used time: 35m5s, Total Time: 35m5s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61%
The 43th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=20,storageLevel=0,shuffle_partition=2
The 43th evaluation value: (duration=41.33,throughput=46014741.00)(17.61%)
Current Tuning Progress......(44/50)
Used time: 35m48s, Total Time: 35m48s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61%
The 44th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=30,storageLevel=0,shuffle_partition=2
The 44th evaluation value: (duration=41.53,throughput=45800884.00)(17.05%)
Current Tuning Progress......(45/50)
Used time: 36m30s, Total Time: 36m30s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61%
The 45th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=2,default_parallelism=14,storageLevel=1,shuffle_partition=4
The 45th evaluation value: (duration=41.44,throughput=45902578.00)(17.30%)
Current Tuning Progress......(46/50)
Used time: 37m13s, Total Time: 37m13s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61%
The 46th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=20,storageLevel=1,shuffle_partition=2
The 46th evaluation value: (duration=41.77,throughput=45533344.00)(16.37%)
Current Tuning Progress......(47/50)
Used time: 37m57s, Total Time: 37m57s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61%
The 47th recommand parameters is: num_executors=4,executor_core=3,executor_memory=1,driver_memory=2,default_parallelism=39,storageLevel=1,shuffle_partition=2
The 47th evaluation value: (duration=43.16,throughput=44067964.00)(12.63%)
Current Tuning Progress......(48/50)
Used time: 38m42s, Total Time: 38m42s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61%
The 48th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=3
The 48th evaluation value: (duration=43.91,throughput=43312306.00)(10.70%)
Current Tuning Progress......(49/50)
Used time: 39m31s, Total Time: 39m31s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61%
The 49th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=19,storageLevel=1,shuffle_partition=2
The 49th evaluation value: (duration=47.78,throughput=39807725.00)(1.74%)
Current Tuning Progress......(50/50)
Used time: 40m16s, Total Time: 40m16s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61%
The 50th recommand parameters is: num_executors=4,executor_core=4,executor_memory=2,driver_memory=1,default_parallelism=17,storageLevel=1,shuffle_partition=1
The 50th evaluation value: (duration=43.84,throughput=43388387.00)(10.88%)

The final optimization result is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=20,storageLevel=0,shuffle_partition=2
The final evaluation value is: duration=41.33,throughput=46014741.00

Baseline Performance is: (duration=48.61,throughput=39128812.00)

Tuning Finished
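As a sanity check on the reported numbers: the final improvement rate is consistent with the duration ratio, 48.61 / 41.33 − 1 ≈ 0.1761, i.e. the logged 17.61% (equivalently, throughput 46014741 / 39128812 − 1 ≈ 17.6%).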
examples/tuning/spark/spark_auto_deployment_and_tuning/install_hibench.sh (modified)
@@ -12,6 +12,7 @@ if ! command -v python2 &>/dev/null; then
         exit
     fi
     tar -xf ./Python-2.7.18.tgz
+    rm -f ./Python-2.7.18.tgz
     # install python-2.7
     echo "installing python-2.7..."
     td=$(pwd)
@@ -19,7 +20,7 @@ if ! command -v python2 &>/dev/null; then
     ./configure --prefix=$td/python-2.7
     make
     make install
-    sudo ln -s $td/python-2.7/bin/python2.7 /usr/bin/python2
+    ln -s $td/python-2.7/bin/python2.7 /usr/bin/python2
     cd ..
 fi

@@ -33,6 +34,7 @@ else
     exit
 fi
 tar -xf ./apache-maven-3.8.8-bin.tar.gz
+rm -f ./apache-maven-3.8.8-bin.tar.gz
 export MAVEN_HOME=$(pwd)/apache-maven-3.8.8
 if ! grep -q "export MAVEN_HOME=$(pwd)/apache-maven-3.8.8" ~/.bashrc; then
     echo "export MAVEN_HOME=$(pwd)/apache-maven-3.8.8" >>~/.bashrc
@@ -42,7 +44,20 @@ source ~/.bashrc

 # maven aliyun mirror
 mkdir ~/.m2
-cp ./conf/settings.xml ~/.m2
+cat >~/.m2/settings.xml <<EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<settings xmlns="http://maven.apache.org/SETTINGS/1.2.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.2.0 https://maven.apache.org/xsd/settings-1.2.0.xsd">
+    <mirrors>
+        <mirror>
+            <id>aliyunmaven</id>
+            <mirrorOf>*</mirrorOf>
+            <name>阿里云公共仓库</name>
+            <url>https://maven.aliyun.com/repository/public</url>
+        </mirror>
+    </mirrors>
+</settings>
+EOF

 # install HiBench
 echo "installing HiBench..."
@@ -72,6 +87,8 @@ cp conf/hadoop.conf.template conf/hadoop.conf
 sed -i "2c hibench.hadoop.home $HADOOP_HOME" conf/hadoop.conf
 sed -i "11c hibench.hdfs.master hdfs://localhost:9000" conf/hadoop.conf
+
+sed -i "s|hibench.scale.profile.*|hibench.scale.profile\thuge|g" conf/hibench.conf

 cp conf/spark.conf.template conf/spark.conf
 sed -i "2c hibench.spark.home $SPARK_HOME" conf/spark.conf
 sed -i "7c hibench.spark.master spark://localhost:7077" conf/spark.conf
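A quick, hedged way to confirm the inlined mirror actually took effect (requires mvn on the PATH; help:effective-settings is a standard maven-help-plugin goal):

```bash
mvn help:effective-settings | grep -A 3 aliyunmaven
```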
@@ -1,29 +1,9 @@
 #!/bin/bash
 
-# install dependencies
-echo "install dependencies..."
-sudo dnf install gcc make curl wget samba git -y
-if [ $? -eq 0 ]; then
-    echo "------------ dependencies install success ------------" >>./install_spark.log
-else
-    echo "------------ dependencies install failed ------------" >>./install_spark.log
-    exit
-fi
-
-# stop firewalld
-sudo systemctl disable --now firewalld
-
-# start samba
-sudo systemctl enable --now nmbd
-
-### ssh password-free login
-ssh-keygen -t rsa
-cat ~/.ssh/id_rsa.pub >>~/.ssh/authorized_keys
-
 # download and install software
 # JDK 1.8
 echo "downloading jdk..."
-wget https://mirrors.tuna.tsinghua.edu.cn/Adoptium/8/jdk/x64/linux/OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tar.gz
+wget https://mirrors.ustc.edu.cn/adoptium/releases/temurin8-binaries/jdk8u372-b07/OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tar.gz
 if [ $? -eq 0 ]; then
     echo "------------ jdk-1.8 download success ------------" >>./install_spark.log
 else
@@ -32,6 +12,7 @@ else
 fi
 # install jdk
 tar -xf ./OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tar.gz
+rm -f ./OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tar.gz
 export JAVA_HOME=$(pwd)/jdk8u372-b07
 if ! grep -q "export JAVA_HOME=$(pwd)/jdk8u372-b07" ~/.bashrc; then
     echo "export JAVA_HOME=$(pwd)/jdk8u372-b07" >>~/.bashrc
@@ -41,7 +22,7 @@ source ~/.bashrc
 
 ## Hadoop
 echo "downloading hadoop..."
-wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/core/hadoop-3.2.4/hadoop-3.2.4.tar.gz
+wget https://mirrors.ustc.edu.cn/apache/hadoop/core/hadoop-3.2.4/hadoop-3.2.4.tar.gz
 if [ $? -eq 0 ]; then
     echo "------------ hadoop-3.2 download success ------------" >>./install_spark.log
 else
@@ -50,12 +31,102 @@ else
 fi
 # install hadoop
 tar -xf ./hadoop-3.2.4.tar.gz
+rm -f ./hadoop-3.2.4.tar.gz
 export HADOOP_HOME=$(pwd)/hadoop-3.2.4
 if ! grep -q "export HADOOP_HOME=$(pwd)/hadoop-3.2.4" ~/.bashrc; then
     echo "export HADOOP_HOME=$(pwd)/hadoop-3.2.4" >>~/.bashrc
     echo "export PATH=\$PATH:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin" >>~/.bashrc
 fi
-cp ./conf/core-site.xml ./conf/hdfs-site.xml ./conf/mapred-site.xml ./conf/yarn-site.xml hadoop-3.2.4/etc/hadoop/
+cat >hadoop-3.2.4/etc/hadoop/core-site.xml <<EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+    <property>
+        <name>fs.defaultFS</name>
+        <value>hdfs://localhost:9000</value>
+    </property>
+    <property>
+        <name>hadoop.tmp.dir</name>
+        <value>$(pwd)/tmp</value>
+    </property>
+</configuration>
+EOF
+
+cat >hadoop-3.2.4/etc/hadoop/hdfs-site.xml <<EOF
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+    <property>
+        <name>dfs.replication</name>
+        <value>1</value>
+    </property>
+    <property>
+        <name>dfs.safemode.threshold.pct</name>
+        <value>0</value>
+        <description>
+            Specifies the percentage of blocks that should satisfy
+            the minimal replication requirement defined by dfs.replication.min.
+            Values less than or equal to 0 mean not to wait for any particular
+            percentage of blocks before exiting safemode.
+            Values greater than 1 will make safe mode permanent.
+        </description>
+    </property>
+</configuration>
+EOF
+
+cat >hadoop-3.2.4/etc/hadoop/mapred-site.xml <<EOF
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+    <property>
+        <name>mapreduce.framework.name</name>
+        <value>yarn</value>
+    </property>
+    <property>
+        <name>mapreduce.application.classpath</name>
+        <value>\$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:\$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
+    </property>
+</configuration>
+EOF
+
+cat >hadoop-3.2.4/etc/hadoop/yarn-site.xml <<EOF
+<?xml version="1.0"?>
+<configuration>
+    <property>
+        <name>yarn.nodemanager.aux-services</name>
+        <value>mapreduce_shuffle</value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.env-whitelist</name>
+        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.pmem-check-enabled</name>
+        <value>false</value>
+    </property>
+    <property>
+        <name>yarn.nodemanager.vmem-check-enabled</name>
+        <value>false</value>
+    </property>
+</configuration>
+
+EOF
+
+cat >>hadoop-3.2.4/etc/hadoop/hadoop-env.sh <<EOF
+export JAVA_HOME=$JAVA_HOME
+export HDFS_NAMENODE_USER=root
+export HDFS_DATANODE_USER=root
+export HDFS_SECONDARYNAMENODE_USER=root
+export YARN_RESOURCEMANAGER_USER=root
+export YARN_NODEMANAGER_USER=root
+EOF
+
+cat >>hadoop-3.2.4/etc/hadoop/yarn-env.sh <<EOF
+export JAVA_HOME=$JAVA_HOME
+EOF
+cat >>hadoop-3.2.4/etc/hadoop/mapred-env.sh <<EOF
+export JAVA_HOME=$JAVA_HOME
+EOF
 source ~/.bashrc
 
 # start hadoop
@@ -92,10 +163,11 @@ else
 fi
 # install spark
 tar -xf ./spark-3.1.3-bin-hadoop3.2.tgz
+rm -f ./spark-3.1.3-bin-hadoop3.2.tgz
 export SPARK_HOME=$(pwd)/spark-3.1.3-bin-hadoop3.2
 if ! grep -q "export SPARK_HOME=$(pwd)/spark-3.1.3-bin-hadoop3.2" ~/.bashrc; then
     echo "export SPARK_HOME=$(pwd)/spark-3.1.3-bin-hadoop3.2" >>~/.bashrc
-    echo "export PATH=\$PATH:$\SPARK_HOME/bin:$\SPARK_HOME/sbin" >>~/.bashrc
+    echo "export PATH=\$PATH:\$SPARK_HOME/bin:\$SPARK_HOME/sbin" >>~/.bashrc
 fi
 
 cp spark-3.1.3-bin-hadoop3.2/conf/spark-env.sh.template spark-3.1.3-bin-hadoop3.2/conf/spark-env.sh
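The PATH fix above corrects an escaping bug: inside double quotes, `$\` is not a valid parameter expansion, so bash emits the `$` and the backslash literally and the old script appended a dead `$\SPARK_HOME` entry to ~/.bashrc; `\$SPARK_HOME` defers expansion until ~/.bashrc is sourced, which is the intent. A two-line illustration:

```bash
#!/bin/bash
echo "export PATH=\$PATH:$\SPARK_HOME/bin"   # broken: prints export PATH=$PATH:$\SPARK_HOME/bin
echo "export PATH=\$PATH:\$SPARK_HOME/bin"   # fixed:  prints export PATH=$PATH:$SPARK_HOME/bin
```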
@@ -0,0 +1,17 @@
+num_executors=2
+executor_core=2
+executor_memory=2g
+driver_memory=2g
+default_parallelism=50
+storageLevel=1
+shuffle_partitions=4
+
+sed "s/^hibench.yarn.executor.num.*/hibench.yarn.executor.num\t$num_executors/" -i HiBench/conf/spark.conf
+sed "s/^hibench.yarn.executor.cores.*/hibench.yarn.executor.cores\t$executor_core/" -i HiBench/conf/spark.conf
+sed "s/^spark.executor.memory.*/spark.executor.memory\t$executor_memory/" -i HiBench/conf/spark.conf
+sed "s/^spark.driver.memory.*/spark.driver.memory\t$driver_memory/" -i HiBench/conf/spark.conf
+sed "s/^spark.default.parallelism.*/spark.default.parallelism\t$default_parallelism/" -i HiBench/conf/spark.conf
+sed "s/^hibench.streambench.spark.storageLevel.*/hibench.streambench.spark.storageLevel\t$storageLevel/" -i HiBench/conf/spark.conf
+sed "s/^spark.sql.shuffle.partitions.*/spark.sql.shuffle.partitions\t$shuffle_partitions/" -i HiBench/conf/spark.conf
+
+sh HiBench/bin/workloads/sql/join/spark/run.sh
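This new spark_hibench.sh is the knob-to-benchmark bridge: the assignments at the top are the tunables A-Tune rewrites each iteration, the `sed` lines push them (tab-separated, as HiBench's conf format expects) into HiBench/conf/spark.conf, and run.sh appends one result line to HiBench/report/hibench.report. A manual smoke test, assuming the sql/join input data has already been generated by that workload's prepare.sh:

```bash
#!/bin/bash
# One hand-run iteration, then show the line the tuning engine will parse.
sh spark_hibench.sh
tail -n 1 HiBench/report/hibench.report
```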
@@ -0,0 +1,19 @@
+project: "spark_hibench"
+engine : "bayes"
+iterations : 50
+random_starts : 10
+
+benchmark : sh spark_hibench.sh
+evaluations :
+  -
+    name: "duration"
+    info:
+      get: "awk '{print $5}' HiBench/report/hibench.report | tail -n 1"
+      type: "positive"
+      weight: 80
+  -
+    name: "throughput"
+    info:
+      get: "awk '{print $6}' HiBench/report/hibench.report | tail -n 1"
+      type: "negative"
+      weight: 20
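The two `get` expressions above read the last line of hibench.report, whose whitespace-separated columns are workload type, date, time, input bytes, duration in seconds, throughput in bytes/s, and per-node throughput; hence `$5` for duration and `$6` for throughput. A sketch against a fabricated report line (the numbers are illustrative only):

```bash
#!/bin/bash
# Fabricated hibench.report line (columns: type date time input_bytes duration throughput throughput/node).
line="ScalaSparkSqljoin 2023-06-01 12:00:00 240000000 35.5 6760563 6760563"
echo "$line" | awk '{print $5}'   # 35.5    (duration in seconds)
echo "$line" | awk '{print $6}'   # 6760563 (throughput in bytes/s)
```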
@@ -0,0 +1,96 @@
+project: "spark_hibench"
+maxiterations: 50
+startworkload: ""
+stopworkload: ""
+object:
+  - name: "num_executors"
+    info:
+      desc: "num_executors"
+      get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^num_executors=' | awk -F '=' '{print $2}'"
+      set: 'sed -i "s/^num_executors=.*/num_executors=$value/g" /root/sparkbench/spark_hibench.sh'
+      needrestart: "false"
+      type: "discrete"
+      scope:
+        - 2
+        - 4
+      dtype: "int"
+
+  - name: "executor_core"
+    info:
+      desc: "executor_core"
+      get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^executor_core=' | awk -F '=' '{print $2}'"
+      set: 'sed -i "s/^executor_core=.*/executor_core=$value/g" /root/sparkbench/spark_hibench.sh'
+      needrestart: "false"
+      type: "discrete"
+      scope:
+        - 2
+        - 4
+      dtype: "int"
+
+  - name: "executor_memory"
+    info:
+      desc: "executor_memory"
+      get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^executor_memory=' | awk -F '=' '{print $2}'"
+      set: 'sed -i "s/^executor_memory=[0-9]/executor_memory=$value/g" /root/sparkbench/spark_hibench.sh'
+      needrestart: "false"
+      type: "discrete"
+      scope:
+        - 1
+        - 2
+        - 3
+        - 4
+      dtype: "int"
+
+  - name: "driver_memory"
+    info:
+      desc: "driver_memory"
+      get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^driver_memory=' | awk -F '=' '{print $2}'"
+      set: 'sed -i "s/^driver_memory=[0-9]/driver_memory=$value/g" /root/sparkbench/spark_hibench.sh'
+      needrestart: "false"
+      type: "continuous"
+      scope:
+        - 1
+        - 2
+      dtype: "int"
+
+  - name: "default_parallelism"
+    info:
+      desc: "default_parallelism"
+      get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^default_parallelism=' | awk -F '=' '{print $2}'"
+      set: 'sed -i "s/^default_parallelism=.*/default_parallelism=$value/g" /root/sparkbench/spark_hibench.sh'
+      needrestart: "false"
+      type: "discrete"
+      scope:
+        - 10
+        - 20
+        - 30
+        - 40
+        - 50
+      dtype: "int"
+
+  - name: "storageLevel"
+    info:
+      desc: "storageLevel"
+      get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^storageLevel=' | awk -F '=' '{print $2}'"
+      set: 'sed -i "s/^storageLevel=.*/storageLevel=$value/g" /root/sparkbench/spark_hibench.sh'
+      needrestart: "false"
+      type: "discrete"
+      scope:
+        - 0
+        - 1
+        - 2
+      dtype: "int"
+
+  - name: "shuffle_partition"
+    info:
+      desc: "shuffle_partition"
+      get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^shuffle_partitions=' | awk -F '=' '{print $2}'"
+      set: 'sed -i "s/^shuffle_partitions=.*/shuffle_partitions=$value/g" /root/sparkbench/spark_hibench.sh'
+      needrestart: "false"
+      type: "discrete"
+      scope:
+        - 1
+        - 2
+        - 3
+        - 4
+      dtype: "int"
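Every object above follows the same get/set contract: `get` reports the knob's current value from spark_hibench.sh, and `set` rewrites the assignment with the `$value` the tuning engine proposes next (the `/root/sparkbench` prefix is the deployment path this project assumes). A minimal sketch of one round trip, run from the directory holding spark_hibench.sh:

```bash
#!/bin/bash
# Simulate A-Tune setting num_executors=4, then reading it back.
value=4
sed -i "s/^num_executors=.*/num_executors=$value/g" spark_hibench.sh
grep -i '^num_executors=' spark_hibench.sh | awk -F '=' '{print $2}'   # prints 4
```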