From 4291dae1ed2f51e2f2d63da9adc0199d16325981 Mon Sep 17 00:00:00 2001 From: QIUZHILEI <2925212608@qq.com> Date: Wed, 17 May 2023 16:50:55 +0800 Subject: [PATCH] spark auto-deployment and tuning --- .../spark/spark_auto_deployment/README.md | 73 ----- .../spark_auto_deployment/assets/hostname.png | Bin 686 -> 0 bytes .../spark/spark_auto_deployment/assets/ip.png | Bin 14020 -> 0 bytes .../assets/spark_install.png | Bin 2499 -> 0 bytes .../spark/spark_auto_deployment/benchmark.sh | 21 -- .../spark_auto_deployment/conf/core-site.xml | 24 -- .../spark_auto_deployment/conf/hadoop.conf | 15 - .../spark_auto_deployment/conf/hdfs-site.xml | 24 -- .../conf/mapred-site.xml | 28 -- .../spark_auto_deployment/conf/settings.xml | 277 ------------------ .../spark_auto_deployment/conf/spark.conf | 45 --- .../spark_auto_deployment/conf/yarn-site.xml | 26 -- .../README.md | 143 +++++++++ .../atune_spark_bench.log | 211 +++++++++++++ .../install_hibench.sh | 21 +- .../install_spark.sh | 120 ++++++-- .../spark_hibench.sh | 17 ++ .../spark_hibench_client.yaml | 19 ++ .../spark_hibench_server.yaml | 96 ++++++ 19 files changed, 601 insertions(+), 559 deletions(-) delete mode 100644 examples/tuning/spark/spark_auto_deployment/README.md delete mode 100644 examples/tuning/spark/spark_auto_deployment/assets/hostname.png delete mode 100644 examples/tuning/spark/spark_auto_deployment/assets/ip.png delete mode 100644 examples/tuning/spark/spark_auto_deployment/assets/spark_install.png delete mode 100644 examples/tuning/spark/spark_auto_deployment/benchmark.sh delete mode 100644 examples/tuning/spark/spark_auto_deployment/conf/core-site.xml delete mode 100644 examples/tuning/spark/spark_auto_deployment/conf/hadoop.conf delete mode 100644 examples/tuning/spark/spark_auto_deployment/conf/hdfs-site.xml delete mode 100644 examples/tuning/spark/spark_auto_deployment/conf/mapred-site.xml delete mode 100644 examples/tuning/spark/spark_auto_deployment/conf/settings.xml delete mode 100644 
examples/tuning/spark/spark_auto_deployment/conf/spark.conf delete mode 100644 examples/tuning/spark/spark_auto_deployment/conf/yarn-site.xml create mode 100644 examples/tuning/spark/spark_auto_deployment_and_tuning/README.md create mode 100644 examples/tuning/spark/spark_auto_deployment_and_tuning/atune_spark_bench.log rename examples/tuning/spark/{spark_auto_deployment => spark_auto_deployment_and_tuning}/install_hibench.sh (77%) rename examples/tuning/spark/{spark_auto_deployment => spark_auto_deployment_and_tuning}/install_spark.sh (54%) create mode 100644 examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench.sh create mode 100644 examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench_client.yaml create mode 100644 examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench_server.yaml diff --git a/examples/tuning/spark/spark_auto_deployment/README.md b/examples/tuning/spark/spark_auto_deployment/README.md deleted file mode 100644 index f7b91d1..0000000 --- a/examples/tuning/spark/spark_auto_deployment/README.md +++ /dev/null @@ -1,73 +0,0 @@ -## Spark 自动化部署和HiBench基准性能测试 - -### **工作流程** - -一、**Spark 自动化部署流程** - -1. 安装 gcc make curl wget samba git -2. 关闭防火墙并启动nmbd.service -3. 配置本机免密登录 -4. 安装Java和配置Java 环境 -5. 安装Hadoop并配置Hadoop环境,格式化namenode并启动hdfs和yarn -6. 安装Spark并配置Spark环境,启动Spark master和worker守护进程 - -二、**HiBench 自动化部署流程** - -1. 安装python2 -2. 安装Maven并配置Maven环境变量和设置maven仓库国内镜像 -3. 为Spark Benchmark下载并编译、配置HiBench - -三、**执行 Benchmark** - -1. 准备工作 -2. 
执行测试 - -## 开始操作 - -**前提条件** - -将本目录及所有文件和子目录放到您的主机所在的目录,然后在`/etc/hosts`中添加`ip hostname`,用`ip addr`查看本机ip,`hostname`命令查看本机名称,例如: - -![image-20230511213226693](assets/ip.png) - -![image-20230511213625811](assets/hostname.png) - -那么`/etc/hosts`中应该添加`192.168.70.129 spark`,相应的在你的机器上添加正确的信息。 - -在自动化部署的过程中,需要下载大量的文件,如果遇到网络问题导致安装不成功,可以配置代理: - -```bash -# 为git配置代理执行: -git config --global http.proxy http://ip:port -git config --global https.proxy http://ip:port - -# 为系统设置代理,可以在~/.bashrc中添加如下: -export http_proxy=http://ip:port -export https_proxy=http://ip:port -# 使环境变量立即生效 -source ~/.bashrc - -#注意上面的ip和port替换为自己的代理地址 -``` - - - -### **Spark 自动化部署** - -切换到脚本所在的目录,执行`chmod u+x ./install_spark.sh`,为脚本添加执行权限,然后执行脚本`./install_spark.sh`,期间可能需要输入管理员密码,等待片刻终端出现`Spark deployment success.`的字样,代表执行成功,执行`source ~/.bashrc`和`jps`命令,可以看到如下几个守护进程正在运行: - -![image-20230511214208550](assets/spark_install.png) - -如果运行不成功,可以查看本目录的install_spark.log日志文件,可以看到哪一步未成功。 - -### HiBench 自动化部署 - -切换到脚本所在的目录,执行`chmod u+x ./install_hibench.sh`,为脚本添加执行权限,然后执行脚本`./install_hibench.sh`,期间可能需要输入管理员密码,等待片刻终端出现`Hibench init success`的字样,代表执行成功。 - -如果运行不成功,可以查看本目录的install_hibench.log日志文件,可以看到哪一步未成功。 - -### 执行Benchmark - -切换到脚本所在的目录,执行`chmod u+x ./benchmark.sh`,为脚本添加执行权限,然后执行脚本`./benchmark.sh`,等待输出结果。 - -可以在`HiBench/report/hibench.report`查看基准测试结果,执行`cat HiBench/report/hibench.report` \ No newline at end of file diff --git a/examples/tuning/spark/spark_auto_deployment/assets/hostname.png b/examples/tuning/spark/spark_auto_deployment/assets/hostname.png deleted file mode 100644 index b45d8250a6fa2b9ce56135536d4e8688c3076d3c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 686 zcmV;f0#W^mP)uo*^H+du?GjjwAiv_uWr_C%*qozf;aK z{cO-GG%NS~-?PEezGcU%k;A1O(V$qbZacpHL3SM3t$L~kJNyLz000000I(*0^OD^T>Fb+@=^5zhQTTMbk5yrutgwV@pS<~>U%c)w-+pGWYBRH6U3;*8i1h-A(ciXK=IV*o8ZlWZHUo%2 zLu~f)ei!xz@M2Iro`W}JQ4ita>*fv`y9Ou+jd0JBi%GifwX&LgE)-+%irGXMmn zVGSV8eGh<%NPQ;YeGh&Zld%S 
zVhaeNNGKtcpu_+n2}nqRguol;`OkaiIlpthy`SFwfnRcyz3;vDUhTTpO1ORvEV)N^ z4*&p=w7hc39st;u2>@&{-?dfzr03`8D*)g)!1B@sho}P9)WCp7clg)k)<`te_j_(& zApYZtYHo$zk?KIqTz>e*gVt8x{=#~j6dagEVmPK!>>aPr207p9U`E{*L#fWQ#Rp>=@Q(N)k@8V7T z9~-wcr2ZaA=oSC~J+nsH{cGu)15Yk{7H70g1=HEwk6bXF-LdAK@!Wn~Ekl96=1ubQ zz)5=GE(CuWG?E7ZhRHL%3|2&v?{7$(n)~s+WM9E_^+us9G1tuupQBhNJ z=~b3Ht*&PCa)>6)-3oYc3OFj&$jss9#DNLbnouw>)9RulUmZ&A z$V&0@^d$CV5&JcoINJaK;7hMUI13eXG8Cj7S?sW5&fh}iCHu{t^%B~_ESEWb*G3O? zE<;w?eGA|P!B83F;d`18J$T@2)IN;TyAoPm(uFyza)tx|puuo%l9c~|I7Y?tU$zUbiy6YzpkJ>p|KQNsVJV)-x;ePDJPsAf(Qo%2LS+iayLv) z{QP}xGB)U2bZSJ)n%`PfUlB|bN7^CuF*!pN!b^iR8C=jB?-c1kBmWo`{W?mF#2BDBPi z3C6LqUHKi@vlDjphSoImk=WyMPz|(A4T)Fr`Dnf80y^qJMeJWn{dbu|I}xT^{RxHv z_0pDIv-3VqM0U75fgP@HxAdm^3RRc5GDxttL{MwY;VbXDJZJ7lUvkO;4hlTlOi^l1 z0?!1`KfTPslw}>kV5oXr4?1UzzsTvvO8)Vd`AEGddOV>y?3k|{% zxdGPeZfxHMCPD==+nA}Te?ks)*UQzc?X6bv$ryEOVR1w!P$1At&6v?=wB9M|o2-SD zvXrbS9U!|-%VkwM4^nfoua5%5!!L%wyxdbWj*6w=76z2v_}yt;vP_29$s9L4OpP?T z6J$aT?TdGv0f#VGNN2A(PuY%QR~~(j8Vza5Owjgk+s!`+-iu<@H;a5vVT|3=ZbZIq znj5NaPrt3->_Ev+(`XB=la?m%tDG#oL`yyef0cWck)rBKORvh8M1Q=iSc@KiQTnmX zbD;t2z{yv7$D;v@r_Nxy1_NeaWz0Yg*a}VW2SvxZjrS)PqTC(Kv=_bW$|H=@&ln`l zgsP-=O!l|EyA)Fm+tIdyzJY{$f$C4+;7cb&{)HqU`P(3E>nwH0yvDPIwyKY%0`6B4 z`$ya4%Rm1ZiCGNVCS5nSt?{FEF8Q~has4=~`_22>flK#wDI9m2hv-?+Zbng>Ql2iO zCpdYcIIiKZg#+c!O;cLA9XXP%B;nlRg;AjXd7l5aL?2M>U#r=L_`wJzGS#ZJ-3cT^ zb-wG;bM~>p;+)DE$GRkRn&+sY;1zavwg~XPS2|HFA-H7edE|8WWjd?Kw)Dk-peGz5 zJ#nlYv#3K_j7zuNf+g^4`@!VTsRa=^>_>vz*|*=GUzF&-i{XY=+_3mVW}Bz|@j}_a z-;_M;mue;4eRaq-TFBwSm2Tgn!>K)Btvbg`%P1rZrPOJH0*;6d(p2y&jn+M1DH^0+ zJu4vAN8JkOYG1gylTf{4XLQ@$j2>077=LyXLgTKZ*p%gxOjtK~eDlggNiD8wOzMiz z6t+P=HWnyyopEMo&KzlK)6LV`tdGV=cwndZH7;BkxOm)`RI)fgG-tV*+|UdRdsd${@xEwI0NX1ehb2hkUFyqoVuEFO=A~4_dP6Ch>Qq^TCED0-HY&zJr1#s0I zfsPl!6~k?J9_5=n_b7r%OG7qP)s_1@LXv%|a8dQV1LeUU+uo}b@+94wCf+Qy?AdqB z1-q~MvtfJ*cfy4Vc%N0cVu&Gpf3;fmf+>T_|2;X&w>mXjX2nv+rR#Y%G-Kd$ok3}H z0noTJVYJ2!y-;Qoe^)aE0GKmb$?GR(t58EP>zkgQ*Ks14K5YUM4*0{hikWWPhd$az 
z!#3=03q$F@8H5NrpI=fnHSQdJUcK~OPXpwNE7aQua3_l$y|k+@`cY{{+KgTo=c}_; zSkTh;nZSc{*Vd@zTI|7bYk9yQKt|9uto+=!=cYOO4~NHEk4lGJ?y9UtnSNx7B1VCL zdpio?B4;B_lm6a@#{nH~A=~?kemirG4{QTo_|J=uW{_-a=HMCq?pfxi33PQszMM>Om4p1#x`%_y~$+8^cx+d7{&K0TgR?7==QUJn5H zEyvXQS5mU0NmBgxmTcIx{&`IrTjPIU-hV!v9GYGkXZ%?ssC%czJ*?<)D5eywe;9oh zw~>wdIh@8!ICgYY=_S9RSRp`s^Cm&Zw{LD+A7T^Se`=ftb-QM}owN=aCs?oXQ{v=f zped9_9q10g%QhLL^4Ghn$PV6hoo~B&^0f$zY&avH|cbgd!ZdPl9 z1g2ep<*UmN+6Js=HTtkCW?!k$7iburD41f3ErgtuNnYHx9vB+*ACg{&YbM`!zM5lOr;V;@F z8rxm&`$@+q>a>xCe=Y+6&6(nxd#gj=;V@`jm)ymjp-n+*F#%Ok_<&^_4}aJ^EK!?R zt`wUsjvvs{R>g${9}|wO6yU+$ncnJo-q{yWxlWn)H=Pe$ky^9%CRlo^^rw3dd()@w z;aCVVy4$`?YXBm;J_t&VPh156)X=fBj)SxcvQOE8n{%&a%^kb!W@!;tYY^qJWcY&U z`tx$Febf)mMF-IU!0rr^Nx#V3;zpu|PEq!QPGCNb?7Vc}_o(>wIxT{rZKpKokfWjm zJ+W{v+p29LpB7dT4^J_Oi3*Gc&fI>syj|@5f};#mb`${6`!wfJ^i_e&{V!88&1PSH zap~%TINbS^QzI|>Mx_mf&ry`XwfP&BVQS0jHWmGKPHTx9w@rreVk3BQ07@61ly>af z1}T35o?I4i$3(10pM$o&*qxIlPL2OHn#<@*a2EI8XncIU?$c$sc?!36Mo!VNRIT_T zzi0sM2>_f|fY|^3@Tc37PaY64J8mCa_(NcsOJuunSSu~#;#Bz15a?XM*WOyeUuX_~ z=I9Njm)gP~fhT`mR*N!*j;Hygo{g!$0{u%4cWgBDqC;GRedE3G4a$U6=0-gqXZkKX zx>oE4-zST0iGi@x&9RzmMm%d^*)IEns?6#F;5HQ;g9kBUGrALRtzzMc`O@XAAQtoE zBou;e=bNFSAi2$%F1=|Y^yi0Xn=uHw@_>Zeh=}?U$b5(U4u-Nzj#!?IbWr* z$(6|xTiphoG9V{5E&xxPFV#p?q{f%8;5gI%YiEajrPc;E&RlKLaXeR?U0>gH1h+^M zN&m7N#$a3QuVe2I7AO6G{+Gq%F;m2#woc{R|XfZRusN|pe; zNBx2-g(#=B93^Snm~#*cOG)Tjndn3)lT0(SLYl{wWW58aFV~f_DJG{zMAw}(#fC0c zLHFOG5Q`@82a}1j5Xzgz)6b5vbG~F}bl4Xtbysu~aVRzd%UexHUG*x$3SPaAJ0c~$ zyanLwF6ym*O|YvYTve*+x6pf1m0HqEU}dtg*WMmo85X`eY3n85`RJX=FMx933`p8@p4hD$j_#@J(xc$k`aDo)6!}B!8(!?!R9St&shfY+rg<;sWnZk4+gNPeZCP-YR{k5+P~ItP zESWu6Za?Qd^y%^ zLsjgH{x$B|Y+HhlC>GXXXG)S@(@vd}Yyod=VZ)=M&YHo_3M-F+_s~Qqo^M<#twO1tTx+;1qDHBh7vX44atbdv4s(wrJA(W+kDGoBoHs8^8L*ka zE~LT{UAEMyi5TS3pd4<+XL@bi*sZqnW+&g?F}#c|z2yRpykOcK{6`P;dJ+8;xnz|X zYFZqgOJgwNN*||$a1bS~YJF)NCk9Vh7yITcjX_Mq2+iqtIjIBnl-6kGGKtYj(l;~e zXH_2Ohw(+`+^1EIl<$GKgAol)PIFQDhG;iLP-I&jCNGt0Q&sPzm`XV^WP%mumh_eE 
z^BMl3XB1JUAM0A4Iq=j(qVST+;-T6{v7+Z%*Ys;Ro=OuPNc7CzJ#CHr7mZgp{Dh-a z?>lyr`@<{m_(k7h%}|5TqJiYbsQv(|_cdH#UeOxHtdHVDZ28d3rQBT#�Jw4YccJ zs-54X=YYh9HAMb}LgkP6DdrCP<_Gtm;5&f@Fw@-Y;k%x9-1)5`*fsO6R;U0>dL6TQ z25-AEoKZiQ5d^c|zypb^31&SS!%m~V6Lt~UsI*9Ykh1ZQL=~02NyuxcP1Zz1ZAYS~ z|Is;V(ap)tD85eblT#)A?HZ-u@0iqBmG}Ju=G-BsR2nf;c4hdHnj@5hm0)tguKKBwJ> zSQ?)z`{5Fpqoo*Q5YRiFpQasKn`K2q(ps*KO$;#Dc5>#dDCeHB37TsfF)B={a&BoH z#E}(m&)2dhW-TklRGbUF4e|^#_?|q@rXTouJ3O|6^xb`TZSrb-`5Y>^C11cjH*OhH zdRny}!%5Zm!+75ZosGNijU>_lW@(zi;J>TzQcrcVWKc7`WI2Vm8%; z#RIclv$d_hr+}%$8E;Ay3=tkBCaMAnojbGRcrjV9)M)ZUbH}Jq&uc3SzSykeF(CI^ z*O%PlG-v3KRKl<4Op!y^_qD`KGIdD%<792L^h-rA>;tXySp({x%rTJtn#+?uh_7Ni z5)V$Y(^UC_X1A8O9y*6CxhsHGM%TA{75Vp*^BrCzQr5sjTUBBa*2P+>4wysALH;}( z-hsQ3v(TWaI0aEjJ#az}OqU?7=FFU>+ROyy1kB1R6;RH}D9Ni=egRGV6&Oto^}=Sa z*I~z%if07cot0m}cVSZz&;jK)9s}jh9ZHH$NeeUgnQ^9y*7DXx_VUz-=a+S6i?cCf zi&gyMSHqiF9thBb=EeJCc#t>KX#ea66YC-;9Y3slM7U?;cW47X3=(qFNvQhP37Iwo zBU09ZNAFlC$H%m2cmkaC7aF$!nrjjn__h~H1B@fBmo?g2H7D-<%ga4j$rTnk(cnir zji*_*lW)sQL4Ts!2GdpzUCqMbR@B>6&npgT5+oVaN6L6YosB%;0pYTy%8^ph*jN7i zNz?J50-we3Cbj3;NUFSgajOYRdje#LLYR!<&YC9K!7rZ*iAdfC_?d@fS2>?Cz!orp zy7SLbBU5IJ`>VorNC!<0U3cA2rdc5qhP3tpUQQVnujD}*-LA#YJ8?Kk1Uk-~KAd#G zJ>f>LPBm%GB8Kf0x#z8gS6gY=ElQY|In3)T3CY!MCAB94Cq9eAQg6&az|Tp-3rIhq zUxs^~SYU4HYpU>kp$Br;!QLt?=~|o>p_+PDa(LTi>rs8bob}oO}aE#znVl4PmYj66#=LAoe(nqW?8XH+(+a`&?(`iv|a9g=+DFab5fD z4sux2KEQ*+zknD*HjqDrjF8r8XC9qS?7SROW!#sQ*5o0Lz~|xz1*{rz+QfR%9(%_$ zu3>slu4yS;D!fi!wyi$kHEz#o?5Ht7e855pH*~ShS`jc*zUE&#gURVXg4f!jMFm20@i= zSEfl>rKgrT{mM3+Da}2laQkUZ?PDCa6S4~v^K&?5AQ9#W+m+Msn?Ln(y^9t2Yvkrm zhZnKzR>E`#?_;>8Ro3A~gGR4NNh5f~G}*C0b8;{Bm6v=$@9v$KBPekXBO02=i%Wnn ze`T#ch1_BSIqfjr$A+avB{;D(rE(GsE z)u?#oEO;FkT{#D+#c%;*TMOo2A|J*@dhMdnCS?QICsaip;v}90aWW^YPF;Q4we0w%g(mg7cAkkybDDHyeuI~iTjnF6_vu#=w zyz9H;t`mP@qG_s_i>v*S9bsud7qo_DI`~-e1h|kTOxIjrT>Hiakt zisSGmaO+5%%*9o1)AT+Jl{Tu?`vsb7bbk zu73K-Bu~eXAGxawBC z!4VB%;zSHh5Vtap=Z51BrI`r^B^1}0Bh@)U{5OYhzCWSTU9OlC?2lPR6wIFt6+UUy 
zC>E=SMh(g+IjlZ5xLbMr4H*W#TSKUwlv8%Qg`M)ajZ1lyFZLe{(Dpim$+J#vwdDdL z4S|A*=*1l(WA15KULDIM0 zfDh1O&}KsuMbNphXv<;0iJTUN2iL!U;x033u(!c>k& zFFee8Q;z1QP<&1xNXPmzSWz?}{>}$0PUVyPP08~Sw_6zloMuy5wEy`>B=<~^E&%8ocdI6 zul+p}E1tRj?Gb3>+asHUd6S!P?gCgbcC|zbG-mM3UfvDWXWmO{0cPX^n{>(W|ksHR~&9!q&q9&q7r#U_p066Rf zTB({39cFTQd2WlQq$%mRpu_2&TP-|W=ET*Hi_&fN)?n1$x}27*4F`6nwr=NblPe*$ zPa5|D&Z~$~v2Y^~Dnrgq)Yi1?2rb6U4?ekD)8tG6J}yKohL?$j24+-?8Pl2c#bkkg zK|n{Hmzd+($5_Z9z0JcF-I{H!zk>lXG|#O_tHff~)#)i`K}P)$Zc(`SXTG1U_n4jW z*e2N`+)I)eW;)eAX7N?ZLP}-p$m5T|2`EBV=EHlj8hf%>x`a@#4gAJYSBjkS`D0Pz z;*j5bsK4`HSb62@|AUn?W@uv3|2^(m{mtKguaUBy4sF>NoVIihB(S?6k*vubxFk|b zX?i8KdvwBhX(b^m00^qevFv4-1j%!ug!UI zH(JJyA51^uDR`}GI6>30_s7Z&xR+S+vK)v}C2kXv51wfsslpSbPg5y5*! zhi;WH8`zvv9+=96Sd^OpV_@rqYnjD#0`co|&|;#Yjz65h+Jn~l+PU7=PVZ@p83_p@ z(Pgr#7(6Newa(b5htIKW2*b3$gM2DuKVYZfukbDkkB|s5ahRkVFVBBHi7IzD%G{TT zuL`%{O7?n1Jkhy&VcrNEC-$@9_VSgvR!GKyXTF{IExvT9NKk90qE+V6TMi8Nn8-2J z@@X-Uuud6aNme^%>?@y{^`*zBa;>PP&85`o6~NcQzZtSQZIbb_1UE-g@a~3@x-y<$ zx7UCHBwdkI<26ges2bbOvxGLq758Q1^l^s$ zpp?f#IQgYDtSSG#!AIsbG8$#593TVGZJqxy0^BiKLm6!6MyTMZm1L~I#a)97$;bY1>oW-XrO+Ja3Gh<< z0`J^%-G@2@{NVt#Y{jVo{|uCig#`%oC9+1gE9l*Bte~9Q9G;TMj3_L8?k4!q5%+8~ zXV@OL#w_;wdMBlEkM z=uybO?E(PQUWC7SZsvJLa0M##s4QgjwiH@WN#;$ZyX?p5}gw3P9N2C5i~hKFr;Ii zwVD3Cbxt4$juiFPNizT-sk#{Mc;E1DL~iBw@7N!1o&UFy(-?3dYCZFKFK#P`Au5pSyaim@(ra(DsX!NF1AR;=!KoKj>Kz;YhQRlg3Q?K1R+}v9b-t-|RTVbLadJMm`dbWNr zB-nOG%XI6KZ58sE%2KPJq7yN;GPt1N+u)zP<(h(dx}!g4LmvBld`>E#mD8wumo3}p zZ;#z)hfmxo<}!rP5HID@8(1c|1pH(FJ`|qkYcS3QcLK?8iE&wh!bNyezS~#>HUc~m ze@yvc5|$TVG3xVvK+meJc@>EJCYvTNGQ<#Fmi-=i?kOcH)n!;9*S`8^o|U;TNc}kf z+x4hGj~vEv7ycLFqSg3BHQ()!j`7~)i;^RdrRNg?o{#>+2sU+H*?gmXuu9*auG)E> zQRc&U6*!eHlpgN{#g=-Y55yJXh?`^}g^fSsO$7&CB|v5LXNNop&204aj6O=@{7XRmXQ zb--{IPPZ(w;HM?S!WI2F1g}^)03`hMoBqOW`F7!cAm$<&w0&UXe&O7wAzWFf$cOuH zX7KdU%|z!em&nR`_u5RYPCK?{BPrb$!|nlg@672zuF$Jzt{i$JLkErp6gbsp;)8p5Y`a2K82KWx2OG={`*lNHV%mlmi5+dDdnbe^ zj1=`x7B3oIGux+qP0S9y!C(WwVkx453!}}1v5B3B2(g!BB)d`~KUi6Gu9HF<$rZfu 
z`>eUNw3e{Jx~gk!NaR0U;fM{-%lPy;o8evkRN0_pkhZE|3ew7K0G<~vUE$f$DPV*;3d>* zC#@5qz6M1$z|1;X?@s>HSrPI&v+0M|WHWJF%-5Nvrow2}vA0=UkLOWP0$wD8gXa*uv}op$HK{^%FO4>wQSf zj-sk1OKo|+z@0tCb+c&8gi74Ja;ka8DNRBsgNkk!{ZSA!0|2y(i}UA*bf=P*RCD}n z+cB&1OeXi9u~$KML`$c+{pWSX?-azUb)po^G`?hinagAEm4dk$uxlgptu z4+|<~l$sBezbH>y4vkx8P%Nt)DAMIxYhR2~`Mhn`py5KrzMW3?4GcZZ$$ot?y>?hE zu57Ok-p`UZo&Eo24IgRNHpoGJ{pC?Cs{+3#Ir=8~RmZIIdR-otDyge#)Wc*D26obc92a_OIBG3lGnHtBHBZLvlVAb#DG1QXD$Celye0ujekN)E6p*dZfISzw(!ts zR)wmvJnPk_E%2r1IUO0VeicidslALY*K2Oap3#h=xP_T`ptKI@)f)2%^e~vwR3}d8 z>%G{51n&AsY_%Wn*{tcXqoQnzr`q9s+L+1N0hc^+k!*J3LpB3?kg+z;UP5`dy63gH z2HXGEUFj}rG9y1mD*{wPlDDL=#Pz-|#Y14riF{lAZ%?_XX^l;TsPbrt0bZ%tuwhlQ+|NcPX-rPG!Im8xVXdHhz9Q!KoBl{9!nxkh72# z*Eq(C2*WPj@~ny6v6622Zsu2d*ByY+3NReAR#Ybv@;Vb9hMEqivzk{g<5%sA6jqw9 z4ig>y?S8Wlag7vNj3Vv2c1Oxa*tKIXkSQ#ogtZwx;jYsL!?q1qW4%XCawnCSzEdq* z>Qub$0|KHWa;ptdfmVA1(l5r;g@kMRt{rA;PA2=Edw+blbA3tJFS>0})r3t9e70B) zNRhCB{bbr1BHu3(PmZ>sqVsSa?z;mf(OE3hD4tYyG*ib(DYo@%(N#U4&0HH54`;Q3 zUg`I{!iwQs*;Zx8Yjkej+MBruDM|iKt9{yGDcPEv`e*JfWmmMn>)cZliHeRmoWGIf z+zWCox9`@_S;bxonlwF1E1BViHziAyy#)wbM}IUByz$jk6aNbHKKG%cxA_UPeok&n zB@=DExt+gwJX8T;GQG*X^hOKdD``^Pd^$bid!BO6m#&C$8P_B9a)UWOoRx&u0P1b{ z`m)q5dzr#^t0uBbFs^R96g#}0)5HFHD#Rlixp{;tmzGz8^g26=6!>E#gFGO)INriY zW!G(QI@)F9<<nF$;S z6G{6Y+*QOvp~Pq!5ZzkrXgcKYD(*%k%Pp|Dp9uhnkQOsRyLSAS&cFRu zM3U_ZYbcG^AS>cghejD(GkeO5-UQazuW~(L;b;AyveI2qBqxr{kti} z>DXK1w_T6#4uBHm)Z%Z3Q~>MpvA@8;18exZ$gRd1kV}WK-e0(~`SsEKgRbOAmP7l7 z9empjPGW7nKVnT+6}GNc*={g!B^)$7LWr8z6Ia<~KIb|Ovh2p4?1uJ@-7Wllm^?T)XF`RBEWD*N zTgf-KDCz_zOL)PrHE#u+z6x0Z@-w7uR9btZT(`u`vBV7MdiJn@f>C z9Ocnpf-IuV;F8r;#_rS_Yt@Jv60sQkCG67LV2E!G{|j{s;QT3w{ZB1Abnrx^=QqJ0 z`66)-glOZVZmNTqzMInzCO8{WM=P;K^}qYvr2EQf076q0#4lf$5qBcrvMNA{AyUuk zhf%Xqx(qZQogijW9_(+DymiL?rkFeX*Z7v#*gO|Lzj6aBC0ASi3MA+XpvHgk^?X{T zW(%@X40nj`x8vO~O+C~j+;&D}dDTuX2kL;*;vyKwUg*8Xju4}(sN3N`Cw=mL=G77+O2tKfY~Wk0oMMdgyUE~H z>plC}4yRxwHO8!leA$Usj@?hY4Pq!3%g`o}NSjyw&g(BXIOi#f%wL1E&H6MHrzoju 
zJLMGScn;4dCSru+0fJ^y|A!F@)>pb~8oG#d{~6na1))R{9&M?(*rfKj`roEpxC;Y= z6Ue;sNYJ#8v8*xN@xVg(6^LxyJ&L%x3jJD)!>8yQpCu_CrFws|+H|aCwLu3NQ!lYd zo3zti-JkC~5`8)(EUwO_HNP#=QvBz@8RQ8*^x67<`@>;0$31D4kON`0C>+}~@p85H<3i6iY*5k6Gw z_HIME(n#eB-b!<}QrUk%d4i%LJm|lEX*o%xl45{u?@+w9VzLo(LVb3jn0e7UF1XT1 z=qPYeR3_q5sNn8NVli>mw#)WQiX+yZ8rw{WEqgtkD2!W*?Lfo=7kUHU8MLkY6kPtw zZ=rv@1NR!xnD!V~Vlha!VQIDSaUqz_5|%2999%oFsI`$tz&+CYzMi%4Kw@unhOcwq z{&vdN%Cx3b(b7}v^1ifO>y>1V8;^xq)PdZ(J4Qep#5+ z8WYEmpKJccDh|Ajiso|{?@Xs$nRq-rK#5TIVT!(ur9ED>|rDn2X2)M(tDc45YieZzRX?g8v((+Smw1sRq zP2HO3Q#|AlreN#wDLZPn7)Xm2UXeD=mJ3rT^;C&eH^5P0|y5XMBM|laklwqyn5e zSQn~SN07;SAotnd9|r90pZZn(K~YY@w1>zYpgALrUNZ`(Zt4A0whrkn^~XqXOp4xB z5KXe?y$p>Wfd>y!7Ei@}9*#>4$M)YKd-LvI!ypU@xOX5ocEL>ZdB65> zx~(-%{h#!oCwdQAZqiy;+QBfpdz3>~SC>RP$h>*e%I^HFB(Sa_it47uEi>1-3(G4f zg6#VJqp1i>>J_e42r=7t)9tEW8-0O6XlVCBdUVZJj5PhY`Nj`$$yzO3IzIc*bn~P$ zMk$UIG1pTrW&%zQRA9p=?DTV)tCbZ771SVXyWqSQA>`GOzGhQdVcp52G3^#@4Ks@* z=Wq)UDaBmrpN@f8l@#AzkhSd02t8)2DwzuTWo1PMagL8s_|8kVg zwJl3<$0F6+?dFpk-g}T%f(#ph{js%S_O2|ZQ@e-kj!FX9wNA{K`bnz|VCY2&j3oQp zFxKCx;Cr8kIfu@Ur%A@7X%J6+z1(Ihee+KS^!LvRF!81i+cfh}(lR_C zgRsw)PqzX5BKzmWt-Y-5p%VgWQ+iJn!q?|jF~aVlYDgly^@t{42DeyPYDM!1k<#Z#m{uYlDIJlpNb2E9b_%~GRsJA@!c74`&Lx6Uu zj5IA1z3d|StYCx_r$h*j^#3H+FFzW> z-I9bT3`X6peMTSBr`(IFvMfkKfbgSz&Q%XXhuE4XLO%OA003Yi*L7Xjwb~0f{$1?= z0381q|9gH@p68jjKO3L_%G*N_$4$P@2Mz7rhyJ|r{yxs%5|wB6jGHuy z{qwPGS9kL#!Qc5g^n~;HBb4tCtIl-NZu}{}w%=|TOs?A6af^qLo7H?4*})&p&TY-` z-!Rp79w8w(FAbspUghsnxUTE(+1FP)s z_jtcS*FL_dU0V;Z@AaeXH?)h(fSM=9`bxq=f2uw=51#YYV;{bce~z8QT7UebKYMDg zp_#4oQh*=9%1gh8me;LfH-Bt$Z{OuV000000NAMaho1cdZByZBUB4b(m)LAyJY;|< zh>+P2_y;2V!Lyoaj@kDTQRTByEe&8Ld7l$ zsjAB<+CR*cyVxI~7VrJxk=0e*jpAF9>#|m7;521VYx#YkX5YAP%-@?IPWXK+{2J&eJKeW zfQND%Iz>IsG;c{RH+oz&2hfRYou+xeUc5SukS?y59MavO@j~>+wyzzRxK!ggC;$Ke z0000000000a8h_ajkb9xZU7{B*7tjgzu78)&tUE2YkSjL`%`E5?5XJwdvt&VA$$q7 zzO*U`8=`^&6o8w8-a->P^+!>NoxFO+uI~ZZllVIC=PlnIY+W000005Jk2Au=e?gA^Qr(^A0`! 
z*V?Zd0-zu3x%zo@9(u691;FC1| z{$c<1(fl1>we+G{aP~A3$dLX{e~8A<(^6l$FbDxYNlcx0r|4moW@0z3#%}(=Gy>^c4`FzM40ODcNgBFF2!ux{H6Qe&g(7tGkU%;BF=fvoId-IpKv9MteT03I?VH)<&Ot0*Ji;caIL+p)X znf|buBu8E1jC01Ta_ku@wKJ+`IA_8SWL}-Omh%K|nrY$sYhgV4 zcz^BloWLmkq1fMZujurrZC$k3ZY;E%ik?urTeFu#bAz67i!YMVJ@&QFM>}5e=m6da z^9jIW2tvp~e>ek@IsgCw007vZYW<J?ZzxWy;mPXx^NIrYybIv9G{$Y=_M||(8>-UTPJI4hGBr*<E-hE z&*J(jNhtsT0000000000z-Yqshg$nq`a{aKVNYW8hgy5A{;(gy5B4NXf2h@4i=BxF zfG1)4!&Q}>J-WeS{SEp)1L&?X~my-$uK~ z|HMdfTrJ5*SNS>`>r1s@6T2DWqo)MXIj%=HSlh0lMP{3Qu`*WdeE%u_D{p*`i>~sG zwCJAfsqcO*eua1H55;*^T*Z0X?EkX2EhPUnu4(P7%1_|^g6Iz{p;3RBoWDFl$^#(k zo-m0q&E{GE`-h&P1G^pdGg8|OR`iExTuw_~`j*q?Og;gY^Clf|k$VDs-g8a}MeP>1 z>ehDxyZHk*z4}Ar)<|59M$CSzNZ&{A}|_JT3qT zfJyI-d@3*fS%|&2Q>ZV6w?L{1000000G)XEE&BHZlJ=@Wi#W^TlKlgJwN0{d0uYP5 zUK&+D-Bwq9D0^ZW6G}7*KNw2PzQrA8&UUBGV^|#wtNgGP@xL>pC3~GE*W$W-qU6y2 zB@SwcMUuB@HEUXbfLeysweW1v;0Gz_@&CB5+Wh*v zGM!=gOqlo((*N{5AHWmg0R4pJr4xcMR;T9-MFS>4h$3d4W@r!se4cU~HpEhh9xrg* z(NJa$ow(L%jKxDE_tO$TBeXn=mie+u@?WEWaXlsoN80000000000P!B7;o51sF z0M=tYpQgBVF?r)&9Cyk7!ESOy+n-{Iy!J+zMtMHWwM(ncnGyC*ed9P!)?)jUpTSk1 z8)yXQr9FZ}SOuqSIg$E9f8LnukHYh%o^kVn8&BeAySke{354QW{s&XH#we4RRe%5h N002ovPDHLkV1f!J{uBTJ diff --git a/examples/tuning/spark/spark_auto_deployment/benchmark.sh b/examples/tuning/spark/spark_auto_deployment/benchmark.sh deleted file mode 100644 index b43309e..0000000 --- a/examples/tuning/spark/spark_auto_deployment/benchmark.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -source ~/.bashrc - -# benchmark -cd HiBench -bin/workloads/micro/wordcount/prepare/prepare.sh -if [ $? -eq 0 ]; then - echo "------------ HiBench prepare success ------------" >>./hibench.log -else - echo "------------ HiBench prepare failed ------------" >>./hibench.log - exit -fi -bin/workloads/micro/wordcount/spark/run.sh -if [ $? 
-eq 0 ]; then - echo "------------ HiBench benchmark success ------------" >>./hibench.log -else - echo "------------ HiBench benchmark failed ------------" >>./hibench.log - exit -fi -cat report/hibench.report -cd .. \ No newline at end of file diff --git a/examples/tuning/spark/spark_auto_deployment/conf/core-site.xml b/examples/tuning/spark/spark_auto_deployment/conf/core-site.xml deleted file mode 100644 index 68ceb8e..0000000 --- a/examples/tuning/spark/spark_auto_deployment/conf/core-site.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - fs.defaultFS - hdfs://localhost:9000 - - diff --git a/examples/tuning/spark/spark_auto_deployment/conf/hadoop.conf b/examples/tuning/spark/spark_auto_deployment/conf/hadoop.conf deleted file mode 100644 index 30da40c..0000000 --- a/examples/tuning/spark/spark_auto_deployment/conf/hadoop.conf +++ /dev/null @@ -1,15 +0,0 @@ -# Hadoop home -hibench.hadoop.home /home/lionel/Tools/hadoop-3.2.4 - -# The path of hadoop executable -hibench.hadoop.executable ${hibench.hadoop.home}/bin/hadoop - -# Hadoop configraution directory -hibench.hadoop.configure.dir ${hibench.hadoop.home}/etc/hadoop - -# The root HDFS path to store HiBench data -hibench.hdfs.master hdfs://localhost:9000 - - -# Hadoop release provider. 
Supported value: apache -hibench.hadoop.release apache diff --git a/examples/tuning/spark/spark_auto_deployment/conf/hdfs-site.xml b/examples/tuning/spark/spark_auto_deployment/conf/hdfs-site.xml deleted file mode 100644 index 778f3ef..0000000 --- a/examples/tuning/spark/spark_auto_deployment/conf/hdfs-site.xml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - dfs.replication - 1 - - diff --git a/examples/tuning/spark/spark_auto_deployment/conf/mapred-site.xml b/examples/tuning/spark/spark_auto_deployment/conf/mapred-site.xml deleted file mode 100644 index 2915041..0000000 --- a/examples/tuning/spark/spark_auto_deployment/conf/mapred-site.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - - - - mapreduce.framework.name - yarn - - - mapreduce.application.classpath - $HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/* - - diff --git a/examples/tuning/spark/spark_auto_deployment/conf/settings.xml b/examples/tuning/spark/spark_auto_deployment/conf/settings.xml deleted file mode 100644 index d256504..0000000 --- a/examples/tuning/spark/spark_auto_deployment/conf/settings.xml +++ /dev/null @@ -1,277 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - aliyunmaven - * - 阿里云公共仓库 - https://maven.aliyun.com/repository/public - - - - - - - - - - - - - diff --git a/examples/tuning/spark/spark_auto_deployment/conf/spark.conf b/examples/tuning/spark/spark_auto_deployment/conf/spark.conf deleted file mode 100644 index 46ad2fb..0000000 --- a/examples/tuning/spark/spark_auto_deployment/conf/spark.conf +++ /dev/null @@ -1,45 +0,0 @@ -# Spark home -hibench.spark.home /home/lionel/Tools/spark-3.1.3-bin-hadoop3.2 - -# Spark master -# standalone mode: spark://xxx:7077 -# YARN mode: yarn-client -hibench.spark.master spark://localhost:7077 - -# executor number and cores when running on Yarn -hibench.yarn.executor.num 2 -hibench.yarn.executor.cores 4 - -# executor and driver memory in standalone & YARN mode 
-spark.executor.memory 4g -spark.driver.memory 4g - -# set spark parallelism property according to hibench's parallelism value -spark.default.parallelism ${hibench.default.map.parallelism} - -# set spark sql's default shuffle partitions according to hibench's parallelism value -spark.sql.shuffle.partitions ${hibench.default.shuffle.parallelism} - -#====================================================== -# Spark Streaming -#====================================================== -# Spark streaming Batchnterval in millisecond (default 100) -hibench.streambench.spark.batchInterval 100 - -# Number of nodes that will receive kafka input (default: 4) -hibench.streambench.spark.receiverNumber 4 - -# Indicate RDD storage level. (default: 2) -# 0 = StorageLevel.MEMORY_ONLY -# 1 = StorageLevel.MEMORY_AND_DISK_SER -# other = StorageLevel.MEMORY_AND_DISK_SER_2 -hibench.streambench.spark.storageLevel 2 - -# indicate whether to test the write ahead log new feature (default: false) -hibench.streambench.spark.enableWAL false - -# if testWAL is true, this path to store stream context in hdfs shall be specified. 
If false, it can be empty (default: /var/tmp) -hibench.streambench.spark.checkpointPath /var/tmp - -# whether to use direct approach or not (dafault: true) -hibench.streambench.spark.useDirectMode true diff --git a/examples/tuning/spark/spark_auto_deployment/conf/yarn-site.xml b/examples/tuning/spark/spark_auto_deployment/conf/yarn-site.xml deleted file mode 100644 index 6f39674..0000000 --- a/examples/tuning/spark/spark_auto_deployment/conf/yarn-site.xml +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - yarn.nodemanager.aux-services - mapreduce_shuffle - - - yarn.nodemanager.env-whitelist - JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME - - diff --git a/examples/tuning/spark/spark_auto_deployment_and_tuning/README.md b/examples/tuning/spark/spark_auto_deployment_and_tuning/README.md new file mode 100644 index 0000000..d31777a --- /dev/null +++ b/examples/tuning/spark/spark_auto_deployment_and_tuning/README.md @@ -0,0 +1,143 @@ +## Spark 自动化部署和HiBench基准性能测试 + +### **工作流程** + +一、**Spark 自动化部署流程** + +1. 安装 gcc make curl wget samba git +2. 关闭防火墙并启动nmbd.service +3. 配置本机免密登录 +4. 安装Java和配置Java 环境 +5. 安装Hadoop并配置Hadoop环境,格式化namenode并启动hdfs和yarn +6. 安装Spark并配置Spark环境,启动Spark master和worker守护进程 + +二、**HiBench 自动化部署流程** + +1. 安装python2 +2. 安装Maven并配置Maven环境变量和设置maven仓库国内镜像 +3. 为Spark Benchmark下载并编译、配置HiBench + +三、**执行 Benchmark** + +1. 准备工作 +2. 
执行测试 + +## 开始操作 + +**前提条件** + +- 将本目录及所有文件和子目录放到您的主机所在的目录,用`ip addr`查看本机ip,`hostname`命令查看本机名称,然后在`/etc/hosts`中添加`ip hostname`,例如:`192.168.70.129 spark` + +- 关闭防火墙: + + ```bash + systemctl stop firewalld + ``` + +- 执行系统更新,并安装必要依赖 + + ```bash + dnf update -y + dnf install gcc make curl wget samba git atune atune-engine -y + ``` + +- 启动服务: + + ```bash + systemctl start nmb + systemctl start atuned + systemctl start atune-engine + ``` + + **PS**: atuned和atune-engine可能启动不成功,需要将`/etc/atuned/atuned.cnf`中的`rest_tls`和`engine_tls`置为false,并把`network`置为自己的网络;`/etc/atuned/engine.cnf`中的`engine_tls`置为false。 + +- 配置免密登录 + + ```bash + ssh-keygen -t rsa + cat ~/.ssh/id_rsa.pub >>~/.ssh/authorized_keys + ``` + + + +在自动化部署的过程中,需要下载大量的文件,如果遇到网络问题导致安装不成功,可以配置代理: + +```bash +# 为git配置代理执行: +git config --global http.proxy http://ip:port +git config --global https.proxy http://ip:port + +# 为系统设置代理,可以在~/.bashrc中添加如下: +export http_proxy=http://ip:port +export https_proxy=http://ip:port +# 使环境变量立即生效 +source ~/.bashrc + +#注意上面的ip和port替换为自己的代理地址 +``` + +### **Spark 自动化部署** + +切换到脚本所在的目录,执行`chmod u+x ./install_spark.sh`,为脚本添加执行权限,然后执行脚本`./install_spark.sh`,期间可能需要输入管理员密码,等待片刻终端出现`Spark deployment success.`的字样,代表执行成功,执行`source ~/.bashrc`和`jps`命令,查看正在运行的守护进程为:`NameNode、NodeManager、SecondaryNameNode、ResourceManager、DataNode、Master、Worker` + +如果运行不成功,可以查看本目录的install_spark.log日志文件,可以看到哪一步未成功。 + +### HiBench 自动化部署 + +切换到脚本所在的目录,执行`chmod u+x ./install_hibench.sh`,为脚本添加执行权限,然后执行脚本`./install_hibench.sh`,期间可能需要输入管理员密码,等待片刻终端出现`Hibench init success`的字样,代表执行成功。 + +如果运行不成功,可以查看本目录的install_hibench.log日志文件,可以看到哪一步未成功。 + +### 执行基本Benchmark + +切换到脚本所在的目录: + +```bash +sh HiBench/bin/workloads/sql/join/prepare/prepare.sh +sh HiBench/bin/workloads/sql/join/spark/run.sh +# 结果 +cat HiBench/report/hibench.report +``` + +## A-tune HiBench性能调优 + +**实例机器具体参数** + +- 虚拟化:VMware Workstation 17 +- 操作系统:openEuler 22.03 SP1 +- CPU:AMD Ryzen 7 4800H with Radeon Graphics (虚拟机 2CPU 4Core) +- Memory:8G +- Disk:128G + +**Spark调优参数:** + 
+- num_executors:执行器数量 (2~4) +- executor_core:每个执行器核心数 (2~4) +- executor_memory:执行器内存 (1g~4g) +- driver_memory:driver内存 (1g~2g) +- default_parallelism:默认并行度 (10~50) +- storageLevel:rdd默认存储级别(0~2) +- shuffle_partition:shuffle分区个数(1~4) + +**HDFS数据规模调整为huge(参见HiBench调整测试数据规模)** + +### 开始测试 + +首先,生成测试数据:`sh HiBench/bin/workloads/sql/join/prepare/prepare.sh` + +将`spark_hibench_server.yaml`拷贝到`/etc/atuned/tuning`下: + +```bash +cp spark_hibench_server.yaml /etc/atuned/tuning +# 注意:需要修改spark_hibench_server.yaml中所有get和set的路径,使其指向spark_hibench.sh的所在位置 +``` + +开始执行性能调优: + +```bash +atune-adm tuning --project spark_hibench --detail ./spark_hibench_client.yaml +``` + + + +**Notice:实例测试结果保存在本目录下的atune_spark_bench.log文件中** diff --git a/examples/tuning/spark/spark_auto_deployment_and_tuning/atune_spark_bench.log b/examples/tuning/spark/spark_auto_deployment_and_tuning/atune_spark_bench.log new file mode 100644 index 0000000..67c70f6 --- /dev/null +++ b/examples/tuning/spark/spark_auto_deployment_and_tuning/atune_spark_bench.log @@ -0,0 +1,211 @@ +[root@spark sparkbench]# atune-adm tuning --project spark_hibench --detail ./spark_hibench_client.yaml + Start to benchmark baseline... + 1.Loading its corresponding tuning project: spark_hibench + 2.Start to tuning the system...... 
+ Current Tuning Progress......(1/50) + Used time: 1m38s, Total Time: 1m38s, Best Performance: (duration=48.58,throughput=39149753.00), Performance Improvement Rate: 0.06% + The 1th recommand parameters is: num_executors=3,executor_core=3,executor_memory=1,driver_memory=1,default_parallelism=13,storageLevel=0,shuffle_partition=2 + The 1th evaluation value: (duration=48.58,throughput=39149753.00)(0.06%) + Current Tuning Progress......(2/50) + Used time: 2m23s, Total Time: 2m23s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 2th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=33,storageLevel=0,shuffle_partition=4 + The 2th evaluation value: (duration=44.55,throughput=42695879.00)(9.11%) + Current Tuning Progress......(3/50) + Used time: 3m10s, Total Time: 3m10s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 3th recommand parameters is: num_executors=2,executor_core=3,executor_memory=2,driver_memory=2,default_parallelism=13,storageLevel=0,shuffle_partition=3 + The 3th evaluation value: (duration=47.06,throughput=40417640.00)(3.29%) + Current Tuning Progress......(4/50) + Used time: 3m56s, Total Time: 3m56s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 4th recommand parameters is: num_executors=4,executor_core=3,executor_memory=3,driver_memory=2,default_parallelism=38,storageLevel=0,shuffle_partition=1 + The 4th evaluation value: (duration=45.69,throughput=41629603.00)(6.39%) + Current Tuning Progress......(5/50) + Used time: 4m50s, Total Time: 4m50s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 5th recommand parameters is: num_executors=2,executor_core=4,executor_memory=1,driver_memory=1,default_parallelism=39,storageLevel=1,shuffle_partition=3 + The 5th evaluation value: 
(duration=53.73,throughput=35400039.00)(-10.53%) + Current Tuning Progress......(6/50) + Used time: 5m40s, Total Time: 5m40s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 6th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=35,storageLevel=1,shuffle_partition=3 + The 6th evaluation value: (duration=49.34,throughput=38548304.00)(-1.50%) + Current Tuning Progress......(7/50) + Used time: 6m27s, Total Time: 6m27s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 7th recommand parameters is: num_executors=3,executor_core=4,executor_memory=1,driver_memory=1,default_parallelism=38,storageLevel=0,shuffle_partition=2 + The 7th evaluation value: (duration=47.10,throughput=40383314.00)(3.21%) + Current Tuning Progress......(8/50) + Used time: 7m12s, Total Time: 7m12s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 8th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1 + The 8th evaluation value: (duration=45.06,throughput=42213541.00)(7.88%) + Current Tuning Progress......(9/50) + Used time: 8m0s, Total Time: 8m0s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 9th recommand parameters is: num_executors=3,executor_core=2,executor_memory=3,driver_memory=2,default_parallelism=12,storageLevel=0,shuffle_partition=3 + The 9th evaluation value: (duration=46.89,throughput=40560721.00)(3.67%) + Current Tuning Progress......(10/50) + Used time: 8m50s, Total Time: 8m50s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 10th recommand parameters is: 
num_executors=3,executor_core=2,executor_memory=3,driver_memory=2,default_parallelism=30,storageLevel=1,shuffle_partition=3 + The 10th evaluation value: (duration=50.58,throughput=37604012.00)(-4.05%) + Current Tuning Progress......(11/50) + Used time: 9m41s, Total Time: 9m41s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 11th recommand parameters is: num_executors=2,executor_core=4,executor_memory=4,driver_memory=1,default_parallelism=40,storageLevel=0,shuffle_partition=1 + The 11th evaluation value: (duration=49.54,throughput=38390354.00)(-1.92%) + Current Tuning Progress......(12/50) + Used time: 10m32s, Total Time: 10m32s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 12th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=33,storageLevel=0,shuffle_partition=4 + The 12th evaluation value: (duration=49.37,throughput=38528782.00)(-1.56%) + Current Tuning Progress......(13/50) + Used time: 11m23s, Total Time: 11m23s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 13th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=33,storageLevel=0,shuffle_partition=4 + The 13th evaluation value: (duration=50.74,throughput=37484693.00)(-4.38%) + Current Tuning Progress......(14/50) + Used time: 12m13s, Total Time: 12m13s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 14th recommand parameters is: num_executors=2,executor_core=2,executor_memory=4,driver_memory=1,default_parallelism=14,storageLevel=0,shuffle_partition=1 + The 14th evaluation value: (duration=48.31,throughput=39370993.00)(0.62%) + Current Tuning Progress......(15/50) + Used time: 13m5s, Total Time: 13m5s, Best Performance: (duration=44.55,throughput=42695879.00), Performance 
Improvement Rate: 9.11% + The 15th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=2,default_parallelism=37,storageLevel=0,shuffle_partition=1 + The 15th evaluation value: (duration=51.56,throughput=36887828.00)(-6.07%) + Current Tuning Progress......(16/50) + Used time: 13m54s, Total Time: 13m54s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 16th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=2,default_parallelism=17,storageLevel=1,shuffle_partition=2 + The 16th evaluation value: (duration=48.44,throughput=39264519.00)(0.35%) + Current Tuning Progress......(17/50) + Used time: 14m44s, Total Time: 14m44s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 17th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=3 + The 17th evaluation value: (duration=47.74,throughput=39836907.00)(1.82%) + Current Tuning Progress......(18/50) + Used time: 15m32s, Total Time: 15m32s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 18th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=2,default_parallelism=16,storageLevel=0,shuffle_partition=1 + The 18th evaluation value: (duration=47.72,throughput=39861119.00)(1.87%) + Current Tuning Progress......(19/50) + Used time: 16m22s, Total Time: 16m22s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 19th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=17,storageLevel=0,shuffle_partition=1 + The 19th evaluation value: (duration=48.78,throughput=38990843.00)(-0.35%) + Current Tuning Progress......(20/50) + Used time: 17m11s, Total Time: 17m11s, Best 
Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 20th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=4 + The 20th evaluation value: (duration=48.30,throughput=39382406.00)(0.64%) + Current Tuning Progress......(21/50) + Used time: 17m58s, Total Time: 17m58s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 21th recommand parameters is: num_executors=2,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1 + The 21th evaluation value: (duration=46.49,throughput=40913210.00)(4.56%) + Current Tuning Progress......(22/50) + Used time: 18m49s, Total Time: 18m49s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 22th recommand parameters is: num_executors=4,executor_core=3,executor_memory=3,driver_memory=2,default_parallelism=38,storageLevel=0,shuffle_partition=1 + The 22th evaluation value: (duration=49.63,throughput=38324601.00)(-2.10%) + Current Tuning Progress......(23/50) + Used time: 19m34s, Total Time: 19m34s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 23th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2 + The 23th evaluation value: (duration=44.78,throughput=42473723.00)(8.55%) + Current Tuning Progress......(24/50) + Used time: 20m21s, Total Time: 20m21s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 24th recommand parameters is: num_executors=2,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2 + The 24th evaluation value: (duration=45.56,throughput=41747477.00)(6.69%) + Current Tuning 
Progress......(25/50) + Used time: 21m8s, Total Time: 21m8s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 25th recommand parameters is: num_executors=4,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2 + The 25th evaluation value: (duration=46.73,throughput=40697850.00)(4.02%) + Current Tuning Progress......(26/50) + Used time: 21m54s, Total Time: 21m54s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 26th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1 + The 26th evaluation value: (duration=44.84,throughput=42415943.00)(8.41%) + Current Tuning Progress......(27/50) + Used time: 22m40s, Total Time: 22m40s, Best Performance: (duration=44.55,throughput=42695879.00), Performance Improvement Rate: 9.11% + The 27th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2 + The 27th evaluation value: (duration=44.76,throughput=42496499.00)(8.60%) + Current Tuning Progress......(28/50) + Used time: 23m25s, Total Time: 23m25s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53% + The 28th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=3 + The 28th evaluation value: (duration=44.38,throughput=42861370.00)(9.53%) + Current Tuning Progress......(29/50) + Used time: 24m11s, Total Time: 24m11s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53% + The 29th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=3 + The 29th evaluation value: 
(duration=45.40,throughput=41891840.00)(7.07%) + Current Tuning Progress......(30/50) + Used time: 25m8s, Total Time: 25m8s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53% + The 30th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2 + The 30th evaluation value: (duration=56.03,throughput=33942595.00)(-15.27%) + Current Tuning Progress......(31/50) + Used time: 26m2s, Total Time: 26m2s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53% + The 31th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=13,storageLevel=0,shuffle_partition=1 + The 31th evaluation value: (duration=53.17,throughput=35774241.00)(-9.38%) + Current Tuning Progress......(32/50) + Used time: 26m56s, Total Time: 26m56s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53% + The 32th recommand parameters is: num_executors=2,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=2 + The 32th evaluation value: (duration=52.67,throughput=36111132.00)(-8.35%) + Current Tuning Progress......(33/50) + Used time: 27m49s, Total Time: 27m49s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53% + The 33th recommand parameters is: num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=3 + The 33th evaluation value: (duration=50.66,throughput=37540182.00)(-4.22%) + Current Tuning Progress......(34/50) + Used time: 28m39s, Total Time: 28m39s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53% + The 34th recommand parameters is: 
num_executors=3,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=3 + The 34th evaluation value: (duration=49.44,throughput=38474225.00)(-1.71%) + Current Tuning Progress......(35/50) + Used time: 29m26s, Total Time: 29m26s, Best Performance: (duration=44.38,throughput=42861370.00), Performance Improvement Rate: 9.53% + The 35th recommand parameters is: num_executors=3,executor_core=2,executor_memory=3,driver_memory=1,default_parallelism=12,storageLevel=1,shuffle_partition=3 + The 35th evaluation value: (duration=45.28,throughput=42003783.00)(7.35%) + Current Tuning Progress......(36/50) + Used time: 30m8s, Total Time: 30m8s, Best Performance: (duration=41.91,throughput=45385575.00), Performance Improvement Rate: 15.99% + The 36th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1 + The 36th evaluation value: (duration=41.91,throughput=45385575.00)(15.99%) + Current Tuning Progress......(37/50) + Used time: 30m51s, Total Time: 30m51s, Best Performance: (duration=41.81,throughput=45494135.00), Performance Improvement Rate: 16.26% + The 37th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1 + The 37th evaluation value: (duration=41.81,throughput=45494135.00)(16.26%) + Current Tuning Progress......(38/50) + Used time: 31m33s, Total Time: 31m33s, Best Performance: (duration=41.35,throughput=45999161.00), Performance Improvement Rate: 17.56% + The 38th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=1 + The 38th evaluation value: (duration=41.35,throughput=45999161.00)(17.56%) + Current Tuning Progress......(39/50) + Used time: 32m15s, Total Time: 32m15s, Best Performance: (duration=41.35,throughput=45999161.00), Performance 
Improvement Rate: 17.56% + The 39th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=19,storageLevel=0,shuffle_partition=1 + The 39th evaluation value: (duration=41.48,throughput=45858308.00)(17.19%) + Current Tuning Progress......(40/50) + Used time: 32m58s, Total Time: 32m58s, Best Performance: (duration=41.35,throughput=45999161.00), Performance Improvement Rate: 17.56% + The 40th recommand parameters is: num_executors=2,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=40,storageLevel=0,shuffle_partition=1 + The 40th evaluation value: (duration=41.81,throughput=45486519.00)(16.26%) + Current Tuning Progress......(41/50) + Used time: 33m41s, Total Time: 33m41s, Best Performance: (duration=41.35,throughput=45999161.00), Performance Improvement Rate: 17.56% + The 41th recommand parameters is: num_executors=4,executor_core=4,executor_memory=4,driver_memory=2,default_parallelism=13,storageLevel=1,shuffle_partition=4 + The 41th evaluation value: (duration=41.36,throughput=45983592.00)(17.53%) + Current Tuning Progress......(42/50) + Used time: 34m23s, Total Time: 34m23s, Best Performance: (duration=41.35,throughput=45999161.00), Performance Improvement Rate: 17.56% + The 42th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=18,storageLevel=0,shuffle_partition=3 + The 42th evaluation value: (duration=41.52,throughput=45808606.00)(17.08%) + Current Tuning Progress......(43/50) + Used time: 35m5s, Total Time: 35m5s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61% + The 43th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=20,storageLevel=0,shuffle_partition=2 + The 43th evaluation value: (duration=41.33,throughput=46014741.00)(17.61%) + Current Tuning Progress......(44/50) + Used time: 35m48s, Total Time: 35m48s, Best 
Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61% + The 44th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=30,storageLevel=0,shuffle_partition=2 + The 44th evaluation value: (duration=41.53,throughput=45800884.00)(17.05%) + Current Tuning Progress......(45/50) + Used time: 36m30s, Total Time: 36m30s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61% + The 45th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=2,default_parallelism=14,storageLevel=1,shuffle_partition=4 + The 45th evaluation value: (duration=41.44,throughput=45902578.00)(17.30%) + Current Tuning Progress......(46/50) + Used time: 37m13s, Total Time: 37m13s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61% + The 46th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=20,storageLevel=1,shuffle_partition=2 + The 46th evaluation value: (duration=41.77,throughput=45533344.00)(16.37%) + Current Tuning Progress......(47/50) + Used time: 37m57s, Total Time: 37m57s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61% + The 47th recommand parameters is: num_executors=4,executor_core=3,executor_memory=1,driver_memory=2,default_parallelism=39,storageLevel=1,shuffle_partition=2 + The 47th evaluation value: (duration=43.16,throughput=44067964.00)(12.63%) + Current Tuning Progress......(48/50) + Used time: 38m42s, Total Time: 38m42s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61% + The 48th recommand parameters is: num_executors=3,executor_core=3,executor_memory=3,driver_memory=1,default_parallelism=16,storageLevel=0,shuffle_partition=3 + The 48th evaluation value: (duration=43.91,throughput=43312306.00)(10.70%) + Current Tuning 
Progress......(49/50) + Used time: 39m31s, Total Time: 39m31s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61% + The 49th recommand parameters is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=19,storageLevel=1,shuffle_partition=2 + The 49th evaluation value: (duration=47.78,throughput=39807725.00)(1.74%) + Current Tuning Progress......(50/50) + Used time: 40m16s, Total Time: 40m16s, Best Performance: (duration=41.33,throughput=46014741.00), Performance Improvement Rate: 17.61% + The 50th recommand parameters is: num_executors=4,executor_core=4,executor_memory=2,driver_memory=1,default_parallelism=17,storageLevel=1,shuffle_partition=1 + The 50th evaluation value: (duration=43.84,throughput=43388387.00)(10.88%) + + The final optimization result is: num_executors=3,executor_core=3,executor_memory=4,driver_memory=1,default_parallelism=20,storageLevel=0,shuffle_partition=2 + The final evaluation value is: duration=41.33,throughput=46014741.00 + + Baseline Performance is: (duration=48.61,throughput=39128812.00) + + Tuning Finished \ No newline at end of file diff --git a/examples/tuning/spark/spark_auto_deployment/install_hibench.sh b/examples/tuning/spark/spark_auto_deployment_and_tuning/install_hibench.sh similarity index 77% rename from examples/tuning/spark/spark_auto_deployment/install_hibench.sh rename to examples/tuning/spark/spark_auto_deployment_and_tuning/install_hibench.sh index af2e15c..9fdefd5 100644 --- a/examples/tuning/spark/spark_auto_deployment/install_hibench.sh +++ b/examples/tuning/spark/spark_auto_deployment_and_tuning/install_hibench.sh @@ -12,6 +12,7 @@ if ! command -v python2 &>/dev/null; then exit fi tar -xf ./Python-2.7.18.tgz + rm -f ./Python-2.7.18.tgz # install python-2.7 echo "installing python-2.7..." td=$(pwd) @@ -19,7 +20,7 @@ if ! 
command -v python2 &>/dev/null; then ./configure --prefix=$td/python-2.7 make make install - sudo ln -s $td/python-2.7/bin/python2.7 /usr/bin/python2 + ln -s $td/python-2.7/bin/python2.7 /usr/bin/python2 cd .. fi @@ -33,6 +34,7 @@ else exit fi tar -xf ./apache-maven-3.8.8-bin.tar.gz +rm -f ./apache-maven-3.8.8-bin.tar.gz export MAVEN_HOME=$(pwd)/apache-maven-3.8.8 if ! grep -q "export MAVEN_HOME=$(pwd)/apache-maven-3.8.8" ~/.bashrc; then echo "export MAVEN_HOME=$(pwd)/apache-maven-3.8.8" >>~/.bashrc @@ -42,7 +44,20 @@ source ~/.bashrc # maven aliyun mirror mkdir ~/.m2 -cp ./conf/settings.xml ~/.m2 +cat >~/.m2/settings.xml < + + + + aliyunmaven + * + 阿里云公共仓库 + https://maven.aliyun.com/repository/public + + + +EOF # install HiBench echo "installing HiBench..." @@ -72,6 +87,8 @@ cp conf/hadoop.conf.template conf/hadoop.conf sed -i "2c hibench.hadoop.home $HADOOP_HOME" conf/hadoop.conf sed -i "11c hibench.hdfs.master hdfs://localhost:9000" conf/hadoop.conf +sed -i "s|hibench.scale.profile.*|hibench.scale.profile\thuge|g" conf/hibench.conf + cp conf/spark.conf.template conf/spark.conf sed -i "2c hibench.spark.home $SPARK_HOME" conf/spark.conf sed -i "7c hibench.spark.master spark://localhost:7077" conf/spark.conf diff --git a/examples/tuning/spark/spark_auto_deployment/install_spark.sh b/examples/tuning/spark/spark_auto_deployment_and_tuning/install_spark.sh similarity index 54% rename from examples/tuning/spark/spark_auto_deployment/install_spark.sh rename to examples/tuning/spark/spark_auto_deployment_and_tuning/install_spark.sh index 73c8392..12542dc 100644 --- a/examples/tuning/spark/spark_auto_deployment/install_spark.sh +++ b/examples/tuning/spark/spark_auto_deployment_and_tuning/install_spark.sh @@ -1,29 +1,9 @@ #!/bin/bash -# install dependencies -echo "install dependencies..." -sudo dnf install gcc make curl wget samba git -y -if [ $? 
-eq 0 ]; then - echo "------------ dependencies install success ------------" >>./install_spark.log -else - echo "------------ dependencies install failed ------------" >>./install_spark.log - exit -fi - -# stop firewalld -sudo systemctl disable --now firewalld - -# start samba -sudo systemctl enable --now nmbd - -### ssh password-free login -ssh-keygen -t rsa -cat ~/.ssh/id_rsa.pub >>~/.ssh/authorized_keys - # download and install software # JDK 1.8 echo "downloading jdk..." -wget https://mirrors.tuna.tsinghua.edu.cn/Adoptium/8/jdk/x64/linux/OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tar.gz +wget https://mirrors.ustc.edu.cn/adoptium/releases/temurin8-binaries/jdk8u372-b07/OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tar.gz if [ $? -eq 0 ]; then echo "------------ jdk-1.8 download success ------------" >>./install_spark.log else @@ -32,6 +12,7 @@ else fi # install jdk tar -xf ./OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tar.gz +rm -f ./OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tar.gz export JAVA_HOME=$(pwd)/jdk8u372-b07 if ! grep -q "export JAVA_HOME=$(pwd)/jdk8u372-b07" ~/.bashrc; then echo "export JAVA_HOME=$(pwd)/jdk8u372-b07" >>~/.bashrc @@ -41,7 +22,7 @@ source ~/.bashrc ## Hadoop echo "downloading hadoop..." -wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/core/hadoop-3.2.4/hadoop-3.2.4.tar.gz +wget https://mirrors.ustc.edu.cn/apache/hadoop/core/hadoop-3.2.4/hadoop-3.2.4.tar.gz if [ $? -eq 0 ]; then echo "------------ hadoop-3.2 download success ------------" >>./install_spark.log else @@ -50,12 +31,102 @@ else fi # install hadoop tar -xf ./hadoop-3.2.4.tar.gz +rm -f ./hadoop-3.2.4.tar.gz export HADOOP_HOME=$(pwd)/hadoop-3.2.4 if ! 
grep -q "export HADOOP_HOME=$(pwd)/hadoop-3.2.4" ~/.bashrc; then echo "export HADOOP_HOME=$(pwd)/hadoop-3.2.4" >>~/.bashrc echo "export PATH=\$PATH:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin" >>~/.bashrc fi -cp ./conf/core-site.xml ./conf/hdfs-site.xml ./conf/mapred-site.xml ./conf/yarn-site.xml hadoop-3.2.4/etc/hadoop/ +cat >hadoop-3.2.4/etc/hadoop/core-site.xml < + + + + fs.defaultFS + hdfs://localhost:9000 + + + hadoop.tmp.dir + $(pwd)/tmp + + +EOF + +cat >hadoop-3.2.4/etc/hadoop/hdfs-site.xml < + + + + dfs.replication + 1 + + + dfs.safemode.threshold.pct + 0 + + Specifies the percentage of blocks that should satisfy + the minimal replication requirement defined by dfs.replication.min. + Values less than or equal to 0 mean not to wait for any particular + percentage of blocks before exiting safemode. + Values greater than 1 will make safe mode permanent. + + + +EOF + +cat >hadoop-3.2.4/etc/hadoop/mapred-site.xml < + + + + mapreduce.framework.name + yarn + + + mapreduce.application.classpath + \$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:\$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/* + + +EOF + +cat >hadoop-3.2.4/etc/hadoop/yarn-site.xml < + + + yarn.nodemanager.aux-services + mapreduce_shuffle + + + yarn.nodemanager.env-whitelist + JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME + + + yarn.nodemanager.pmem-check-enabled + false + + + yarn.nodemanager.vmem-check-enabled + false + + + +EOF + +cat >>hadoop-3.2.4/etc/hadoop/hadoop-env.sh <>hadoop-3.2.4/etc/hadoop/yarn-env.sh <>hadoop-3.2.4/etc/hadoop/mapred-env.sh <>~/.bashrc - echo "export PATH=\$PATH:$\SPARK_HOME/bin:$\SPARK_HOME/sbin" >>~/.bashrc + echo "export PATH=\$PATH:\$SPARK_HOME/bin:\$SPARK_HOME/sbin" >>~/.bashrc fi cp spark-3.1.3-bin-hadoop3.2/conf/spark-env.sh.template spark-3.1.3-bin-hadoop3.2/conf/spark-env.sh diff --git a/examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench.sh 
b/examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench.sh new file mode 100644 index 0000000..e89b356 --- /dev/null +++ b/examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench.sh @@ -0,0 +1,17 @@ +num_executors=2 +executor_core=2 +executor_memory=2g +driver_memory=2g +default_parallelism=50 +storageLevel=1 +shuffle_partitions=4 + +sed "s/^hibench.yarn.executor.num.*/hibench.yarn.executor.num\t$num_executors/" -i HiBench/conf/spark.conf +sed "s/^hibench.yarn.executor.cores.*/hibench.yarn.executor.cores\t$executor_core/" -i HiBench/conf/spark.conf +sed "s/^spark.executor.memory.*/spark.executor.memory\t$executor_memory/" -i HiBench/conf/spark.conf +sed "s/^spark.driver.memory.*/spark.driver.memory\t$driver_memory/" -i HiBench/conf/spark.conf +sed "s/^spark.default.parallelism.*/spark.default.parallelism\t$default_parallelism/" -i HiBench/conf/spark.conf +sed "s/^hibench.streambench.spark.storageLevel.*/hibench.streambench.spark.storageLevel\t$storageLevel/" -i HiBench/conf/spark.conf +sed "s/^spark.sql.shuffle.partitions.*/spark.sql.shuffle.partitions\t$shuffle_partitions/" -i HiBench/conf/spark.conf + +sh HiBench/bin/workloads/sql/join/spark/run.sh diff --git a/examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench_client.yaml b/examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench_client.yaml new file mode 100644 index 0000000..43bbca3 --- /dev/null +++ b/examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench_client.yaml @@ -0,0 +1,19 @@ +project: "spark_hibench" +engine : "bayes" +iterations : 50 +random_starts : 10 + +benchmark : sh spark_hibench.sh +evaluations : + - + name: "duration" + info: + get: "awk '{print $5}' HiBench/report/hibench.report | tail -n 1" + type: "positive" + weight: 80 + - + name: "throughput" + info: + get: "awk '{print $6}' HiBench/report/hibench.report | tail -n 1" + type: "negative" + weight: 20 \ No newline at end of file diff --git 
a/examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench_server.yaml b/examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench_server.yaml new file mode 100644 index 0000000..1853540 --- /dev/null +++ b/examples/tuning/spark/spark_auto_deployment_and_tuning/spark_hibench_server.yaml @@ -0,0 +1,96 @@ +project: "spark_hibench" +maxiterations: 50 +startworkload: "" +stopworkload: "" +object: + - name: "num_executors" + info: + desc: "num_executors" + get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^num_executors=' | awk -F '=' '{print $2}'" + set: 'sed -i "s/^num_executors=.*/num_executors=$value/g" /root/sparkbench/spark_hibench.sh' + needrestart: "false" + type: "discrete" + scope: + - 2 + - 4 + dtype: "int" + + - name: "executor_core" + info: + desc: "executor_core" + get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^executor_core=' | awk -F '=' '{print $2}'" + set: 'sed -i "s/^executor_core=.*/executor_core=$value/g" /root/sparkbench/spark_hibench.sh' + needrestart: "false" + type: "discrete" + scope: + - 2 + - 4 + dtype: "int" + + - name: "executor_memory" + info: + desc: "executor_memory" + get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^executor_memory=' | awk -F '=' '{print $2}'" + set: 'sed -i "s/^executor_memory=[0-9]/executor_memory=$value/g" /root/sparkbench/spark_hibench.sh' + needrestart: "false" + type: "discrete" + scope: + - 1 + - 2 + - 3 + - 4 + dtype: "int" + + - name: "driver_memory" + info: + desc: "driver_memory" + get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^driver_memory=' | awk -F '=' '{print $2}'" + set: 'sed -i "s/^driver_memory=[0-9]/driver_memory=$value/g" /root/sparkbench/spark_hibench.sh' + needrestart: "false" + type: "continuous" + scope: + - 1 + - 2 + dtype: "int" + + - name: "default_parallelism" + info: + desc: "default_parallelism" + get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^default_parallelism=' | awk -F '=' '{print $2}'" + set: 'sed -i 
"s/^default_parallelism=.*/default_parallelism=$value/g" /root/sparkbench/spark_hibench.sh' + needrestart: "false" + type: "discrete" + scope: + - 10 + - 20 + - 30 + - 40 + - 50 + dtype: "int" + + - name: "storageLevel" + info: + desc: "storageLevel" + get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^storageLevel=' | awk -F '=' '{print $2}'" + set: 'sed -i "s/^storageLevel=.*/storageLevel=$value/g" /root/sparkbench/spark_hibench.sh' + needrestart: "false" + type: "discrete" + scope: + - 0 + - 1 + - 2 + dtype: "int" + + - name: "shuffle_partition" + info: + desc: "shuffle_partition" + get: "cat /root/sparkbench/spark_hibench.sh | grep -i '^shuffle_partitions=' | awk -F '=' '{print $2}'" + set: 'sed -i "s/^shuffle_partitions=.*/shuffle_partitions=$value/g" /root/sparkbench/spark_hibench.sh' + needrestart: "false" + type: "discrete" + scope: + - 1 + - 2 + - 3 + - 4 + dtype: "int" \ No newline at end of file