Download spark-1.6.1-bin-hadoop2.6.tgz
Extract the archive
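For example (the install directory /apps is an assumption, not from the original notes):
tar -zxvf spark-1.6.1-bin-hadoop2.6.tgz -C /apps/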
Configure
Rename and edit spark-env.sh:
mv spark-env.sh.template spark-env.sh
vi spark-env.sh
Add the following to the file:
export JAVA_HOME=/usr/java/jdk1.7.0_45
export SPARK_MASTER_IP=mini1
export SPARK_MASTER_PORT=7077
Save and exit. Then rename and edit the slaves.template file:
mv slaves.template slaves
vi slaves
Add the hostnames of the child (Worker) nodes:
mini2
mini3
Start the cluster
sbin/start-all.sh
bin/spark-shell    starts a local, single-machine spark-shell; it does not connect to the Master, so it will not show up in the Master's web UI (http://mini1:8080 by default)
To start a spark-shell against the cluster:
bin/spark-shell --master spark://mini1:7077 --executor-memory 512m --total-executor-cores 1
--master spark://mini1:7077    specifies the Master address
--executor-memory 2g           specifies the memory available to each executor, here 2 GB
--total-executor-cores 2       specifies the total number of CPU cores the application may use across the cluster, here 2
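Once the shell is up, the SparkContext is already available as sc. A quick sanity check that the executors are actually doing work (uses nothing beyond the shell's built-in sc):
sc.parallelize(1 to 1000).reduce(_ + _)   // distributes the range across executors; should return 500500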
Submitting an example job with spark-submit (the bundled SparkPi; the trailing 50 is the number of slices the computation is split into):
bin/spark-submit --class org.apache.spark.examples.SparkPi --master spark://mini1:7077 --total-executor-cores 1 --executor-memory 612m lib/spark-examples-1.6.1-hadoop2.6.0.jar 50
WordCount in spark-shell:
sc.textFile("hdfs://mini1:9000/wc/sparkInput").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_,1).sortBy(_._2,false).saveAsTextFile("hdfs://mini1:9000/wc/sparkOutput2/")
The same job as a standalone application (input and output paths are passed as arguments):
import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("WC")
    val sc = new SparkContext(conf)
    // args(0) = input path, args(1) = output path
    sc.textFile(args(0)).flatMap(_.split(" "))
      .map((_, 1)).reduceByKey(_ + _).sortBy(_._2, false)
      .saveAsTextFile(args(1))
    // sc.textFile("hdfs://mini1:9000/wc/sparkInput").flatMap(_.split(" "))
    //   .map((_,1)).reduceByKey(_+_,1).sortBy(_._2,false).saveAsTextFile("hdfs://mini1:9000/wc/sparkOutput2/")
    sc.stop()
  }
}
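A sketch of how this would be submitted once packaged; the jar name helloSpark-2.0.jar follows from the pom below, the class name assumes WordCount stays in the default package as written, and the jar path and output directory are placeholders (the output directory must not already exist):
bin/spark-submit --class WordCount --master spark://mini1:7077 --executor-memory 512m --total-executor-cores 1 /path/to/helloSpark-2.0.jar hdfs://mini1:9000/wc/sparkInput hdfs://mini1:9000/wc/sparkOutput3/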
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>cn.my.spark</groupId>
    <artifactId>helloSpark</artifactId>
    <version>2.0</version>

    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <scala.version>2.10.6</scala.version>
        <spark.version>1.6.1</spark.version>
        <hadoop.version>2.6.4</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.scala-lang</groupId>
            <artifactId>scala-library</artifactId>
            <version>${scala.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.10</artifactId>
            <version>${spark.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

    <build>
        <sourceDirectory>src/main/scala</sourceDirectory>
        <testSourceDirectory>src/test/scala</testSourceDirectory>
        <plugins>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                        <configuration>
                            <args>
                                <arg>-make:transitive</arg>
                                <arg>-dependencyfile</arg>
                                <arg>${project.build.directory}/.scala_dependencies</arg>
                            </args>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.3</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
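To build, the usual Maven invocation applies; with the shade plugin's default settings the shaded jar should land at target/helloSpark-2.0.jar:
mvn clean package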