
AI Shift コンペティションを開催
1 2 3 |
# Fetch the Zeppelin sources into ~/incubator-zeppelin so the `cd` below
# works regardless of the directory this is run from.
git clone https://github.com/apache/incubator-zeppelin.git ~/incubator-zeppelin
cd ~/incubator-zeppelin
# Build with the Spark 1.6 profile; tests are skipped.
mvn clean package -Pspark-1.6 -DskipTests
1 |
# Start the Zeppelin daemon (the relative path means this must be run from the Zeppelin directory).
./bin/zeppelin-daemon.sh start
1 |
com.databricks:spark-csv_2.10:1.4.0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
%spark

import org.apache.spark.sql.types.{
  StructType,
  StructField,
  StringType,
  FloatType
}
// NOTE(review): Row is not referenced anywhere in this paragraph.
import org.apache.spark.sql.Row

// Schema: four nullable float measurements followed by a nullable string label.
val customSchema = StructType(Seq(
  StructField("sepalLength", FloatType, true),
  StructField("sepalWidth", FloatType, true),
  StructField("petalLength", FloatType, true),
  StructField("petalWidth", FloatType, true),
  StructField("species", StringType, true)
))

// Load the CSV as a DataFrame through the spark-csv data source,
// applying the explicit schema above and the "header" option set to "true".
val df = sqlContext.read
  .format("com.databricks.spark.csv")
  .option("header", "true")
  .schema(customSchema)
  .load("/tmp/iris.csv")

// Register the DataFrame as temp table "iris" so Spark SQL can query it.
df.registerTempTable("iris")

// Try pulling the data back out with a SQL query and print it.
val dfIris = sqlContext.sql("SELECT * FROM iris")
dfIris.show()
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
customSchema: org.apache.spark.sql.types.StructType = StructType(StructField(sepalLength,FloatType,true), StructField(sepalWidth,FloatType,true), StructField(petalLength,FloatType,true), StructField(petalWidth,FloatType,true), StructField(species,StringType,true))
df: org.apache.spark.sql.DataFrame = [sepalLength: float, sepalWidth: float, petalLength: float, petalWidth: float, species: string]
dfIris: org.apache.spark.sql.DataFrame = [sepalLength: float, sepalWidth: float, petalLength: float, petalWidth: float, species: string]
+-----------+----------+-----------+----------+-------+
|sepalLength|sepalWidth|petalLength|petalWidth|species|
+-----------+----------+-----------+----------+-------+
|        5.1|       3.5|        1.4|       0.2| setosa|
|        4.9|       3.0|        1.4|       0.2| setosa|
|        4.7|       3.2|        1.3|       0.2| setosa|
|        4.6|       3.1|        1.5|       0.2| setosa|
|        5.0|       3.6|        1.4|       0.2| setosa|
・
・
・
1 2 |
%spark.sql
-- Return every row of the iris temp table.
SELECT * FROM iris
1 2 |
%spark.sql
-- Keep rows whose sepalLength lies in [sepalLengthMin, sepalLengthMax); both bounds come from Zeppelin text-input forms.
SELECT *
FROM iris
WHERE ("${sepalLengthMin}" <= sepalLength)
  AND (sepalLength < "${sepalLengthMax}")
1 2 |
%spark.sql
-- Filter by species chosen from a Zeppelin select form (default: setosa).
SELECT *
FROM iris
WHERE species = "${species=setosa,setosa|versicolor|virginica}"
Author