Connect To Hive With Spark 1.5

My class

import java.util.*;
import org.apache.spark.SparkContext;
import org.apache.spark.SparkConf;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.hive.HiveContext;
import org.datanucleus.api.jdo.JDOPersistenceManagerFactory;
import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
import org.datanucleus.exceptions.NucleusException;

public class RequeteKindiClass{
        public static void main(String[] args) {
                String request = args[0];
                SparkConf conf = new SparkConf().setAppName("BenchHawk");
                SparkContext sc = new SparkContext(conf);
                HiveContext hiveContext = new org.apache.spark.sql.hive.HiveContext(sc);
                DataFrame df = hiveContext.sql(request);

Compile it

javac -cp "/usr/hdp_mount/hdp/*:/usr/hdp_mount/hdp/*" ~/RequeteKindiClass.java

Package it

jar cf BenchmarkHawk.jar ./RequeteKindiClass.class

Run it

  • Tip 1: If your spark-submit needs some additional jars, you may add the missing jars to your spark-submit command using the --jars option
  • Tip 2: You must copy your hive-site.xml to all your workers into /etc/spark/conf
spark-submit \
--jars /usr/hdp_mount/hdp/<jar1>.jar,/usr/hdp_mount/hdp/<jar2>.jar,/usr/hdp_mount/hdp/<jar3>.jar \
--class RequeteKindiClass \
--master yarn \
--deploy-mode cluster \
--files /etc/spark/conf/hive-site.xml \
--queue q_datalab \
BenchmarkHawk.jar \
"show databases"

comments powered by Disqus