Hadoop on GCP with Hive
GCP Hadoop Hive Tutorials

This walkthrough provisions a Dataproc cluster backed by a Cloud SQL Hive metastore and a Cloud Storage warehouse bucket, then loads a sample Parquet dataset and queries it with Hive.

Set the region and default zone used throughout the tutorial:

export REGION=us-central1
export ZONE=us-central1-a
gcloud config set compute/zone $ZONE
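
If you want to confirm the active zone before moving on, gcloud can print it back (an optional check, not part of the original steps):

gcloud config get-value compute/zone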

Enable the Dataproc and Cloud SQL Admin APIs:

gcloud services enable dataproc.googleapis.com sqladmin.googleapis.com
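
You can verify that both APIs are active before creating any resources; filtering the enabled-services list with grep is one simple way (optional check):

gcloud services list --enabled | grep -E 'dataproc|sqladmin'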

Capture the project ID and create a Cloud Storage bucket for the Hive warehouse:

export PROJECT=$(gcloud info --format='value(config.project)')
gsutil mb -l $REGION gs://$PROJECT-warehouse
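
A quick way to confirm the bucket exists and landed in the intended region (optional check):

gsutil ls -L -b gs://$PROJECT-warehouse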

Create a Cloud SQL (MySQL) instance to serve as the Hive metastore:

gcloud sql instances create hive-metastore \
    --database-version="MYSQL_5_7" \
    --activation-policy=ALWAYS \
    --gce-zone $ZONE
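
Instance creation can take a few minutes; you can poll its state and proceed once it reports RUNNABLE (optional check):

gcloud sql instances describe hive-metastore --format='value(state)'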

Create a Dataproc cluster that runs the Cloud SQL proxy initialization action, so Hive keeps its metadata in the Cloud SQL instance and its warehouse data in the bucket:

gcloud dataproc clusters create hive-cluster \
    --scopes sql-admin \
    --image-version 1.3 \
    --initialization-actions gs://dataproc-initialization-actions/cloud-sql-proxy/cloud-sql-proxy.sh \
    --properties hive:hive.metastore.warehouse.dir=gs://$PROJECT-warehouse/datasets \
    --metadata "hive-metastore-instance=$PROJECT:$REGION:hive-metastore"
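
If you prefer an interactive session over submitting jobs, you can SSH into the cluster's master node (Dataproc names it hive-cluster-m) and open Beeline against the local HiveServer2; a minimal sketch, assuming the default port 10000:

gcloud compute ssh hive-cluster-m

# then, on the master node:
beeline -u "jdbc:hive2://localhost:10000"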

Copy the sample transactions dataset (Parquet) into the warehouse bucket:

gsutil cp gs://hive-solution/part-00000.parquet \
    gs://$PROJECT-warehouse/datasets/transactions/part-00000.parquet
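
Listing the destination folder confirms the file copied and shows its size (optional check):

gsutil ls -l gs://$PROJECT-warehouse/datasets/transactions/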

Create an external Hive table over the Parquet file:

gcloud dataproc jobs submit hive \
    --cluster hive-cluster \
    --execute "
      CREATE EXTERNAL TABLE transactions
      (SubmissionDate DATE, TransactionAmount DOUBLE, TransactionType STRING)
      STORED AS PARQUET
      LOCATION 'gs://$PROJECT-warehouse/datasets/transactions';"
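
To confirm the table is registered in the Cloud SQL metastore and points at the warehouse bucket, you can describe it (optional check):

gcloud dataproc jobs submit hive \
    --cluster hive-cluster \
    --execute "DESCRIBE FORMATTED transactions;"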

Query the table to confirm the data is readable:

gcloud dataproc jobs submit hive \
    --cluster hive-cluster \
    --execute "SELECT * FROM transactions LIMIT 10;"
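
The same pattern handles aggregations; for example, a per-type summary of the transactions (a sketch reusing the columns defined above):

gcloud dataproc jobs submit hive \
    --cluster hive-cluster \
    --execute "
      SELECT TransactionType,
             COUNT(*) AS NumTransactions,
             AVG(TransactionAmount) AS AvgAmount
      FROM transactions
      GROUP BY TransactionType;"

Because the metastore lives in Cloud SQL, other engines on the cluster see the same table; Spark SQL, for instance, can run a query against it as well (assuming the default Dataproc Spark setup):

gcloud dataproc jobs submit spark-sql \
    --cluster hive-cluster \
    --execute "SELECT COUNT(*) FROM transactions;"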

