我在本地环境中运行 Spark Scala 作业，尝试从 GCS 存储桶读取文件时遇到授权问题。
"code" : 403,
"errors" : [ {
"domain" : "global",
"location" : "Authorization",
"locationType" : "header",
"message" : "The project to be billed is associated with an absent billing account.",
"reason" : "accountDisabled"
我已经从我的 IntelliJ 终端验证并设置了项目
gcloud config set account [email protected]
gcloud config set project my-test-project
我的身份 '[email protected]' 在 IAM 页面上具有 Storage Admin 角色访问权限。使用同一终端，我可以列出存储桶信息：
gsutil ls -l gs://my-test-bucket/spark-job-configs
这是我的代码:
package com.vikrant.test
import com.google.cloud.storage.{BlobId, StorageOptions}
import org.apache.spark.sql.SparkSession
/**
 * Minimal local Spark job that fetches one config blob from GCS and prints it.
 *
 * NOTE(review): the Java GCS client authenticates via Application Default
 * Credentials, not the active `gcloud config` account — presumably you also
 * need `gcloud auth application-default login` locally; verify.
 */
object ReadBucketFileData {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("spark-bigquery-demo")
      .config("spark.master", "local")
      .getOrCreate()

    val storage = StorageOptions.getDefaultInstance.getService

    // BUG FIX: BlobId.of takes (bucketName, objectName) — not a gs:// URI.
    // The original passed the placeholder "bucket_name" as the bucket and the
    // full URI as the object name, so the request targeted a bucket literally
    // named "bucket_name" (owned by someone else's unbilled project → 403).
    val blobId = BlobId.of("my-test-bucket", "spark-job-configs/my_test.yml")

    // storage.get returns null when the object does not exist; wrap in Option
    // so absence is explicit instead of a surprise NPE downstream.
    val myBlob = Option(storage.get(blobId))
    println("my_blob:" + myBlob.orNull)

    spark.stop()
  }
}
这是我的build.sbt:
name := "MyTestProject"
version := "0.1"
scalaVersion := "2.11.12"

// BUG FIX: the original body contained two separate `Seq(...)` expressions.
// Only the LAST expression of a Scala block is its value, so the first Seq
// (spark-core) was silently discarded and never added to the classpath.
// Return a single Seq holding both Spark modules.
libraryDependencies ++= {
  val sparkVersion = "2.4.3"
  Seq(
    "org.apache.spark" %% "spark-core" % sparkVersion,
    "org.apache.spark" %% "spark-sql"  % sparkVersion
  )
}
libraryDependencies += "com.google.cloud.spark" %% "spark-bigquery-with-dependencies" % "0.16.1"
libraryDependencies += "net.jcazevedo" %% "moultingyaml" % "0.4.2"
libraryDependencies += "net.liftweb" %% "lift-json" % "3.4.1"
// NOTE(review): google-cloud-storage 1.23.0 is very old — consider aligning it
// with the version bundled by spark-bigquery-with-dependencies to avoid
// classpath conflicts; verify before bumping.
libraryDependencies += "com.google.cloud" % "google-cloud-storage" % "1.23.0"