Spark Source Code Walkthrough (1): Action Submission, Part 2

 

Continued from the previous post.

 

Step 5-TaskSchedulerImpl

override def submitTasks(taskSet: TaskSet) {

  val tasks = taskSet.tasks

  logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks")

  this.synchronized {

    // Create the TaskSetManager

    val manager = createTaskSetManager(taskSet, maxTaskFailures)

    val stage = taskSet.stageId

 

    // Register the TaskSetManager in the stage's TaskSet list; taskSetsByStageIdAndAttempt is a HashMap of TaskSetManagers keyed by stage id and attempt number

    val stageTaskSets = taskSetsByStageIdAndAttempt.getOrElseUpdate(stage, new HashMap[Int, TaskSetManager])

    stageTaskSets(taskSet.stageAttemptId) = manager

 

    // Throw if there is a conflicting TaskSet

    val conflictingTaskSet = stageTaskSets.exists { case (_, ts) => ts.taskSet != taskSet && !ts.isZombie }

    if (conflictingTaskSet) {

      throw new IllegalStateException(s"more than one active taskSet for stage $stage:" + s" ${stageTaskSets.toSeq.map{_._2.taskSet.id}.mkString(",")}")

    }

 

    /* Request task scheduling. There are two policies, FIFO and FAIR. Tasks are assigned to executors according to each executor's free resources and the locality policy. The scheduling data structure is wrapped in the Pool class: for FIFO the Pool is a queue of TaskSetManagers; for FAIR it is a tree of TaskSetManagers. The Pool maintains TaskSet priorities; TaskSets are dequeued and submitted to executors when an executor accepts a resource offer (resourceOffer). See Section 4.3.3 of 《内幕》 for the implementation. (A minimal configuration sketch follows after this method.) */

    schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)

 

    // This timer only logs a warning if no scheduling happens within a certain time; it does not affect execution

    if (!isLocal && !hasReceivedTask) {

      starvationTimer.scheduleAtFixedRate(new TimerTask() {

        override def run() {

          if (!hasLaunchedTask) {

            logWarning("Initial job has not accepted any resources; " + "check your cluster UI to ensure that workers are registered " + "and have sufficient resources")

          } else {

            this.cancel()

          }

        }

      }, STARVATION_TIMEOUT_MS, STARVATION_TIMEOUT_MS)

    }

    hasReceivedTask = true

  }

 

  // Send a ReviveOffers message (extends CoarseGrainedClusterMessage) to the SchedulerBackend; different resource scheduling modes have different implementations. The RPC layer has Akka and Netty implementations

  // In Standalone mode the backend is CoarseGrainedSchedulerBackend; this eventually triggers Executor.launchTask. The message is received by the DriverEndpoint on the Driver side

  backend.reviveOffers()

}
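
As a side note on the Pool comment above, here is a minimal, hypothetical configuration sketch (the app name, pool name and XML path are made up) showing how the scheduling mode behind schedulableBuilder is usually selected:

import org.apache.spark.{SparkConf, SparkContext}

val conf = new SparkConf()
  .setAppName("scheduling-mode-demo")                                   // made-up app name
  .set("spark.scheduler.mode", "FAIR")                                  // default is FIFO
  .set("spark.scheduler.allocation.file", "/path/to/fairscheduler.xml") // pool definitions for FAIR mode

val sc = new SparkContext(conf)
// Jobs submitted from this thread go into the (hypothetical) "batch" pool
sc.setLocalProperty("spark.scheduler.pool", "batch")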

 

Step 6-CoarseGrainedSchedulerBackend

 

override def reviveOffers() {

  driverEndpoint.send(ReviveOffers)

}

 

AkkaRpcEnv

override def send(message: Any): Unit = {

  actorRef ! AkkaMessage(message, false)

}

 

CoarseGrainedSchedulerBackend.DriverEndPoint

override def receive: PartialFunction[Any, Unit] = {

  case ReviveOffers => // receive the request and start making offers

    makeOffers()

    ... // other messages

}

 

// Send the executors a "fake" offer: fake in the sense that in Standalone mode the Spark driver itself acts as the resource scheduler. In yarn/mesos mode, other SchedulerBackend implementations are used instead

private def makeOffers() {

  // Keep only alive executors. ExecutorData wraps an executor's attributes and state

  val activeExecutors = executorDataMap.filterKeys(executorIsAlive)

  val workOffers = activeExecutors.map { case (id, executorData) =>

    // Wrap the offer; WorkerOffer is just a plain holder of executorId, host and freeCores (see the sketch after this method)

    new WorkerOffer(id, executorData.executorHost, executorData.freeCores)

  }.toSeq

  launchTasks(scheduler.resourceOffers(workOffers))

}
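
For reference, WorkerOffer is roughly the following shape (a simplified sketch, not copied verbatim from the source):

// Simplified sketch of the offer handed from the backend to TaskSchedulerImpl.resourceOffers
case class WorkerOffer(executorId: String, host: String, cores: Int)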

 

TaskSchedulerImpl

// Called when the cluster manager offers resources on slaves. Tasks are taken from the task sets in priority order and matched against the offers. Tasks are handed to each node in round-robin fashion so that they spread across the cluster

def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = synchronized {

  // Mark every executor that sent an offer as alive; if an executor is new, register it
  var newExecAvail = false

  for (o <- offers) {

    // o.host is ExecutorData.host; cache the mapping here

    executorIdToHost(o.executorId) = o.host

    // counter of tasks running on this executor

    executorIdToTaskCount.getOrElseUpdate(o.executorId, 0)

    if (!executorsByHost.contains(o.host)) {

      // If this is a new executor id, register the new executor

      executorsByHost(o.host) = new HashSet[String]()

      executorAdded(o.executorId, o.host)

      newExecAvail = true

    }

    // rack means the physical rack. Standalone mode has no real rack information; racks are mocked out by the FakeRackUtil class

    for (rack <- getRackForHost(o.host)) {

      hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += o.host

    }

  }

  // Randomly shuffle the offers

  val shuffledOffers = Random.shuffle(offers)

  // Create one task list per offer (executor); a TaskDescription carries the task id, the TaskSet id, the attempt number, etc.

  val tasks = shuffledOffers.map(o => new ArrayBuffer[TaskDescription](o.cores))

 

  val availableCpus = shuffledOffers.map(o => o.cores).toArray

 

  // Sort the TaskSets according to the priority policy

  val sortedTaskSets = rootPool.getSortedTaskSetQueue

  for (taskSet <- sortedTaskSets) {

    logDebug("parentName: %s, name: %s, runningTasks: %s".format(taskSet.parent.name, taskSet.name, taskSet.runningTasks))

    if (newExecAvail) {

      // If a new executor became available, recompute locality; see TaskSetManager.recomputeLocality

      taskSet.executorAdded()

    }

  }

 

  // Assign tasks one by one according to the configured locality policy and task priority; assigning simply means appending to the tasks map above

  // The default locality order, from most to least preferred, is PROCESS_LOCAL, NODE_LOCAL, NO_PREF, RACK_LOCAL, ANY (a locality-wait config sketch follows after this method)

  var launchedTask = false

  for (taskSet <- sortedTaskSets; maxLocality <- taskSet.myLocalityLevels) {

    do {

      launchedTask = resourceOfferSingleTaskSet(taskSet, maxLocality, shuffledOffers, availableCpus, tasks)

    } while (launchedTask)

  }

 

  if (tasks.size > 0) {

    hasLaunchedTask = true

  }

  return tasks

}
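
The locality downgrade above is driven by the delay-scheduling wait times. A minimal configuration sketch (the values are just examples):

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.locality.wait", "3s")          // base wait before relaxing to the next locality level
  .set("spark.locality.wait.process", "3s")  // per-level override for PROCESS_LOCAL
  .set("spark.locality.wait.node", "3s")     // per-level override for NODE_LOCAL
  .set("spark.locality.wait.rack", "3s")     // per-level override for RACK_LOCAL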

 

// Try to assign tasks from a single TaskSet

private def resourceOfferSingleTaskSet(

    taskSet: TaskSetManager, maxLocality: TaskLocality, shuffledOffers: Seq[WorkerOffer], availableCpus: Array[Int], tasks: Seq[ArrayBuffer[TaskDescription]]) : Boolean = {

  var launchedTask = false

  // round-robin over the executors

  for (i <- 0 until shuffledOffers.size) {

    val execId = shuffledOffers(i).executorId

    val host = shuffledOffers(i).host

    // If enough CPUs are free, try to assign a task (CPUS_PER_TASK is a config value; see the sketch after this method)

    if (availableCpus(i) >= CPUS_PER_TASK) {

      try {

        // resourceOffer offers this executor to the TaskSet and returns the task that was successfully assigned, if any

        for (task <- taskSet.resourceOffer(execId, host, maxLocality)) {

          tasks(i) += task

          // On success, record the assignment

          val tid = task.taskId

          taskIdToTaskSetManager(tid) = taskSet

          taskIdToExecutorId(tid) = execId

          executorIdToTaskCount(execId) += 1

          executorsByHost(host) += execId

          availableCpus(i) -= CPUS_PER_TASK

          assert(availableCpus(i) >= 0)

          launchedTask = true

        }

      } catch {

        case e: TaskNotSerializableException =>

          logError(s"Resource offer failed, task set ${taskSet.name} was not serializable")

          // Do not offer resources for this task, but don't throw an error to allow other task sets to be submitted.

          return launchedTask

      }

    }

  }

  return launchedTask

}
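
CPUS_PER_TASK above is read from spark.task.cpus (default 1). A one-line sketch of raising it, with an example value:

import org.apache.spark.SparkConf

// Each task then reserves 2 entries of availableCpus instead of 1
val conf = new SparkConf().set("spark.task.cpus", "2")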

 

Step 7-TaskSetManager

/**

 * Respond to an offer of resources on a single executor and assign a Task

 * NOTE: this function is called either with a maxLocality that may be adjusted by the delay scheduling algorithm, or with the special NO_PREF locality, which is never modified

 * @param execId the executor Id of the offered resource

 * @param host  the host Id of the offered resource

 * @param maxLocality the maximum locality we want to schedule the tasks at

 */

@throws[TaskNotSerializableException]

def resourceOffer(execId: String, host: String, maxLocality: TaskLocality.TaskLocality) : Option[TaskDescription] = {

  if (!isZombie) {

    val curTime = clock.getTimeMillis()

    var allowedLocality = maxLocality

    // For locality levels subject to wait times, compute the effective locality from the elapsed wait

    if (maxLocality != TaskLocality.NO_PREF) {

      allowedLocality = getAllowedLocalityLevel(curTime)

      if (allowedLocality > maxLocality) {

        // We're not allowed to search for farther-away tasks

        allowedLocality = maxLocality

      }

    }

 

    // According to the scheduling policy, dequeue from this TaskSetManager the index of a task that satisfies the locality constraint

    dequeueTask(execId, host, allowedLocality) match {

      case Some((index, taskLocality, speculative)) => {

        // If one was found, the assignment succeeds; record the bookkeeping info and return a TaskDescription

        val task = tasks(index)

        val taskId = sched.newTaskId()

        // Do various bookkeeping

        copiesRunning(index) += 1

        val attemptNum = taskAttempts(index).size

        // Create and record the TaskInfo

        val info = new TaskInfo(taskId, index, attemptNum, curTime, execId, host, taskLocality, speculative)

        taskInfos(taskId) = info

        taskAttempts(index) = info :: taskAttempts(index)

        // Update the locality-related timestamps

        if (maxLocality != TaskLocality.NO_PREF) {

          currentLocalityIndex = getLocalityIndex(taskLocality)

          lastLaunchTime = curTime

        }

        // Serialize the Task together with its dependencies (the metadata of the files/jars to download)

        val startTime = clock.getTimeMillis()

        val serializedTask: ByteBuffer = try {

          Task.serializeWithDependencies(task, sched.sc.addedFiles, sched.sc.addedJars, ser)

        } catch {

          // If serialization fails, abort the computation by throwing, which is why the driver sees the error below

          case NonFatal(e) =>

            val msg = s"Failed to serialize task $taskId, not attempting to retry it." logError(msg, e)

            abort(s"$msg Exception during serialization: $e")

            throw new TaskNotSerializableException(e)

        }

 

        // If the serialized task is too large (default threshold 100 KB), log a warning, but do not throw here (see the broadcast sketch after this method)

        if (serializedTask.limit > TaskSetManager.TASK_SIZE_TO_WARN_KB * 1024 && !emittedTaskSizeWarning) {

          emittedTaskSizeWarning = true

          logWarning(s"Stage ${task.stageId} contains a task of very large size " + s"(${serializedTask.limit / 1024} KB). The maximum recommended task size is " + s"${TaskSetManager.TASK_SIZE_TO_WARN_KB} KB.")

        }

        // Record the task as running

        addRunningTask(taskId)

 

        // Task naming convention

        val taskName = s"task ${info.id} in stage ${taskSet.id}"

        logInfo(s"Starting $taskName (TID $taskId, $host, partition ${task.partitionId}," + s"$taskLocality, ${serializedTask.limit} bytes)")

 

        /* Send a BeginEvent to DAGScheduler.eventProcessLoop, which asynchronously triggers dagScheduler.handleBeginEvent. That method first posts an event to the listener bus (ListenerBus) and then calls back into DAGScheduler.submitStage() to try to submit the remaining stages. Of course, if earlier stages have not finished, the callback does nothing. Many DAGScheduler messages trigger submitStage, forming a callback loop that eventually submits all stages. */

        sched.dagScheduler.taskStarted(task, info)

 

        // Return the TaskDescription

        return Some(new TaskDescription(taskId = taskId, attemptNumber = attemptNum, execId,

          taskName, index, serializedTask))

      }

      case _ =>

    }

  }

  None

}
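
The usual fix for the large-task warning above is to stop capturing big data in the task closure and broadcast it instead. A minimal sketch (the data and names are made up):

import org.apache.spark.SparkContext

def lookupScores(sc: SparkContext, bigTable: Map[String, Int]) = {
  val bcTable = sc.broadcast(bigTable)   // shipped once per executor instead of being serialized into every task
  sc.parallelize(Seq("a", "b", "c")).map(k => bcTable.value.getOrElse(k, 0))
}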

 

Task

// Serialize the Task and its dependency files

def serializeWithDependencies(task: Task[_], currentFiles: HashMap[String, Long], currentJars: HashMap[String, Long], serializer: SerializerInstance): ByteBuffer = {

 

  val out = new ByteBufferOutputStream(4096)

  val dataOut = new DataOutputStream(out)

 

  // Serialize the contents of sc.addedFiles (really just the dependency metadata: file name and timestamp); the files themselves are not downloaded here, and the same applies to addedJars. addedFiles may contain resource files (see the usage sketch after this method)

  dataOut.writeInt(currentFiles.size)

  for ((name, timestamp) <- currentFiles) {

    dataOut.writeUTF(name)

    dataOut.writeLong(timestamp)

  }

  // Serialize the contents of sc.addedJars

  dataOut.writeInt(currentJars.size)

  for ((name, timestamp) <- currentJars) {

    dataOut.writeUTF(name)

    dataOut.writeLong(timestamp)

  }

 

  // Finally, serialize the Task itself

  dataOut.flush()

  val taskBytes = serializer.serialize(task)

  Utils.writeByteBuffer(taskBytes, out)

  out.toByteBuffer

}
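
The addedFiles/addedJars maps serialized above are populated on the driver by SparkContext.addFile and SparkContext.addJar. A minimal usage sketch (the paths are made up):

import org.apache.spark.{SparkContext, SparkFiles}

def registerDependencies(sc: SparkContext): Unit = {
  sc.addFile("hdfs:///configs/lookup.properties") // resource file: only its name and timestamp travel with each task
  sc.addJar("/opt/jobs/udfs.jar")                 // jar: downloaded on the executor and added to its classloader later
}

// Inside a task on the executor, the downloaded copy can then be located with:
// val localPath = SparkFiles.get("lookup.properties")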

 

Step 8-CoarseGrainedSchedulerBackend

// Launch the tasks that have been assigned resources

private def launchTasks(tasks: Seq[Seq[TaskDescription]]) {

  // Iterate over all TaskDescriptions that were assigned resources

  for (task <- tasks.flatten) {

    // Serialize the TaskDescription itself (its dependencies were already serialized); there are Java and Kryo implementations, both returning bytes. The Java implementation is plain java.io.Serializable serialization, so the class holding the operator must implement Serializable (and if it is a nested class, so must its enclosing class; see the sketch after this method)

    val serializedTask = ser.serialize(task)

    // Estimate the size of the Akka message; if it exceeds the limit, fail immediately. The limit is configured via spark.akka.frameSize, and AkkaUtils.reservedSizeBytes is fixed at 200 KB

    if (serializedTask.limit >= akkaFrameSize - AkkaUtils.reservedSizeBytes) {

      scheduler.taskIdToTaskSetManager.get(task.taskId).foreach { taskSetMgr =>

        try {

          var msg = "Serialized task %s:%d was %d bytes, which exceeds max allowed: " + "spark.akka.frameSize (%d bytes) - reserved (%d bytes). Consider increasing " + "spark.akka.frameSize or using broadcast variables for large values."

          msg = msg.format(task.taskId, task.index, serializedTask.limit, akkaFrameSize, AkkaUtils.reservedSizeBytes)

          taskSetMgr.abort(msg)

        } catch {

          case e: Exception => logError("Exception in error callback", e)

        }

      }

    } else {

      val executorData = executorDataMap(task.executorId)

      // Record that this executor spends CPUS_PER_TASK cores on this task

      executorData.freeCores -= scheduler.CPUS_PER_TASK

      // Send an Akka message to the executor; control now passes to CoarseGrainedExecutorBackend

      executorData.executorEndpoint.send(LaunchTask(new SerializableBuffer(serializedTask)))

    }

  }

}
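
To illustrate the Serializable requirement mentioned in the comment above, a small sketch (class names are made up): referencing a field inside an RDD operator captures the enclosing instance, so either that class is made Serializable or the field is copied into a local val first.

import org.apache.spark.rdd.RDD

// Variant 1: the map closure captures `this`, so the enclosing class must be Serializable
class Scaler(val factor: Int) extends Serializable {
  def scale(rdd: RDD[Int]): RDD[Int] = rdd.map(_ * factor)
}

// Variant 2: copy the field into a local val so `this` is never captured at all
class Scaler2(val factor: Int) {
  def scale(rdd: RDD[Int]): RDD[Int] = {
    val f = factor
    rdd.map(_ * f)
  }
}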

 

Step 9-CoarseGrainedExecutorBackend

From Section 6.2 of 《内幕》 onwards. At this point the ExecutorBackend already lives inside the Executor process. The Executor process was created when the SparkContext was created (it is not created only when an Action is triggered).

 

override def receive: PartialFunction[Any, Unit] = {

  case LaunchTask(data) =>

    if (executor == null) {

      logError("Received LaunchTask command but executor was null")

      System.exit(1)

    } else {

      // Deserialize the TaskDescription

      val taskDesc = ser.deserialize[TaskDescription](data.value)

      logInfo("Got assigned task " + taskDesc.taskId)

      // Executor holds the worker node's metadata and environment (SparkEnv); the computation is launched on the worker through the Executor

      executor.launchTask(this, taskId = taskDesc.taskId, attemptNumber = taskDesc.attemptNumber, taskDesc.name, taskDesc.serializedTask)

    }

      ... // other messages

}

 

Executor

def launchTask(context: ExecutorBackend, taskId: Long, attemptNumber: Int, taskName: String, serializedTask: ByteBuffer): Unit = {

  // TaskRunner is a Runnable submitted to the thread pool for execution

  val tr = new TaskRunner(context, taskId = taskId, attemptNumber = attemptNumber, taskName, serializedTask)

  runningTasks.put(taskId, tr)

  threadPool.execute(tr)

}

 

TaskRunner

override def run(): Unit = {

  // TaskMemoryManager manages memory for this task inside the worker process

  val taskMemoryManager = new TaskMemoryManager(env.memoryManager, taskId)

  val deserializeStartTime = System.currentTimeMillis()

 

  // replClassLoader is the external executor classloader configured via spark.repl.class.uri and org.apache.spark.repl.ExecutorClassLoader; it is unset by default

  Thread.currentThread.setContextClassLoader(replClassLoader)

  

  val ser = env.closureSerializer.newInstance()

  logInfo(s"Running $taskName (TID $taskId)")

  // Update the task state

  execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)

  var taskStart: Long = 0

  startGCTime = computeTotalGcTime()

 

  try {

    val (taskFiles, taskJars, taskBytes) = Task.deserializeWithDependencies(serializedTask)    // deserialize the Task and its dependency metadata

    updateDependencies(taskFiles, taskJars)    // download/update the dependencies

    task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader)    // deserialize the Task itself

    task.setTaskMemoryManager(taskMemoryManager)    // attach the memory manager

    if (killed) { // if the task was already killed, bail out with an exception

      throw new TaskKilledException

    }

 

    logDebug("Task " + taskId + "'s epoch is " + task.epoch)

    // MapOutputTracker tracks the locations of stage outputs for subsequent shuffles

    env.mapOutputTracker.updateEpoch(task.epoch)

 

    // Run the Task and record runtime information; value is the computation result and accumUpdates are the accumulator updates

    taskStart = System.currentTimeMillis()

    var threwException = true

    val (value, accumUpdates) = try {

      val res = task.run(taskAttemptId = taskId, attemptNumber = attemptNumber, metricsSystem = env.metricsSystem)

      threwException = false

      res

    } finally {

      // Clean up memory; if a leak is detected, warn or fail

      val freedMemory = taskMemoryManager.cleanUpAllAllocatedMemory()

      if (freedMemory > 0) {

        val errMsg = s"Managed memory leak detected; size = $freedMemory bytes, TID = $taskId"

        if (conf.getBoolean("spark.unsafe.exceptionOnMemoryLeak", false) && !threwException) {

          throw new SparkException(errMsg)

        } else {

          logError(errMsg)

        }

      }

    }

    val taskFinish = System.currentTimeMillis()

 

    // If the task has been killed, let's fail it.

    if (task.killed) {

      throw new TaskKilledException

    }

 

    // Prepare to send the return value (the computation result) back

    val resultSer = env.serializer.newInstance()

    val beforeSerialization = System.currentTimeMillis()

    val valueBytes = resultSer.serialize(value)

    val afterSerialization = System.currentTimeMillis()

 

    // Fill in the task metrics (run time, GC time, serialization time, etc.) and update the accumulators

    for (m <- task.metrics) {

      // Deserialization covers the Task object itself (including partition info) as well as the RDD and the operator functions

      m.setExecutorDeserializeTime((taskStart - deserializeStartTime) + task.executorDeserializeTime)

      // Subtract the deserialization time; what remains is the operator execution time

      m.setExecutorRunTime((taskFinish - taskStart) - task.executorDeserializeTime)

      m.setJvmGCTime(computeTotalGcTime() - startGCTime)

      m.setResultSerializationTime(afterSerialization - beforeSerialization)

      m.updateAccumulators()

    }

 

    // Wrap as a DirectTaskResult

    val directResult = new DirectTaskResult(valueBytes, accumUpdates, task.metrics.orNull)

    val serializedDirectResult = ser.serialize(directResult)

    val resultSize = serializedDirectResult.limit

 

    // directSend = sending directly back to the driver

    val serializedResult: ByteBuffer = {

      // If the result exceeds maxResultSize, drop it (see the config sketch after this method)

      if (maxResultSize > 0 && resultSize > maxResultSize) {

        logWarning(s"Finished $taskName (TID $taskId). Result is larger than maxResultSize " + s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " + s"dropping it.")

        ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize))

      } else if (resultSize >= akkaFrameSize - AkkaUtils.reservedSizeBytes) {

        // Otherwise, if it exceeds the message size limit, persist it as a block and return it indirectly

        val blockId = TaskResultBlockId(taskId)

        env.blockManager.putBytes(blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER)

        logInfo( s"Finished $taskName (TID $taskId). $resultSize bytes result sent via BlockManager)"}

        ser.serialize(new IndirectTaskResult[Any](blockId, resultSize))

      } else {

        // If within the message size limit, return the DirectTaskResult directly

        logInfo(s"Finished $taskName (TID $taskId). $resultSize bytes result sent to driver")

        serializedDirectResult

      }

    }

 

    // Send a StatusUpdate Akka message notifying the Driver that the task finished and carrying the result; control returns to CoarseGrainedSchedulerBackend (the send side is AkkaRpcEnv.send, the receive side is CoarseGrainedSchedulerBackend.DriverEndPoint.receive)

    execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult)

 

  } catch {

    // Assorted exception handling

    case ffe: FetchFailedException => // failed to fetch shuffle data

      val reason = ffe.toTaskEndReason

      execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))

    case _: TaskKilledException | _: InterruptedException if task.killed => // the task was killed

      logInfo(s"Executor killed $taskName (TID $taskId)")

      execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(TaskKilled))

    case cDE: CommitDeniedException => // output commit was denied (e.g. to HDFS)

      val reason = cDE.toTaskEndReason

      execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason))

    case t: Throwable =>

      // Attempt to exit cleanly by informing the driver of our failure.  If anything goes wrong (or this was a fatal exception), we will delegate to the default uncaught exception handler, which will terminate the Executor.

      logError(s"Exception in $taskName (TID $taskId)", t)

 

      val metrics: Option[TaskMetrics] = Option(task).flatMap { task =>

        task.metrics.map { m =>

          m.setExecutorRunTime(System.currentTimeMillis() - taskStart)

          m.setJvmGCTime(computeTotalGcTime() - startGCTime)

          m.updateAccumulators()

          m

        }

      }

      val serializedTaskEndReason = {

        try {

          ser.serialize(new ExceptionFailure(t, metrics))

        } catch {

          case _: NotSerializableException =>

            // t is not serializable so just send the stacktrace

            ser.serialize(new ExceptionFailure(t, metrics, false))

        }

      }

      execBackend.statusUpdate(taskId, TaskState.FAILED, serializedTaskEndReason)

 

      // Don't forcibly exit unless the exception was inherently fatal, to avoid stopping other tasks unnecessarily.

      if (Utils.isFatalError(t)) {

        SparkUncaughtExceptionHandler.uncaughtException(t)

      }

  } finally {

    runningTasks.remove(taskId)

  }

}
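
The Direct/Indirect branch above is controlled by two settings. A minimal sketch with example values:

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.driver.maxResultSize", "1g") // results above this are dropped entirely (0 disables the limit)
  .set("spark.akka.frameSize", "128")      // Akka message limit in MB; larger results go back via the BlockManager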

 

// Download missing file or jar dependencies and add jars to the classloader

private def updateDependencies(newFiles: HashMap[String, Long], newJars: HashMap[String, Long]) {

  // Get the Hadoop configuration

  lazy val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)

  synchronized {

    // file dependencies (resource files)

    for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) {

      logInfo("Fetching " + name + " with timestamp " + timestamp)

      // Fetch file with useCache mode, close cache for local mode.

      Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConf, timestamp, useCache = !isLocal)

      currentFiles(name) = timestamp

    }

    // jar dependencies

    for ((name, timestamp) <- newJars) {

      val localName = name.split("/").last

      val currentTimeStamp = currentJars.get(name).orElse(currentJars.get(localName)).getOrElse(-1L)

      if (currentTimeStamp < timestamp) {

        logInfo("Fetching " + name + " with timestamp " + timestamp)

        // Fetch file with useCache mode, close cache for local mode.

        Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConf, timestamp, useCache = !isLocal)

        currentJars(name) = timestamp

        // Jars additionally need to be added to the classloader

        val url = new File(SparkFiles.getRootDirectory(), localName).toURI.toURL

        if (!urlClassLoader.getURLs().contains(url)) {

          logInfo("Adding " + url + " to class loader")

          urlClassLoader.addURL(url)

        }

      }

    }

  }

}

 

/* Download a file or directory. Files can be fetched over HTTP, from Hadoop-compatible filesystems, or from the local filesystem; fetching directories is only supported from Hadoop-compatible filesystems.

   If useCache == true, first try the local cache, which is shared between executors running the same application (a config sketch follows after this method).

   Throws SparkException if the target file already exists but with different contents. */

def fetchFile(url: String, targetDir: File, conf: SparkConf, securityMgr: SecurityManager, hadoopConf: Configuration, timestamp: Long, useCache: Boolean) {

  val fileName = url.split("/").last

  val targetFile = new File(targetDir, fileName)

  val fetchCacheEnabled = conf.getBoolean("spark.files.useFetchCache", defaultValue = true)

  if (useCache && fetchCacheEnabled) {

    // Try the cache first

    val cachedFileName = s"${url.hashCode}${timestamp}_cache"

    val lockFileName = s"${url.hashCode}${timestamp}_lock"

    val localDir = new File(getLocalDir(conf))

    val lockFile = new File(localDir, lockFileName)

    val lockFileChannel = new RandomAccessFile(lockFile, "rw").getChannel()

    // File lock to prevent concurrent reads/writes

    val lock = lockFileChannel.lock()

    val cachedFile = new File(localDir, cachedFileName)

    try {

      // Cache miss: download the file

      if (!cachedFile.exists()) {

        doFetchFile(url, localDir, cachedFileName, conf, securityMgr, hadoopConf)

      }

    } finally {

      lock.release()

      lockFileChannel.close()

    }

    copyFile(url, cachedFile, targetFile, conf.getBoolean("spark.files.overwrite", false))

  } else {

    // Caching disabled: download directly; different implementations exist for hadoop, http, etc.

    doFetchFile(url, targetDir, fileName, conf, securityMgr, hadoopConf)

  }

 

  // If it is an archive, call the command line to extract it

  if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) {

    logInfo("Untarring " + fileName)

    executeAndGetOutput(Seq("tar", "-xzf", fileName), targetDir)

  } else if (fileName.endsWith(".tar")) {

    logInfo("Untarring " + fileName)

    executeAndGetOutput(Seq("tar", "-xf", fileName), targetDir)

  }

 

  // Add execute permission, in case the file is a script

  FileUtil.chmod(targetFile.getAbsolutePath, "a+x")

 

  // Fix the read permission on Windows

  if (isWindows) {

    FileUtil.chmod(targetFile.getAbsolutePath, "u+r")

  }

}
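
The caching and overwrite behavior consulted by fetchFile above maps to two settings. A minimal sketch with their default values:

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.files.useFetchCache", "true") // share downloaded dependencies between executors of the same application
  .set("spark.files.overwrite", "false")    // whether an existing target file with different contents may be replaced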

 
