Skip to content

Commit a7b9458

Browse files
author
Wang, Gang(Gary)
committed
MNEMONIC-257: Add APIs about DurableRDD's direct IO
1 parent 5c54177 commit a7b9458

File tree

4 files changed

+94
-22
lines changed

4 files changed

+94
-22
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,5 @@ testlog/
4747
.cache-tests
4848
.cache-main
4949
*.mne
50-
50+
*.iml
51+
.idea/

mnemonic-spark/mnemonic-spark-core/src/main/scala/org/apache/mnemonic/spark/rdd/DurableRDD.scala

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -15,40 +15,37 @@
1515
* limitations under the License.
1616
*/
1717

18-
package org.apache.mnemonic.spark.rdd;
18+
package org.apache.mnemonic.spark.rdd
1919

2020
import java.io.File
21-
import scala.util._
2221

2322
import org.apache.spark.rdd.RDD
24-
import org.apache.spark.{ Partition, TaskContext, SparkContext }
25-
import org.apache.spark.internal.Logging
23+
import org.apache.spark._
2624
import org.apache.commons.io.FileUtils
27-
import scala.reflect.{ classTag, ClassTag }
28-
import scala.collection.mutable.HashMap
25+
import scala.reflect.{ ClassTag }
2926
import scala.collection.JavaConverters._
3027
import scala.collection.mutable.ArrayBuffer
31-
import org.apache.mnemonic.ConfigurationException
3228
import org.apache.mnemonic.DurableType
3329
import org.apache.mnemonic.EntityFactoryProxy
3430
import org.apache.mnemonic.NonVolatileMemAllocator
35-
import org.apache.mnemonic.sessions.DurableInputSession
36-
import org.apache.mnemonic.sessions.SessionIterator
3731
import org.apache.mnemonic.sessions.ObjectCreator
3832
import org.apache.mnemonic.spark.MneDurableInputSession
3933
import org.apache.mnemonic.spark.MneDurableOutputSession
4034
import org.apache.mnemonic.spark.DurableException
4135

4236
private[spark] class DurableRDD[D: ClassTag, T: ClassTag] (
43-
private var rdd: RDD[T],
37+
@transient private var _sc: SparkContext,
38+
@transient private var deps: Seq[Dependency[_]],
4439
serviceName: String, durableTypes: Array[DurableType],
4540
entityFactoryProxies: Array[EntityFactoryProxy], slotKeyId: Long,
4641
partitionPoolSize: Long, durableDirectory: String,
4742
f: (T, ObjectCreator[D, NonVolatileMemAllocator]) => Option[D],
4843
preservesPartitioning: Boolean = false)
49-
extends RDD[D](rdd) {
44+
extends RDD[D](_sc, deps) {
5045

51-
val durdddir = DurableRDD.getRddDirName(durableDirectory, id)
46+
private val isInputOnly = null == deps
47+
48+
private val durdddir = DurableRDD.getRddDirName(durableDirectory, id)
5249
DurableRDD.resetRddDir(durdddir)
5350

5451
override val partitioner = if (preservesPartitioning) firstParent[T].partitioner else None
@@ -80,7 +77,7 @@ private[spark] class DurableRDD[D: ClassTag, T: ClassTag] (
8077
val memplist = mempListOpt match {
8178
case None => {
8279
val mplst = prepareDurablePartition(split, context, firstParent[T].iterator(split, context))
83-
logInfo(s"Done transformed RDD #${rdd.id} to durableRDD #${id} on ${durdddir.toString}")
80+
logInfo(s"Done transformed RDD #${firstParent[T].id} to durableRDD #${id} on ${durdddir.toString}")
8481
mplst
8582
}
8683
case Some(mplst) => mplst
@@ -92,12 +89,15 @@ private[spark] class DurableRDD[D: ClassTag, T: ClassTag] (
9289

9390
override def clearDependencies {
9491
super.clearDependencies()
95-
rdd = null
9692
}
9793

9894
def reset {
9995
DurableRDD.resetRddDir(durdddir)
10096
}
97+
98+
def destroy {
99+
DurableRDD.deleteRddDir(durdddir)
100+
}
101101
}
102102

103103
object DurableRDD {
@@ -141,15 +141,24 @@ object DurableRDD {
141141
}
142142

143143
def resetRddDir(rddDirName: String) {
144+
deleteRddDir(rddDirName)
145+
createRddDir(rddDirName)
146+
}
147+
148+
def createRddDir(rddDirName: String) {
144149
val durdddir = new File(rddDirName)
145-
if (durdddir.exists) {
146-
FileUtils.deleteDirectory(durdddir)
147-
}
148150
if (!durdddir.mkdir) {
149151
throw new DurableException(s"Durable RDD directory ${durdddir.toString} cannot be created")
150152
}
151153
}
152154

155+
def deleteRddDir(rddDirName: String) {
156+
val durdddir = new File(rddDirName)
157+
if (durdddir.exists) {
158+
FileUtils.deleteDirectory(durdddir)
159+
}
160+
}
161+
153162
def genDurableFileName(splitId: Int)(mempidx: Long): String = {
154163
durableFileNameTemplate.format(splitId, mempidx)
155164
}
@@ -182,14 +191,24 @@ object DurableRDD {
182191
partitionPoolSize: Long,
183192
f: (T, ObjectCreator[D, NonVolatileMemAllocator]) => Option[D],
184193
preservesPartitioning: Boolean = false) = {
185-
val sc: SparkContext = rdd.context
186-
val ret = new DurableRDD[D, T](rdd,
194+
// val sc: SparkContext = rdd.context
195+
val ret = new DurableRDD[D, T](rdd.context , List(new OneToOneDependency(rdd)),
187196
serviceName, durableTypes, entityFactoryProxies, slotKeyId,
188-
partitionPoolSize, getDurableDir(sc).get, f, preservesPartitioning)
197+
partitionPoolSize, getDurableDir(rdd.context).get, f, preservesPartitioning)
189198
//sc.cleaner.foreach(_.registerRDDForCleanup(ret))
190199
ret
191200
}
192201

202+
def apply[D: ClassTag] (
203+
sc: SparkContext, pathname: String,
204+
serviceName: String, durableTypes: Array[DurableType],
205+
entityFactoryProxies: Array[EntityFactoryProxy], slotKeyId: Long) = {
206+
val ret = new DurableRDD[D, Unit](sc, null,
207+
serviceName, durableTypes, entityFactoryProxies, slotKeyId,
208+
1024*1024*1024L, pathname, null)
209+
ret
210+
}
211+
193212
def cleanupForApp(sc: SparkContext) {
194213
FileUtils.deleteDirectory(new File(getDurableDir(sc).get))
195214
}

mnemonic-spark/mnemonic-spark-core/src/main/scala/org/apache/mnemonic/spark/rdd/DurableRDDFunctions.scala

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import org.apache.mnemonic.sessions.ObjectCreator
2727

2828
class DurableRDDFunctions[T: ClassTag](rdd: RDD[T]) extends Serializable {
2929

30-
def makeDurable[D: ClassTag](
30+
def makeDurable[D: ClassTag] (
3131
serviceName: String,
3232
durableTypes: Array[DurableType],
3333
entityFactoryProxies: Array[EntityFactoryProxy],
@@ -39,6 +39,16 @@ class DurableRDDFunctions[T: ClassTag](rdd: RDD[T]) extends Serializable {
3939
serviceName, durableTypes, entityFactoryProxies, slotKeyId,
4040
partitionPoolSize, f, preservesPartitioning)
4141
}
42+
43+
def saveAsMnemonic[D: ClassTag] (dir: String,
44+
serviceName: String,
45+
durableTypes: Array[DurableType],
46+
entityFactoryProxies: Array[EntityFactoryProxy],
47+
slotKeyId: Long,
48+
partitionPoolSize: Long,
49+
f: (T, ObjectCreator[D, NonVolatileMemAllocator]) => Option[D]) {
50+
//TODO: implement export operation
51+
}
4252
}
4353

4454
object DurableRDDFunctions {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
package org.apache.mnemonic.spark
19+
20+
import org.apache.spark.rdd.RDD
21+
import org.apache.spark._
22+
import scala.reflect.ClassTag
23+
import scala.language.implicitConversions
24+
import org.apache.mnemonic.DurableType
25+
import org.apache.mnemonic.EntityFactoryProxy
26+
import org.apache.mnemonic.spark.rdd.DurableRDD
27+
28+
class DurableSparkFunctions(sc: SparkContext) extends Serializable {
29+
30+
def mnemonic[D: ClassTag] (pathname: String,
31+
serviceName: String,
32+
durableTypes: Array[DurableType],
33+
entityFactoryProxies: Array[EntityFactoryProxy],
34+
slotKeyId: Long) = {
35+
DurableRDD[D](sc, pathname: String,
36+
serviceName, durableTypes, entityFactoryProxies, slotKeyId)
37+
}
38+
}
39+
40+
object DurableSparkFunctions {
41+
implicit def addDurableFunctions(sc: SparkContext) = new DurableSparkFunctions(sc)
42+
}

0 commit comments

Comments
 (0)