@@ -37,7 +37,9 @@ import org.apache.spark.rdd.{PartitionPruningRDD, RDD}
37
37
38
38
import shark.{LogHelper, SharkConfVars, SharkEnv}
39
39
import shark.execution.optimization.ColumnPruner
40
- import shark.memstore2.{CacheType, ColumnarSerDe, MemoryMetadataManager}
40
+ import shark.memstore2.CacheType
41
+ import shark.memstore2.CacheType._
42
+ import shark.memstore2.{ColumnarSerDe, MemoryMetadataManager}
41
43
import shark.memstore2.{TablePartition, TablePartitionStats}
42
44
import shark.util.HiveUtils
43
45
@@ -70,22 +72,25 @@ class TableScanOperator extends TopOperator[TableScanDesc] {
70
72
71
73
@BeanProperty var tableDesc: TableDesc = _
72
74
75
+ // True if table data is stored in the Spark heap.
73
76
@BeanProperty var isInMemoryTableScan: Boolean = _
74
77
78
+ @BeanProperty var cacheMode: CacheType.CacheType = _
79
+
75
80
76
81
override def initializeOnMaster() {
77
82
// Create a local copy of the HiveConf that will be assigned job properties and, for disk reads,
78
83
// broadcasted to slaves.
79
84
localHConf = new HiveConf(super.hconf)
85
+ cacheMode = CacheType.fromString(
86
+   tableDesc.getProperties().get("shark.cache").asInstanceOf[String])
80
87
isInMemoryTableScan = SharkEnv.memoryMetadataManager.containsTable(
81
88
  table.getDbName, table.getTableName)
82
89
}
83
90
84
91
override def outputObjectInspector() = {
85
- val cacheMode = CacheType.fromString(
86
-   tableDesc.getProperties().get("shark.cache").asInstanceOf[String])
87
92
if (parts == null) {
88
- val serializer = if (CacheType.shouldCache(cacheMode)) {
93
+ val serializer = if (isInMemoryTableScan || cacheMode == CacheType.TACHYON) {
89
94
new ColumnarSerDe
90
95
} else {
91
96
tableDesc.getDeserializerClass().newInstance()
@@ -94,7 +99,7 @@ class TableScanOperator extends TopOperator[TableScanDesc] {
94
99
serializer.getObjectInspector()
95
100
} else {
96
101
val partProps = firstConfPartDesc.getProperties()
97
- val partSerDe = if (CacheType.shouldCache(cacheMode)) {
102
+ val partSerDe = if (isInMemoryTableScan || cacheMode == CacheType.TACHYON) {
98
103
new ColumnarSerDe
99
104
} else {
100
105
firstConfPartDesc.getDeserializerClass().newInstance()
@@ -115,8 +120,6 @@ class TableScanOperator extends TopOperator[TableScanDesc] {
115
120
// 1. Spark heap (block manager), accessed through the Shark MemoryMetadataManager
116
121
// 2. Tachyon table
117
122
// 3. Hive table on HDFS (or other Hadoop storage)
118
- val cacheMode = CacheType.fromString(
119
-   tableDesc.getProperties().get("shark.cache").asInstanceOf[String])
120
123
// TODO(harvey): Pruning Hive-partitioned, cached tables isn't supported yet.
121
124
if (isInMemoryTableScan || cacheMode == CacheType.TACHYON) {
122
125
if (isInMemoryTableScan) {
@@ -147,8 +150,6 @@ class TableScanOperator extends TopOperator[TableScanDesc] {
147
150
// the input table and we have statistics on the table.
148
151
val columnsUsed = new ColumnPruner(this, table).columnsUsed
149
152
150
- val cacheMode = CacheType.fromString(
151
-   tableDesc.getProperties().get("shark.cache").asInstanceOf[String])
152
153
if (!table.isPartitioned && cacheMode == CacheType.TACHYON) {
153
154
SharkEnv.tachyonUtil.pushDownColumnPruning(rdd, columnsUsed)
154
155
}
0 commit comments