diff --git a/src/main/scala/com/databricks/spark/sql/perf/Tables.scala b/src/main/scala/com/databricks/spark/sql/perf/Tables.scala index 177d38c..bec425a 100644 --- a/src/main/scala/com/databricks/spark/sql/perf/Tables.scala +++ b/src/main/scala/com/databricks/spark/sql/perf/Tables.scala @@ -177,7 +177,18 @@ abstract class Tables(sqlContext: SQLContext, scaleFactor: String, numPartitions: Int): Unit = { val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Ignore - val data = df(format != "text", numPartitions) + val dataTemp = df(format != "text", numPartitions) + val data = if (name.toLowerCase.contains("store_sales")) { + dataTemp.sortWithinPartitions("ss_sold_date_sk") + } else if (name.toLowerCase.contains("web_sales")) { + dataTemp.sortWithinPartitions("ws_sold_date_sk") + } else if (name.toLowerCase.contains("catalog_sales")) { + dataTemp.sortWithinPartitions("cs_sold_date_sk") + } else { + dataTemp + } + + val tempTableName = s"${name}_text" data.createOrReplaceTempView(tempTableName) @@ -291,7 +302,8 @@ abstract class Tables(sqlContext: SQLContext, scaleFactor: String, filterOutNullPartitionValues: Boolean, tableFilter: String = "", numPartitions: Int = 100): Unit = { - var tablesToBeGenerated = if (partitionTables) { + // Partitioned-table generation is intentionally disabled: every table is forced through the nonPartitioned path below. + var tablesToBeGenerated = if (false) { tables } else { tables.map(_.nonPartitioned)