-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathStats.java
180 lines (161 loc) · 6.68 KB
/
Stats.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
package com.dbms.utils;
import com.dbms.index.Index;
import com.dbms.index.TreeDeserializer;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Class that keeps track of the stats of all the tables in the database: the number of rows of
 * each table and the minimum/maximum value seen in each of its columns. The stats are also used to
 * estimate I/O costs and to generate random tables with the same shape.
 */
public class Stats extends IO {

    /** Maps unaliased table name to its stats */
    private final Map<String, TableStats> stats = new HashMap<>();

    /**
     * Constructor for a {@code Stats} object. Scans every table of the schema once, records its
     * row count and per-column minimum/maximum, and writes one line per table to {@code bw} in the
     * form {@code table numRows col,min,max col,min,max ...}.
     *
     * <p>The writer is always closed before this constructor returns, whether it completes
     * normally or throws.
     *
     * @param bw     {@code BufferedWriter} for writing the stats to {@code stats.txt}; closed by
     *               this constructor
     * @param schema schema of our database: maps table name to its list of columns
     * @throws IOException if reading a table or writing the stats fails
     */
    Stats(BufferedWriter bw, Map<String, List<Attribute>> schema) throws IOException {
        // try-with-resources guarantees the writer is closed even if a table scan fails midway.
        try (BufferedWriter out = bw) {
            for (Map.Entry<String, List<Attribute>> entry : schema.entrySet()) {
                String table = entry.getKey();
                List<Attribute> columnNames = entry.getValue();

                // NOTE(review): the reader is never closed here; if TupleReader exposes a
                // close() method it should be called once the scan is finished -- confirm.
                TupleReader tr = new TupleReader(Catalog.pathToTable(table));

                // Primitive arrays avoid boxing every comparison in the scan loop.
                int[] max = new int[columnNames.size()];
                int[] min = new int[columnNames.size()];
                Arrays.fill(max, Integer.MIN_VALUE);
                Arrays.fill(min, Integer.MAX_VALUE);

                int numRows = 0;
                List<Integer> row;
                while ((row = tr.nextTuple()) != null) {
                    for (int i = 0; i < row.size(); i++) {
                        int element = row.get(i);
                        if (element > max[i]) {
                            max[i] = element;
                        }
                        if (element < min[i]) {
                            min[i] = element;
                        }
                    }
                    numRows++;
                }
                // NOTE(review): for an empty table min/max keep their sentinel values
                // (Integer.MAX_VALUE / Integer.MIN_VALUE), exactly as before.

                StringBuilder line = new StringBuilder(table).append(' ').append(numRows).append(' ');
                // Insertion-ordered map so that the columns keep their schema order.
                Map<String, Range> columnInfo = new LinkedHashMap<>();
                for (int i = 0; i < columnNames.size(); i++) {
                    String columnName = columnNames.get(i).COLUMN;
                    if (i > 0) {
                        line.append(' ');
                    }
                    line.append(columnName).append(',').append(min[i]).append(',').append(max[i]);
                    columnInfo.put(columnName, new Range(min[i], max[i]));
                }

                stats.put(table, new TableStats(numRows, columnInfo));
                out.write(line.toString());
                out.newLine();
            }
        }
    }

    /**
     * Looks up the range of an attribute whose table name may be an alias.
     *
     * @param a Attribute with (aliased) table name
     * @return the {@code Range} recorded for that column of the underlying table
     */
    private Range rangeOfAliased(Attribute a) {
        String unaliased = Catalog.getRealTableName(a.TABLE);
        return stats.get(unaliased).get(a.COLUMN);
    }

    /**
     * @param tableName (unaliased) table name
     * @return number of pages in the relation: rows * attributes * 4 / {@code PAGE_SIZE}
     *         (4 is presumably the size in bytes of one stored value)
     */
    private double getNumPages(String tableName) {
        TableStats tstats = stats.get(tableName);
        // Start the product with the double so ROWS * NUM_ATTRIBUTES cannot overflow an int.
        return 4.0 * tstats.ROWS * tstats.NUM_ATTRIBUTES / PAGE_SIZE;
    }

    /**
     * @param tableName (unaliased) table name
     * @return I/O cost of scanning the table (number of pages)
     */
    public double getTableScanCost(String tableName) {
        return getNumPages(tableName);
    }

    /**
     * Estimates the I/O cost of answering a selection through an index.
     *
     * <ul>
     *   <li>clustered index: {@code 3 + pages * r}
     *   <li>unclustered index: {@code 3 + leaves * r + tuples * r}
     * </ul>
     *
     * where {@code r} is the reduction factor of the selection. The constant 3 is presumably the
     * cost of descending the tree to the first matching leaf.
     *
     * @param i      the index for which to calculate the cost
     * @param extent the extent of values being selected
     * @return I/O cost of using the index to select the values
     * @throws IOException if the index file cannot be read
     */
    public double getTableIndexCost(Index i, int extent) throws IOException {
        TreeDeserializer td = new TreeDeserializer(i);
        int numLeaves = td.numLeaves;
        td.close();

        double reductionFactor = getReductionFactor(i.name, extent);
        if (i.isClustered) {
            return 3 + getNumPages(i.name.TABLE) * reductionFactor;
        }
        // Each matching tuple may cost one page read, so the last term scales with the number
        // of tuples in the table rather than with the extent of the selection.
        int numTuples = stats.get(i.name.TABLE).ROWS;
        return 3 + numLeaves * reductionFactor + numTuples * reductionFactor;
    }

    /**
     * Gets the range of values for a given table and attribute
     *
     * @param a {@code Attribute} object that stores the unaliased table name and column name
     * @return {@code Range} object that contains the minimum and maximum of the table and column
     */
    public Range getAttributeRange(Attribute a) {
        return stats.get(a.TABLE).get(a.COLUMN);
    }

    /**
     * Number of rows in a given table
     *
     * @param tableName the aliased name of table
     * @return number of rows in that table
     */
    public int numRows(String tableName) {
        return stats.get(Catalog.getRealTableName(tableName)).ROWS;
    }

    /**
     * Number of attributes in a given table
     *
     * @param tableName the unaliased name of table
     * @return number of attributes/columns it has
     */
    public int numAttributes(String tableName) {
        return stats.get(tableName).NUM_ATTRIBUTES;
    }

    /**
     * @param a Attribute with (aliased) table name
     * @return base table V-Value for this attribute, i.e. the extent of its recorded range
     */
    public int baseTableV(Attribute a) {
        return rangeOfAliased(a).extent();
    }

    /**
     * @param a      Attribute with (aliased) table name
     * @param extent extent of values in selection
     * @return the reduction factor: ratio of selected values to total possible values
     */
    public double getReductionFactor(Attribute a, int extent) {
        return extent * 1.0 / rangeOfAliased(a).extent();
    }

    /**
     * Generates random tuples with the given {@code stats}: for every known table, writes as many
     * tuples as the table has rows, each value drawn uniformly from the inclusive range
     * {@code [min, max]} recorded for its column.
     *
     * @param path destination directory of table with random tuples
     * @throws IOException if a table file cannot be written
     */
    void generate(String path) throws IOException {
        for (Map.Entry<String, TableStats> entry : stats.entrySet()) {
            String tableName = entry.getKey();
            TableStats ts = entry.getValue();
            Set<String> columns = ts.columns();

            // The schema is the same for every row, so build it once. An insertion-ordered set
            // keeps the attributes in the same order as the values of each row -- this assumes
            // Tuple pairs attributes and values positionally; TODO confirm.
            Set<Attribute> schema = new LinkedHashSet<>();
            for (String column : columns) {
                schema.add(Attribute.bundle(tableName, column));
            }

            TupleWriter tw = new TupleWriter(String.join(File.separator, path, tableName));
            try {
                for (int i = 0; i < ts.ROWS; i++) {
                    List<Integer> rngList = new ArrayList<>(columns.size());
                    for (String column : columns) {
                        int min = ts.get(column).min;
                        int max = ts.get(column).max;
                        // +1 makes max reachable; long arithmetic avoids overflow on wide ranges.
                        long span = (long) max - min + 1;
                        int rng = (int) (min + (long) (Math.random() * span));
                        rngList.add(rng);
                    }
                    tw.writeTuple(new Tuple(schema, rngList));
                }
            } finally {
                // Release the file even when writing a tuple fails.
                tw.close();
            }
        }
    }
}
/**
 * Holder for the statistics of a single table: how many rows it has, how many attributes each
 * tuple has, and the range recorded for each of its columns.
 */
class TableStats {

    /** range of each table column, keyed by column name */
    private final Map<String, Range> COLUMN_STATS;

    /** number of rows in the table */
    final int ROWS;

    /** number of attributes in a tuple; taken from the size of the column map */
    final int NUM_ATTRIBUTES;

    /**
     * @param rows        number of rows in the table
     * @param columnStats range of each column, keyed by column name; stored as given (no copy)
     */
    TableStats(int rows, Map<String, Range> columnStats) {
        this.ROWS = rows;
        this.COLUMN_STATS = columnStats;
        this.NUM_ATTRIBUTES = columnStats.size();
    }

    /**
     * @param columnName name of a column of this table
     * @return the range stored for that column, or {@code null} if the map has no such entry
     */
    Range get(String columnName) {
        Range columnRange = this.COLUMN_STATS.get(columnName);
        return columnRange;
    }

    /** @return the column names, as the key set of the underlying map */
    Set<String> columns() {
        Set<String> names = this.COLUMN_STATS.keySet();
        return names;
    }
}