-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmysort.py
210 lines (176 loc) · 6.43 KB
/
mysort.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# -*- coding: utf-8 -*-
#Part1: Get started
#chosing heapsort
#have to do two three things
#1)impliment it in python
#2)test implementation to make sure it works
#3) write a brief description of how good you think the
#sorting alg is (how many loops does it have to do with n elements)
#http://docs.python.org/2/library/heapq.html
from random import shuffle
pass_it = lambda x: x
def heappush(heap, item, key=pass_it):
"""Push item onto heap, maintaining the heap invariant."""
heap.append(item)
_siftdown(heap, 0, len(heap)-1, key=key)
def _siftdown(heap, startpos, pos, key):
newitem = heap[pos]
# Follow the path to the root, moving parents down until finding a place
# newitem fits.
while pos > startpos:
parentpos = (pos - 1) >> 1
parent = heap[parentpos]
if key(parent) <= key(newitem):
break
heap[pos] = parent
pos = parentpos
heap[pos] = newitem
def _siftup(heap, pos, key):
endpos = len(heap)
startpos = pos
newitem = heap[pos]
# Bubble up the smaller child until hitting a leaf.
childpos = 2*pos + 1 # leftmost child position
while childpos < endpos:
# Set childpos to index of smaller child.
rightpos = childpos + 1
if rightpos < endpos and key(heap[rightpos]) <= key(heap[childpos]):
childpos = rightpos
# Move the smaller child up.
heap[pos] = heap[childpos]
pos = childpos
childpos = 2*pos + 1
# The leaf at pos is empty now. Put newitem there, and bubble it up
# to its final resting place (by sifting its parents down).
heap[pos] = newitem
_siftdown(heap, startpos, pos, key)
def heappop(heap, key=pass_it):
"""Pop the smallest item off the heap, maintaining the heap
invariant."""
lastelt = heap.pop() # raises appropriate IndexError if heap is empty
if heap:
returnitem = heap[0]
heap[0] = lastelt
_siftup(heap, 0, key)
else:
returnitem = lastelt
return returnitem
def heap_sort(a, count):
#input: an unordered array a of length count
#(first place a in max-heap order)
heapify(a, count)
end = count-1
#in languages with zero-based arrays the children are 2*i+1 and 2*i+2
while end > 0:
#(swap the root(maximum value) of the heap with the last element of the heap)
swap(a[end], a[0])
#(decrease the size of the heap by one so that the previous max value will
#stay in its proper placement)
end = end - 1
#(put the heap back in max-heap order)
siftDown(a, 0, end)
def heapify(a, count):
#(start is assigned the index in a of the last parent node)
start = (count - 2 ) / 2
while start > 0:
#(sift down the node at index start to the proper place such that all nodes below
#the start index are in heap order)
siftDown(a, start, count-1)
start = start - 1
#(after sifting down the root all nodes/elements are in heap order)
def siftDown(a, start, end):
#input: end represents the limit of how far down the heap to sift.
root = start
while root * 2 + 1 < end:
#(While the root has at least one child)
child = root * 2 + 1
#(root*2 + 1 points to the left child)
swap = root
#(keeps track of child to swap with)
#(check if root is smaller than left child)
if a[swap] < a[child]:
swap = child
#(check if right child exists, and if it's bigger than what we're currently swapping with)
if child+1 <= end and a[swap] < a[child+1]:
swap = child + 1
#(check if we need to swap at all)
if swap != root:
swap(a[root], a[swap])
root = swap
#(repeat to continue sifting down the child now)
else:
return
#original heapsort used, was told in tutorial to try above
#def heap_sort(iterable):
# 'Equivalent to sorted(iterable)'
# h = []
# for value in iterable:
# heappush(h, value)
# return [heappop(h) for i in range(len(h))]
#testing heap with basic test
def test_heap():
lister=[2,1,0,4,6,5,7,9,8,10]
test_dis = heap_sort(lister)
print test_dis
return test_dis
#have to implement sorting based on someone else’s description
#part 2: data analytics
#find your own dataset
#going to
#1) pick a data set
#http://archive.ics.uci.edu/ml/datasets/Bank+Marketing
#2) figure out how to load in python
#http://docs.python.org/2/library/csv.html
def load_data ():
import csv
with open('bank-full.csv',) as csvfile: bank_data = csv.reader(csvfile)
#3) do 2 different visualizations of the data, and interpret them
#http://www.csd.uwo.ca/~jcamer7/CS2120/class11.html
#http://matplotlib.org/gallery.html
#1 lines & bars
#http://matplotlib.org/examples/lines_bars_and_markers/barh_demo.html
def lines_bars():
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
#only numeric values in graph
bank_data = ('age','yearly balance', 'contact day', 'contact duration', 'contacts performed', 'pdays', 'number of contacts',)
y_pos = np.arange(len(bank_data))
performance = np.random.rand(len(bank_data))
error = np.random.rand(len(bank_data))
plt.barh(y_pos, performance, xerr=error, align='center', alpha=0.4)
plt.yticks(y_pos, bank_data)
plt.xlabel('Performance')
plt.title('See the bank dada?')
plt.show()
#2
#circles
#categorical
#http://matplotlib.org/examples/shapes_and_collections/scatter_demo.html
import numpy as np
import matplotlib.pyplot as plt
N = 50
x = np.random.rand(N)
y = np.random.rand(N)
area = np.pi * (15 * np.random.rand(N))**2
plt.scatter(x, y, s=area, alpha=0.5)
plt.show()
# The slices will be ordered and plotted counter-clockwise.
#4) Use two different machine learning approaches to analyze,
#and interpret your data.
#Visualization1, 4 features
def data_set():
import numpy
from sklearn import datasets
data_doe= datasets.bank_data()
data = data_doe.data
data.shape
labels = bank_data.target
numpy.unique(labels)
#Visualization 2
#Linear Support Vector Machine
def data_set_two():
from sklearn import svm
svc = svm.bank_data(kernel='linear')
svc.fit(data,labels)
#http://www.csd.uwo.ca/~jcamer7/CS2120/class14.html