Concurrency.py
import threading
from collections import deque
from typing import List


class Solution:
    def crawl(self, startUrl: str, htmlParser: 'HtmlParser') -> List[str]:
        def addChildren(url, q):
            # Fetch the outgoing links of `url` and push them onto the shared
            # queue; deque operations are thread-safe in CPython.
            q.extend(htmlParser.getUrls(url))

        def bfs(url, parser):
            q = deque()
            visited = set()
            hostname = url.split('/')[2]
            q.append(url)
            res = []
            while q:
                threads = []
                # Everything currently in the queue belongs to the same BFS level.
                for _ in range(len(q)):
                    current = q.popleft()
                    if current in visited:
                        continue
                    visited.add(current)
                    # Only keep URLs under the start URL's hostname.
                    if hostname != current.split('/')[2]:
                        continue
                    res.append(current)
                    # Fetch this page's links in its own worker thread.
                    t = threading.Thread(target=addChildren, args=(current, q))
                    threads.append(t)
                for t in threads:
                    t.start()
                # Wait for the whole level to finish before expanding the next one.
                for t in threads:
                    t.join()
            return res

        return bfs(startUrl, htmlParser)
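
The crawler can be exercised locally with a stub parser. This is a minimal sketch: FakeHtmlParser and the link graph below are illustrative stand-ins for LeetCode's HtmlParser interface and are not part of the original file.

# Hypothetical local test harness, assuming getUrls(url) -> list of links.
if __name__ == "__main__":
    class FakeHtmlParser:
        def __init__(self, graph):
            self.graph = graph

        def getUrls(self, url):
            return self.graph.get(url, [])

    links = {
        "http://example.org/a": ["http://example.org/b", "http://other.com/x"],
        "http://example.org/b": ["http://example.org/a", "http://example.org/c"],
    }
    parser = FakeHtmlParser(links)
    print(Solution().crawl("http://example.org/a", parser))
    # Expected: the three example.org pages; http://other.com/x is filtered
    # out because its hostname differs from the start URL's.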