Skip to content

Commit

Permalink
add main file and add BeautifulSoup package
Browse files Browse the repository at this point in the history
  • Loading branch information
myg133 committed Jun 24, 2014
1 parent 00edfb6 commit fe45805
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 17 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,6 @@ docs/_build/
.project
.pydevproject
*.ldb
*.out
*.files
*.file
30 changes: 13 additions & 17 deletions HtmlAnalyze/Worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,20 @@
sys.path.append('../')
pass

from bs4 import BeautifulSoup
from html.parser import HTMLParser


class Worker(HTMLParser):
    """Event-driven HTML parser skeleton.

    Intended (per the original note) to read the configuration and obtain
    the set of XPath expressions to crawl. None of that is implemented yet:
    every handler below is a no-op.
    """

    def __init__(self):
        # Let HTMLParser set up its internal parsing state.
        super().__init__()

    def handle_starttag(self, tag, attrs):
        """Called by HTMLParser for each opening tag; currently does nothing."""

    def handle_endtag(self, tag):
        """Called by HTMLParser for each closing tag; currently does nothing."""

    def handle_data(self, data):
        """Called by HTMLParser for text between tags; currently does nothing."""

class Worker():
    """Keeps a BeautifulSoup tree of the most recently fed HTML.

    Rule loading, tag filtering and entity building are still to be written;
    for now the class only parses markup and stores the resulting tree.
    """

    def __init__(self):
        # Start with an empty document. The parser is named explicitly so
        # bs4 does not have to guess one (the guess depends on which parser
        # libraries happen to be installed).
        self.soup = BeautifulSoup('', 'html.parser')

    def Feed(self, htmlMarkup):
        """Parse ``htmlMarkup`` and store the tree in ``self.soup``.

        ``htmlMarkup`` is the HTML text to analyse; ``None`` is treated as an
        empty document. Nothing is returned yet because the entity-building
        steps listed below are not implemented.
        """
        # Previously the argument was ignored and self.soup stayed empty.
        markup = '' if htmlMarkup is None else htmlMarkup
        self.soup = BeautifulSoup(markup, 'html.parser')
        # TODO: read rules in config file
        # TODO: filter the tags by rules
        # TODO: build entity with html content
        # TODO: return the entity
10 changes: 10 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-

import os

def main():
    """Program entry point; currently a placeholder that does nothing."""

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

0 comments on commit fe45805

Please sign in to comment.