Skip to content

Commit

Permalink
Initial Commit.
Browse files Browse the repository at this point in the history
  • Loading branch information
tmr232 committed Mar 6, 2016
0 parents commit 52f19f9
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 0 deletions.
140 changes: 140 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Created by .ignore support plugin (hsz.mobi)
### Windows template
# Windows image file caches
Thumbs.db
ehthumbs.db

# Folder config file
Desktop.ini

# Recycle Bin used on file shares
$RECYCLE.BIN/

# Windows Installer files
*.cab
*.msi
*.msm
*.msp

# Windows shortcuts
*.lnk
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/
### VirtualEnv template
# Virtualenv
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
.Python
[Bb]in
[Ii]nclude
[Ll]ib
[Ss]cripts
pyvenv.cfg
pip-selfcheck.json
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio

*.iml

## Directory-based project format:
.idea/
# if you remove the above rule, at least ignore the following:

# User-specific stuff:
# .idea/workspace.xml
# .idea/tasks.xml
# .idea/dictionaries

# Sensitive or high-churn files:
# .idea/dataSources.ids
# .idea/dataSources.xml
# .idea/sqlDataSources.xml
# .idea/dynamic.xml
# .idea/uiDesigner.xml

# Gradle:
# .idea/gradle.xml
# .idea/libraries

# Mongo Explorer plugin:
# .idea/mongoSettings.xml

## File-based project format:
*.ipr
*.iws

## Plugin-specific files:

# IntelliJ
/out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
### IPythonNotebook template
# Temporary data
.ipynb_checkpoints/

32 changes: 32 additions & 0 deletions scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import scrapy

# From Monthly Update
# print set(response.css('td[data-th="Bulletin ID"] a::attr("href")').extract())

class BulletinSpider(scrapy.Spider):
    """Follow a Microsoft security bulletin page to its update file links.

    The crawl is a three-step chain of callbacks:

    1. ``parse`` collects the download-page links from the bulletin page.
    2. ``resolve_download_page`` rewrites each download page URL from
       ``details.aspx`` to ``confirmation.aspx`` and requests that page.
    3. ``download_updates`` prints the file links found on that page.
    """

    name = 'BulletinSpider'
    start_urls = ['https://technet.microsoft.com/library/security/ms13-095']

    def parse(self, response):
        """Yield one request per distinct download-page link.

        A link qualifies when its ``href`` sits inside a table cell and
        contains ``familyid`` (compared case-insensitively).
        """
        # A set drops links that appear more than once on the page, so each
        # download page is requested only once.
        download_pages = {
            href
            for href in response.css('td a::attr("href")').extract()
            if 'familyid' in href.lower()
        }
        for download_page in download_pages:
            yield scrapy.Request(
                response.urljoin(download_page),
                callback=self.resolve_download_page,
            )

    def resolve_download_page(self, response):
        """Yield a request for the confirmation page matching *response*."""
        confirmation_url = response.url.replace(
            'details.aspx', 'confirmation.aspx'
        )
        yield scrapy.Request(
            response.urljoin(confirmation_url),
            callback=self.download_updates,
        )

    def download_updates(self, response):
        """Print the list of file-link hrefs found on the page."""
        # Call form of print: valid on Python 3 and, for a single argument,
        # prints the same text as the Python 2 print statement.
        print(response.css('td.file-link a::attr("href")').extract())







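# Scratch notes -- the expressions used by the BulletinSpider callbacks above:
#
# parse (download-page links on the bulletin page):
# {x for x in response.css('td a::attr("href")').extract() if 'familyid' in x.lower()}
#
# resolve_download_page (details page -> confirmation page):
# response.url.replace('details.aspx','confirmation.aspx')
#
# download_updates (file links on the confirmation page):
# response.css('td.file-link a::attr("href")').extract()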

0 comments on commit 52f19f9

Please sign in to comment.