-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 52f19f9
Showing
2 changed files
with
172 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
# Created by .ignore support plugin (hsz.mobi) | ||
### Windows template | ||
# Windows image file caches | ||
Thumbs.db | ||
ehthumbs.db | ||
|
||
# Folder config file | ||
Desktop.ini | ||
|
||
# Recycle Bin used on file shares | ||
$RECYCLE.BIN/ | ||
|
||
# Windows Installer files | ||
*.cab | ||
*.msi | ||
*.msm | ||
*.msp | ||
|
||
# Windows shortcuts | ||
*.lnk | ||
### Python template | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
env/ | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*,cover | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
### VirtualEnv template | ||
# Virtualenv | ||
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/ | ||
.Python | ||
[Bb]in | ||
[Ii]nclude | ||
[Ll]ib | ||
[Ss]cripts | ||
pyvenv.cfg | ||
pip-selfcheck.json | ||
### JetBrains template | ||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio | ||
|
||
*.iml | ||
|
||
## Directory-based project format: | ||
.idea/ | ||
# if you remove the above rule, at least ignore the following: | ||
|
||
# User-specific stuff: | ||
# .idea/workspace.xml | ||
# .idea/tasks.xml | ||
# .idea/dictionaries | ||
|
||
# Sensitive or high-churn files: | ||
# .idea/dataSources.ids | ||
# .idea/dataSources.xml | ||
# .idea/sqlDataSources.xml | ||
# .idea/dynamic.xml | ||
# .idea/uiDesigner.xml | ||
|
||
# Gradle: | ||
# .idea/gradle.xml | ||
# .idea/libraries | ||
|
||
# Mongo Explorer plugin: | ||
# .idea/mongoSettings.xml | ||
|
||
## File-based project format: | ||
*.ipr | ||
*.iws | ||
|
||
## Plugin-specific files: | ||
|
||
# IntelliJ | ||
/out/ | ||
|
||
# mpeltonen/sbt-idea plugin | ||
.idea_modules/ | ||
|
||
# JIRA plugin | ||
atlassian-ide-plugin.xml | ||
|
||
# Crashlytics plugin (for Android Studio and IntelliJ) | ||
com_crashlytics_export_strings.xml | ||
crashlytics.properties | ||
crashlytics-build.properties | ||
### IPythonNotebook template | ||
# Temporary data | ||
.ipynb_checkpoints/ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import scrapy | ||
|
||
# From Monthly Update | ||
# print set(response.css('td[data-th="Bulletin ID"] a::attr("href")').extract()) | ||
|
||
class BulletinSpider(scrapy.Spider):
    """Crawl a Microsoft security bulletin page and list its update file links.

    The crawl runs in three steps, one callback per step:

    1. ``parse`` collects the download-page links found on the bulletin page.
    2. ``resolve_download_page`` turns each download "details" URL into the
       matching "confirmation" URL and requests it.
    3. ``download_updates`` prints the file links found on that page.
    """

    name = 'BulletinSpider'
    start_urls = ['https://technet.microsoft.com/library/security/ms13-095']

    def parse(self, response):
        """Yield one request per distinct download-page link on the bulletin.

        A link qualifies when its ``href`` contains ``familyid``
        (case-insensitive). Links are collected into a set so that each
        distinct href is requested only once.
        """
        hrefs = response.css('td a::attr("href")').extract()
        download_pages = {href for href in hrefs if 'familyid' in href.lower()}
        for download_page in download_pages:
            yield scrapy.Request(
                response.urljoin(download_page),
                callback=self.resolve_download_page,
            )

    def resolve_download_page(self, response):
        """Request the confirmation page that corresponds to a details page.

        The target URL is the current response URL with ``details.aspx``
        replaced by ``confirmation.aspx``.
        """
        # response.url is already absolute, so no urljoin is needed here.
        confirmation_url = response.url.replace('details.aspx', 'confirmation.aspx')
        yield scrapy.Request(confirmation_url, callback=self.download_updates)

    def download_updates(self, response):
        """Print the list of hrefs found under ``td.file-link a``."""
        file_links = response.css('td.file-link a::attr("href")').extract()
        # Parenthesised single-argument print is valid on both Python 2 and
        # Python 3; the bare ``print x`` statement is a SyntaxError on Python 3.
        print(file_links)
|
||
|
||
|
||
|
||
|
||
|
||
|
||
# | ||
# {x for x in response.css('td a::attr("href")').extract() if 'familyid' in x.lower()} | ||
# | ||
# response.url.replace('details.aspx','confirmation.aspx') | ||
# | ||
# response.css('td.file-link a::attr("href")').extract() |