Skip to content

Commit

Permalink
Continued work on custom project lists feature
Browse files Browse the repository at this point in the history
  • Loading branch information
mrthankyou committed Feb 17, 2021
1 parent d4098d3 commit e69003a
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 85 deletions.
31 changes: 30 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,38 @@ python3 follow_repos_by_search_term_via_code_instances.py <LANGUAGE> <SEARCH_TER
python3 follow_repos_by_search_term.py <LANGUAGE> <SEARCH_TERM>

# Finds top repositories that have a minimum 500 stars and use the provided programming language.
python3 follow_top_repos_by_star_count.py <LANGUAGE>
python3 follow_top_repos_by_star_count.py <LANGUAGE> <CUSTOM_LIST_NAME>(optional)
```

## The Custom Projects Lists Feature
While developing this collection of scripts, we realized that when a user follows thousands of repos in their LGTM account, there is a chance that the LGTM account will break: you won't be able to use the query console, and some API
calls will fail.

To resolve this, we created a feature that users can opt in to. This feature, called "Custom Projects Lists", does the
following:

- Follows all repos (aka projects) in your LGTM account.
- Stores every project you follow in a txt file.
- At a later date (we suggest 24 hours), the user may run a follow-up command that will take the repos followed, add them to an LGTM custom list, and finally unfollow the projects in the user's LGTM account.

Although these steps are tedious, this is the best workaround we've found. Placing projects in custom lists avoids bricking the LGTM account. We typically wait 24 hours because, if a project is new to LGTM, LGTM must first process it, and projects that are still being processed can't be added to custom lists.

Finally, by having custom lists we hope that the security researcher will have an easier time picking which repos they want to test.

### How To Run The Custom Projects Lists Feature
In some of the commands above, you will see the <CUSTOM_LIST_NAME> option. This is optional for all
commands. CUSTOM_LIST_NAME is the name of an LGTM project list that will be created and used to hold projects. Any projects found by that command will then be added to the LGTM custom list. The example below shows how this works:

1. Run a command, passing in the name of the custom list. The command below will follow Javascript repos and generate a cache file of every repo you follow for the project list called "cool_javascript_projects".

`python3 follow_top_repos_by_star_count.py javascript cool_javascript_projects`

2. Wait 1 - 24 hours.

3. Run the command below. This will take the cached file you created earlier, create an LGTM custom project list, add the projects to that project list, and finally unfollow the repositories in your LGTM account.

`python3 move_repos_to_lgtm_lists.py`

## Legal

The author of this script assumes no liability for your use of this project, including,
Expand Down
Empty file added cache/test.txt
Empty file.
23 changes: 18 additions & 5 deletions follow_top_repos_by_star_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,23 +28,27 @@

import utils.github_dates
import utils.github_api
import utils.cacher # utils.cacher.write_project_ids_to_file
import sys
import time

def save_project_to_lgtm(site: 'LGTMSite', repo_name: str) -> dict:
    """Follow a GitHub repository on LGTM and return LGTM's response.

    Args:
        site: LGTM client used to follow the repository.
        repo_name: Repository in "owner/name" form; it is appended to
            'https://github.com/' to build the URL passed to LGTM.

    Returns:
        Whatever ``site.follow_repository`` returns for the repository.
        Callers in this file read ``['realProject'][0]['key']`` from it.
    """
    print("Adding: " + repo_name)
    # Another throttle. Considering we are sending a request to Github
    # owned properties twice in a small time-frame, I would prefer for
    # this to be here.
    time.sleep(1)

    repo_url: str = 'https://github.com/' + repo_name
    project = site.follow_repository(repo_url)

    print("Saved the project: " + repo_name)
    return project

def find_and_save_projects_to_lgtm(language: str, custom_list_name: str = "") -> List[str]:
    """Follow top-starred GitHub repos for a language on LGTM.

    Searches GitHub, one date range at a time, for non-fork repositories
    with more than 500 stars written in ``language`` and follows each
    non-archived result on LGTM.

    Args:
        language: Programming language used in the GitHub search query.
        custom_list_name: Name of the custom list the caller intends to
            create. Currently unused here; it defaults to "" so existing
            one-argument callers keep working.

    Returns:
        The LGTM project ids (strings) of every repository followed.
    """
    github = utils.github_api.create()
    site = LGTMSite.create_from_file()
    saved_project_ids: List[str] = []

    for date_range in utils.github_dates.generate_dates():
        repos = github.search_repositories(query=f'stars:>500 created:{date_range} fork:false sort:stars language:{language}')

        for repo in repos:
            # Github has rate limiting in place hence why we add a sleep here. More info can be found here:
            # https://docs.github.com/rest/overview/resources-in-the-rest-api#rate-limiting
            time.sleep(1)

            if repo.archived or repo.fork:
                continue

            saved_project = save_project_to_lgtm(site, repo.full_name)
            # Store the project id, not the whole response: the cache writer
            # concatenates each entry with "\n" and needs strings.
            # NOTE(review): assumes every response carries 'realProject';
            # confirm for projects LGTM is still processing.
            saved_project_id = saved_project['realProject'][0]['key']
            saved_project_ids.append(saved_project_id)

    return saved_project_ids

if len(sys.argv) < 2:
print("Please provide a language you want to search")
Expand All @@ -66,4 +74,9 @@ def find_and_save_projects_to_lgtm(language: str):
language = sys.argv[1].capitalize()

print('Following the top repos for %s' % language)
find_and_save_projects_to_lgtm(language)
# If the user provided a second arg then they want to create a custom list.
# An empty name means no custom list was requested.
custom_list_name = sys.argv[2] if len(sys.argv) >= 3 else ""

saved_project_ids = find_and_save_projects_to_lgtm(language, custom_list_name)

# Only cache the followed project ids when a list name was given; the cache
# file is what move_repos_to_lgtm_lists.py later turns into an LGTM list.
if custom_list_name:
    utils.cacher.write_project_ids_to_file(saved_project_ids, custom_list_name)
80 changes: 21 additions & 59 deletions move_repos_to_lgtm_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,70 +9,32 @@

import sys
import time
import os

# Every file in the cache directory holds the ids of the projects followed
# for one custom list, one id per line; the file name (minus extension) is
# the name of the list.
cached_files = os.listdir("cache")
site = LGTMSite.create_from_file()

for cached_file in cached_files:
    # Derive the list name from the bare file name before building the path,
    # so the "cache/" prefix never leaks into the project list name.
    project_list_name = os.path.splitext(cached_file)[0]
    cached_file_path = os.path.join("cache", cached_file)

    # We want to find or create a project list based on the the name of
    # the text file that holds all of the projects we are currently following.
    project_list_data = site.get_or_create_project_list(project_list_name)
    # NOTE(review): confirm the shape returned by get_or_create_project_list;
    # this indexing is kept from the original script.
    project_list_id = project_list_data['realProject'][0]['key']

    # Read one project id per line, skipping blank lines, and close the
    # file before it is removed below.
    with open(cached_file_path, "r") as file:
        project_ids = [line.strip() for line in file if line.strip()]

    # With the project list id and the project ids, we now want to save the repos
    # we currently follow to the project list
    site.load_into_project_list(project_list_id, project_ids)

    for project_id in project_ids:
        print(project_id)
        # The last thing we need to do is tidy up and unfollow all the repositories
        # we just added to our project list.
        site.unfollow_repository_by_id(project_id)

    # The cache file has been fully processed, so it must not be picked up
    # again on the next run.
    os.remove(cached_file_path)
52 changes: 32 additions & 20 deletions test.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,36 @@
from lgtm import LGTMSite
lgtm_site = LGTMSite.create_from_file()
from typing import List
import sys
import os

repo_url: str = 'https://github.com/google/jax'
print(os.listdir("cache"))

result = lgtm_site.follow_repository(repo_url)
print("1111111111")
print("1111111111")
print("1111111111")
print("1111111111")
print("1111111111")
project_id = result['realProject'][0]['key']
print(project_id)

print("1111111111")
print("1111111111")
print("1111111111")
print("1111111111")
project_list_id = lgtm_site.get_or_create_project_list("test_project_16")
print(project_list_id)
#
# projects: List[str] = []
#
# print(sys.argv)

lgtm_site.load_into_project_list(project_list_id, [project_id])

lgtm_site.unfollow_repository_by_id(project_id)
# from lgtm import LGTMSite
# lgtm_site = LGTMSite.create_from_file()
#
# repo_url: str = 'https://github.com/google/jax'
#
# result = lgtm_site.follow_repository(repo_url)
# print("1111111111")
# print("1111111111")
# print("1111111111")
# print("1111111111")
# print("1111111111")
# project_id = result['realProject'][0]['key']
# print(project_id)
#
# print("1111111111")
# print("1111111111")
# print("1111111111")
# print("1111111111")
# project_list_id = lgtm_site.get_or_create_project_list("test_project_16")
# print(project_list_id)
#
# lgtm_site.load_into_project_list(project_list_id, [project_id])
#
# lgtm_site.unfollow_repository_by_id(project_id)
7 changes: 7 additions & 0 deletions utils/cacher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from typing import List

def write_project_ids_to_file(project_ids: List[str], file_name: str):
    """Append project ids to ``cache/<file_name>.txt``, one id per line.

    The file is opened in append mode, so repeated calls with the same
    ``file_name`` accumulate ids rather than overwrite them. The ``cache``
    directory is created if it does not exist yet.

    Args:
        project_ids: Project ids to store; each must be a string.
        file_name: Cache file name without directory or ".txt" extension.
    """
    import os

    os.makedirs("cache", exist_ok=True)
    # The context manager closes the file even if a write raises.
    with open("cache/" + file_name + ".txt", "a") as file:
        file.writelines(project_id + "\n" for project_id in project_ids)

0 comments on commit e69003a

Please sign in to comment.