-
-
Notifications
You must be signed in to change notification settings - Fork 572
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add proof of concept for Python3 remoting with execnet.
See ticket #1400 for more details This is an example of how to call Scancode as a function from Python2 or Python3. The benefits are that when the server process has loaded the license index, and imported its modules there is no per-call import/loading penalty anymore. This is using execnet which is the multiprocessing library used by py.test and therefore a rather stable and high quality engine. Signed-off-by: Philippe Ombredanne <[email protected]>
- Loading branch information
1 parent
bc99c74
commit 8afa686
Showing
3 changed files
with
158 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# | ||
# Copyright (c) 2019 nexB Inc. and others. All rights reserved. | ||
# http://nexb.com and https://github.com/nexB/scancode-toolkit/ | ||
# The ScanCode software is licensed under the Apache License version 2.0. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
|
||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
from __future__ import unicode_literals | ||
|
||
import json | ||
from os.path import abspath | ||
from os.path import dirname | ||
from os.path import join | ||
from os.path import normpath | ||
|
||
import execnet | ||
|
||
import scanserv | ||
|
||
""" | ||
This is a module designed to be called from Python 2 or 3 and is the client | ||
side. See scanserv for the server-side module that runs on Python 2 and | ||
effectively runs scancode. | ||
""" | ||
|
||
|
||
def scan(locations, deserialize=False, scancode_root_dir=None):
    """
    Scan each path of the `locations` iterable and yield the results, one item
    per location, as a JSON string. If `deserialize` is True, yield the
    deserialized Python data instead.
    Each location is scanned independently.

    `scancode_root_dir` is the root directory of the ScanCode installation
    whose `bin/python` interpreter is used to run the server side. When not
    provided, it is derived by applying `dirname` three times to the absolute
    path of this file.

    This is a generator: the server process is started on the first iteration
    and the gateway to it is exited when the generator is exhausted, closed or
    fails.
    """
    if not scancode_root_dir:
        scancode_root_dir = abspath(normpath(__file__))
        scancode_root_dir = dirname(dirname(dirname(scancode_root_dir)))
    python2 = join(scancode_root_dir, 'bin', 'python')
    spec = 'popen//python={python2}'.format(python2=python2)
    gateway = execnet.makegateway(spec)  # NOQA
    try:
        channel = gateway.remote_exec(scanserv)

        for location in locations:
            # build a mapping of options to use for this scan
            scan_kwargs = dict(
                location=location,
                license=True,
                license_text=True,
                license_diag=True,
                copyright=True,
                info=True,
                processes=0,
            )

            channel.send(scan_kwargs)  # execute func-call remotely
            results = channel.receive()
            if deserialize:
                results = json.loads(results)
            yield results
    finally:
        # always release the gateway so that the server subprocess does not
        # linger once all scans are done or when a scan fails midway
        gateway.exit()
|
||
|
||
if __name__ == '__main__':
    # Command line entry point: every argument is a path to scan and the JSON
    # results of each scan are printed in turn.
    import sys  # NOQA
    for scanned in scan(sys.argv[1:]):
        print(scanned)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
A simple proof of concept for Python3 remoting with execnet. | ||
|
||
See ticket #1400 for more. | ||
|
||
This is an example of how to call Scancode as a function from Python2 or Python3. | ||
The benefits are that when the server process has loaded the license index, | ||
and imported its modules there is no per-call import/loading penalty anymore. | ||
|
||
This is using execnet which is the multiprocessing library used by | ||
py.test and therefore a rather stable and high quality engine. | ||
|
||
To test, do this:: | ||
|
||
1. checkout scancode and run ./configure in a first shell. This is for a plain | ||
ScanCode using Python 2 that will be used as a "server". | ||
|
||
2. in another shell, create a virtualenv with Python 3 in another | ||
location. Activate that venv, and `pip install simplejson execnet` | ||
|
||
3. Change dir to the installed scancode-toolkit/etc/scripts directory where the | ||
scancli.py and scanserv.py scripts are. Then run:: | ||
|
||
python3 scancli.py ../../NOTICE ../../setup.py | ||
|
||
This will effectively make remote function calls to the Python2 | ||
scancode and get the results back in Python3. It also allows | ||
multiple calls to reuse the same process, hence amortizing any startup | ||
costs. Here this will run two scans: one on NOTICE and another on setup.py. | ||
It could have been directories too. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
# | ||
# Copyright (c) 2019 nexB Inc. and others. All rights reserved. | ||
# http://nexb.com and https://github.com/nexB/scancode-toolkit/ | ||
# The ScanCode software is licensed under the Apache License version 2.0. | ||
# ScanCode is a trademark of nexB Inc. | ||
# | ||
# You may not use this software except in compliance with the License. | ||
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 | ||
# Unless required by applicable law or agreed to in writing, software distributed | ||
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR | ||
# CONDITIONS OF ANY KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations under the License. | ||
|
||
from __future__ import absolute_import | ||
from __future__ import division | ||
from __future__ import print_function | ||
from __future__ import unicode_literals | ||
|
||
""" | ||
Python2 "server side" of the scan server. In a given execnet session, this | ||
process will hold a loaded license index and can be invoked multiple times | ||
without the index load penalty on each call. | ||
""" | ||
|
||
|
||
def as_json(results, pretty=True):
    """
    Return a JSON string, terminated by a newline, built from the `results`
    data structure. Use a two-space indent if `pretty` is True and compact
    separators otherwise.
    """
    # simplejson is used for its ability to serialize iterables as arrays.
    import simplejson

    if pretty:
        formatting = dict(indent=2 * b' ')
    else:
        formatting = dict(separators=(b',', b':',))
    serialized = simplejson.dumps(
        results, iterable_as_array=True, encoding='utf-8', **formatting)
    return serialized + b'\n'
|
||
|
||
def run_scan(location, **kwargs):
    """
    Run a scan of `location` with `scancode.cli.run_scan` and return the
    results as a JSON string. The optional `pretty` keyword (True by default)
    is consumed here to select the JSON formatting; all the other keyword
    arguments are passed through to the scan as-is.
    """
    from scancode import cli
    # `pretty` is a serialization option and not a scan option: remove it
    # before calling the scan function.
    pretty = kwargs.pop('pretty', True)
    scan_results = cli.run_scan(location, **kwargs)
    return as_json(scan_results, pretty=pretty)
|
||
|
||
if __name__ == '__channelexec__':
    # This runs when the module is executed remotely through an execnet
    # gateway: each item received on the channel is one scan request and one
    # JSON result is sent back for each request.
    for request in channel:  # NOQA
        # a request is either a mapping of kwargs or a location string
        if isinstance(request, dict):
            response = run_scan(**request)
        elif isinstance(request, (str, unicode)):  # NOQA
            response = run_scan(request)
        else:
            raise Exception('Unknown arguments type: ' + repr(request))
        channel.send(response)  # NOQA