From c8c5149622fdd5d58300ebefcc52e21e714de1e9 Mon Sep 17 00:00:00 2001
From: m8r0wn
Date: Thu, 15 Jun 2023 15:51:48 -0400
Subject: [PATCH] updates & fix issue #20
---
README.md | 99 +++++++++++++++++++++++++++++------------
crosslinked/__init__.py | 43 +++++++++++-------
crosslinked/logger.py | 7 +--
crosslinked/search.py | 50 ++++++++++-----------
requirements.txt | 20 ++++-----
setup.py | 10 ++---
6 files changed, 139 insertions(+), 90 deletions(-)
diff --git a/README.md b/README.md
index 11b9cd3..c2fc424 100644
--- a/README.md
+++ b/README.md
@@ -1,22 +1,43 @@
-# CrossLinked
+<h1 align="center">CrossLinked</h1>
+<!-- centered logo and badge images -->
+
CrossLinked is a LinkedIn enumeration tool that uses search engine scraping to collect valid employee names from an
organization. This technique provides accurate results without the use of API keys, credentials, or direct access to
LinkedIn!
-## Sponsors
-
+## Table of Contents
+- [Install](#install)
+- [Prerequisites](#prerequisites)
+ + [Naming Format](#naming-format)
+ + [Advanced Formatting](#advanced-formatting)
+- [Search](#search)
+ * [Example Usage](#example-usage)
+ * [Screenshots](#screenshots)
+- [Parse](#parse)
+ * [Example Usage](#example-usage-1)
+ * [Screenshots](#screenshots-1)
+- [Additional Options](#additional-options)
+ * [Proxy Rotation](#proxy-rotation)
+- [Command-Line Arguments](#command-line-arguments)
+- [Contribute](#contribute)
+
+
+# Sponsors
> 🚩 Consider sponsoring this project to ensure the latest improvements, have your company logo listed here, and get priority support - visit [github.com/sponsors/m8sec](https://github.com/sponsors/m8sec)
-## Install
+
+# Install
Install the latest stable release from PyPI:
```commandline
pip3 install crosslinked
@@ -29,61 +50,84 @@ python3 setup.py install
```
-## Prerequisite
-CrossLinked assumes the organization's account naming convention has already been identified. This is required for execution and should be added to the CMD args based on your expected output. See the `Naming Format` and `Example Usage` sections below:
+# Prerequisites
+CrossLinked assumes the organization's account naming convention has already been identified. This is required for execution and is supplied to the `-f` argument in the format of your expected output. See the [Naming Format](#naming-format) and [Example Usage](#example-usage) sections below:
### Naming Format
```text
-{f}.{last} = j.smith
{first}.{last} = john.smith
CMP\{first}{l} = CMP\johns
{f}{last}@company.com = jsmith@company.com
```
-> ***Still Stuck?** Metadata is always a good place to check for hidden information such as account naming convention. see [PyMeta](https://github.com/m8sec/pymeta) for more.*
+> 🦖 ***Still Stuck?** Metadata is always a good place to check for hidden information, such as the account naming convention. See [PyMeta](https://github.com/m8sec/pymeta) for more.*
+
+
+
+### Advanced Formatting
+:boom: **New Feature** :boom:
+To be compatible with alternate naming conventions, CrossLinked allows users to control the index position of the name extracted from the search text. If the name is not long enough, or errors are encountered with the search string, CrossLinked will revert to its default format.
-## Search
+***Note**: the scraped name is split on spaces into an array that starts at index `0`. Negative numbers can also be used to count backwards from the last value.*
+
+```
+# Default output
+crosslinked -f '{first}.{last}@company.com' Company
+John David Smith = john.smith@company.com
+
+# Use the second-to-last name as "last"
+crosslinked -f '{0:first}.{-2:last}@company.com' Company
+John David Smith = john.david@company.com
+Jane Doe = jane.doe@company.com
+
+# Use the second item in the array as "last"
+crosslinked -f '{first}.{1:last}@company.com' Company
+John David Smith = john.david@company.com
+Jane Doe = jane.doe@company.com
+```
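+
+Under the hood, the index tokens are applied to the space-split name. The snippet below is a simplified sketch of that behavior, not the exact implementation; names with fewer than three parts fall back to the default first/last positions:
+
+```python
+# Roughly how '{0:first}.{-2:last}@company.com' resolves for "John David Smith"
+name_parts = "John David Smith".split(' ')    # ['John', 'David', 'Smith']
+
+f_position, l_position = 0, -2                # indexes taken from '{0:first}' and '{-2:last}'
+first = name_parts[f_position] if len(name_parts) > 2 else name_parts[0]
+last = name_parts[l_position] if len(name_parts) > 2 else name_parts[-1]
+
+print('{}.{}@company.com'.format(first, last).lower())   # john.david@company.com
+```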
+
+
+# Search
By default, CrossLinked will use `google` and `bing` search engines to identify employees of the target organization. After execution, two files (`names.txt` & `names.csv`) will appear in the current directory, unless modified in the CMD args.
* *names.txt* - List of unique user accounts in the specified format.
* *names.csv* - Raw search data. See the `Parse` section below for more.
-### Example Usage
+## Example Usage
```bash
python3 crosslinked.py -f '{first}.{last}@domain.com' company_name
```
+
```bash
python3 crosslinked.py -f 'domain\{f}{last}' -t 15 -j 2 company_name
```
-
-> ***Note:** For best results, use the company name as it appears on LinkedIn `"Target Company"` not the domain name.*
+> ⚠️ For best results, use the company name as it appears on LinkedIn (e.g. `"Target Company"`), not the domain name.
-### Screenshots
+## Screenshots
![](https://user-images.githubusercontent.com/13889819/190488899-0f4bea2d-6c31-422f-adce-b56f7be3d906.png)
-## Parse
-:boom: **New Feature** :boom:
-
+# Parse
+*Account naming convention changed after execution and now you're hitting CAPTCHA requests? No problem!*
-CrossLinked v0.2.0 now includes a `names.csv` output file, which stores all scraping data including: `first name`, `last name`, `job title`, and `url`. This can be ingested and parsed to reformat user accounts as needed.
+CrossLinked includes a `names.csv` output file, which stores all scraping data, including the `name`, `job title`, and `url`. This can be ingested and parsed to reformat user accounts as needed.
-### Example Usage
+
+## Example Usage
```
python3 crosslinked.py -f '{f}{last}@domain.com' names.csv
```
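+
+If a different output format is needed later, the raw CSV can also be reworked with a few lines of Python. A minimal sketch, assuming the column order written by this release (`Datetime, Search, Name, Title, URL, rawText`) and a placeholder `domain.com`:
+
+```python
+import csv
+
+# Rebuild '{f}{last}@domain.com' style accounts from the Name column of names.csv
+with open('names.csv') as f:
+    rows = csv.reader(f)
+    next(rows)                                # skip the header row
+    for r in rows:
+        name = r[2].strip().lower().split()   # 'Name' is the third column
+        if len(name) >= 2:
+            print('{}{}@domain.com'.format(name[0][0], name[-1]))
+```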
-### Screenshots
+## Screenshots
![](https://user-images.githubusercontent.com/13889819/190494309-c6da8cdc-4312-4e53-a0bb-1fffbc9698e4.png)
-## Additional Options
-### Proxy Rotation
+# Additional Options
+## Proxy Rotation
The latest version of CrossLinked provides proxy support to rotate source addresses. Users can input a single proxy with `--proxy 127.0.0.1:8080` or use multiple via `--proxy-file proxies.txt`.
@@ -95,11 +139,10 @@ socks5://222.222.222.222
> python3 crosslinked.py --proxy-file proxies.txt -f '{first}.{last}@company.com' -t 10 "Company"
```
-
-> ***Note:** `HTTP/S` proxies can be added by IP:Port notation. However, socks proxies will require a `socks4://` or `socks5://` prefix.*
+> ⚠️ `HTTP/S` proxies can be added in IP:Port notation. However, SOCKS proxies require a `socks4://` or `socks5://` prefix.
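+
+For reference, rotation boils down to picking a random entry from the list for each request and handing it to the HTTP client. A rough sketch using the `requests` library, not CrossLinked's exact implementation; SOCKS entries additionally require PySocks (`pip3 install 'requests[socks]'`):
+
+```python
+import random
+import requests
+
+# Pick a random proxy per request; values mirror the proxies.txt examples above
+with open('proxies.txt') as f:
+    proxies = [line.strip() for line in f if line.strip()]
+
+proxy = random.choice(proxies)   # e.g. '127.0.0.1:8080' or 'socks5://222.222.222.222'
+resp = requests.get('https://www.bing.com', proxies={'http': proxy, 'https': proxy}, timeout=10)
+```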
-### Usage
+# Command-Line Arguments
```
positional arguments:
company_name Target company name
@@ -122,7 +165,7 @@ Proxy arguments:
```
-## Contribute
+# Contribute
Contribute to the project by:
* Like and share the tool!
* Create an issue to report any problems or, better yet, initiate a PR.
diff --git a/crosslinked/__init__.py b/crosslinked/__init__.py
index f8c1428..644b86e 100644
--- a/crosslinked/__init__.py
+++ b/crosslinked/__init__.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# Author: @m8sec
# License: GPLv3
-
+import re
import argparse
from sys import exit
from csv import reader
@@ -12,7 +12,7 @@
def banner():
- VERSION = 'v0.2.1'
+ VERSION = 'v0.3.0'
print('''
_____ _ _ _
@@ -64,11 +64,12 @@ def start_parse(args):
utils.file_exists(args.company_name, contents=False)
Log.info('Parsing employee names from \"{}\"'.format(args.company_name))
- with open(args.company_name) as f:
+ with open(args.company_name, 'r') as f:
csv_data = reader(f, delimiter=',')
next(csv_data)
for r in csv_data:
- tmp.append({'fname': r[2], 'lname': r[3]})
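+ # names.csv columns: Datetime, Search, Name, Title, URL, rawText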
+ tmp.append({'name': r[2].strip()}) if r[2] else False
return tmp
@@ -77,23 +78,35 @@ def format_names(args, data, logger):
Log.info('{} names collected'.format(len(data)))
for d in data:
- fname = d['fname'].lower().strip()
- lname = d['lname'].lower().strip()
- name = nformatter(args.nformat, fname, lname)
+ name = nformatter(args.nformat, d['name'].lower())
if name not in tmp:
logger.info(name)
tmp.append(name)
Log.success("{} unique names added to {}!".format(len(tmp), args.outfile+".txt"))
-def nformatter(nformat, first, last):
- # place names in user-defined format
- name = nformat
- name = name.replace('{f}', first[0])
- name = name.replace('{first}', first)
- name = name.replace('{l}', last[0])
- name = name.replace('{last}', last)
- return name
+def nformatter(nformat, name):
+ # Get position of name values in text
+ tmp = nformat.split('}')
+ f_position = int(re.search(r'(-?\d+)', tmp[0]).group(0)) if ':' in tmp[0] else 0
+ l_position = int(re.search(r'(-?\d+)', tmp[1]).group(0)) if ':' in tmp[1] else -1
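+ # No index prefix defaults to the first (0) and last (-1) positions of the split name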
+
+ # Extract names from raw text
+ tmp = name.split(' ')
+ try:
+ f_name = tmp[f_position] if len(tmp) > 2 else tmp[0]
+ l_name = tmp[l_position] if len(tmp) > 2 else tmp[-1]
+ except:
+ f_name = tmp[0]
+ l_name = tmp[-1]
+
+ # Strip index prefixes (e.g. '{-2:last}' -> '{last}') before substituting name values
+ val = re.sub(r'-?\d+:', '', nformat)
+ val = val.replace('{f}', f_name[0])
+ val = val.replace('{first}', f_name)
+ val = val.replace('{l}', l_name[0])
+ val = val.replace('{last}', l_name)
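+ # e.g. nformatter('{first}.{1:last}@x.com', 'john david smith') -> 'john.david@x.com'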
+ return val
def main():
diff --git a/crosslinked/logger.py b/crosslinked/logger.py
index c6d92c0..3c78755 100644
--- a/crosslinked/logger.py
+++ b/crosslinked/logger.py
@@ -59,8 +59,6 @@ def setup_debug_logger():
def setup_file_logger(file_name, log_name='cLinked_file', file_mode='w'):
- init = False if os.path.exists(file_name) else True
-
formatter = logging.Formatter('%(message)s')
fileHandler = logging.FileHandler(file_name, file_mode)
fileHandler.setFormatter(formatter)
@@ -70,14 +68,13 @@ def setup_file_logger(file_name, log_name='cLinked_file', file_mode='w'):
logger.addHandler(fileHandler)
logger.setLevel(logging.INFO)
- first_run(logger) if init else False
-
+ # The FileHandler above has already created the log file, so write the CSV header only when the file is empty
+ first_run(logger) if os.path.getsize(file_name) == 0 else False
return logger
def first_run(logger):
# init headings in CSV log file
- logger.info('Datetime, Search, First, Last, Title, URL, rawText')
+ logger.info('Datetime, Search, Name, Title, URL, rawText')
def setup_cli_logger(log_level=logging.INFO, logger_name='cLinked'):
diff --git a/crosslinked/search.py b/crosslinked/search.py
index 893c19c..a8dc32d 100644
--- a/crosslinked/search.py
+++ b/crosslinked/search.py
@@ -88,8 +88,7 @@ def link_parser(self, url, link):
u = {'url': url}
u['text'] = unidecode(link.text.split("|")[0].split("...")[0]) # Capture link text before trailing chars
u['title'] = self.parse_linkedin_title(u['text']) # Extract job title
- u['fname'] = self.parse_linkedin_fname(u['text']) # Extract first name
- u['lname'] = self.parse_linkedin_lname(u['text']) # Extract last name
+ u['name'] = self.parse_linkedin_name(u['text']) # Extract whole name
return u
def parse_linkedin_title(self, data):
@@ -99,19 +98,10 @@ def parse_linkedin_title(self, data):
except:
return 'N/A'
- def parse_linkedin_fname(self, data):
+ def parse_linkedin_name(self, data):
try:
- fname = data.split("-")[0].split(' ')[0].strip()
- fname = fname.replace("'", "")
- return unidecode(fname)
- except:
- return False
-
- def parse_linkedin_lname(self, data):
- try:
- name = list(filter(None, data.split("-")[0].split(' ')))
- lname = name[-1].strip()
- return unidecode(lname[:-1]) if lname.endswith(".") else unidecode(lname)
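+ # e.g. "John Smith - Security Engineer - ACME" -> "John Smith"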
+ name = data.split("-")[0].strip()
+ return unidecode(name)
except:
return False
@@ -124,17 +114,20 @@ def results_handler(self, link):
return False
data = self.link_parser(url, link)
- if data['fname'] and data['lname']:
- self.log_results(data)
+ self.log_results(data) if data['name'] else False
+
- def log_results(self, data):
- if data in self.results:
+ def log_results(self, d):
+ # Prevent duplicates & non-standard responses (e.g. "linkedin.com")
+ if d in self.results:
return
- self.results.append(data)
+ elif 'linkedin.com' in d['name']:
+ return
+
+ self.results.append(d)
# Search results are logged to names.csv but names.txt is not generated until end to prevent duplicates
- logging.debug(' Fname: {:13} Lname: {:13} RawTxt: {}'.format(data['fname'], data['lname'], data['text']))
- csv.info('"{}","{}","{}","{}","{}","{}","{}",'.format(self.runtime, self.search_engine, data['fname'],
- data['lname'], data['title'], data['url'], data['text']))
+ logging.debug('name: {:25} RawTxt: {}'.format(d['name'], d['text']))
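+ # Column order below must match the CSV header written by logger.first_run()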
+ csv.info('"{}","{}","{}","{}","{}","{}",'.format(self.runtime, self.search_engine, d['name'], d['title'], d['url'], d['text']))
def get_statuscode(resp):
@@ -151,12 +144,15 @@ def get_proxy(proxies):
def get_agent():
return choice([
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 12.5; rv:104.0) Gecko/20100101 Firefox/104.0',
- 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
- 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
- 'Mozilla/5.0 (Macintosh; Intel Mac OS X 12_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36',
- 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0'
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:104.0) Gecko/20100101 Firefox/104.0',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 12.5; rv:104.0) Gecko/20100101 Firefox/104.0',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15',
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15'
])
diff --git a/requirements.txt b/requirements.txt
index 131c771..f9b450b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,10 +1,10 @@
-beautifulsoup4>=4.11.1
-bs4>=0.0.1
-certifi>=2022.9.14
-charset-normalizer>=2.1.1
-idna>=3.4
-lxml>=4.9.1
-requests>=2.28.1
-soupsieve>=2.3.2.post1
-Unidecode>=1.3.4
-urllib3>=1.26.12
\ No newline at end of file
+beautifulsoup4
+bs4
+certifi
+charset-normalizer
+idna
+lxml
+requests
+soupsieve
+Unidecode
+urllib3
\ No newline at end of file
diff --git a/setup.py b/setup.py
index a2a54ff..99aa23f 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
setup(
name='crosslinked',
- version='0.2.1',
+ version='0.3.0',
author='m8sec',
license='GPLv3',
long_description=long_description,
@@ -15,10 +15,10 @@
"crosslinked", "crosslinked.*"
]),
install_requires=[
- 'bs4>=0.0.1',
- 'lxml>=4.9.1',
- 'requests>=2.28.1',
- 'Unidecode>=1.3.4'
+ 'bs4',
+ 'lxml',
+ 'requests',
+ 'Unidecode'
],
classifiers=[
"Environment :: Console",