Skip to content

Commit

Permalink
fix and clean finetuning data collection
Browse files Browse the repository at this point in the history
  • Loading branch information
kuronosec committed Mar 25, 2024
1 parent 42e770f commit b5fe5fa
Showing 1 changed file with 8 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
"# Malicious Smart Contract Detection Training Dataset Collection Notebook\n",
"\n",
"This notebook collects smart contract creation bytecode and decompiled opcodes for malicious contract classification. \n",
"Benign contracts are gathered from Zettablock and malicious contracts from [Forta Network's labelled datasets github repo](\"https://github.com/forta-network/labelled-datasets\"). "
"Benign contracts are gathered from Zettablock and malicious contracts from [Forta Network's labelled datasets GitHub repo](https://github.com/forta-network/labelled-datasets).\n",
"\n",
"# Code provided by the Forta project"
]
},
{
Expand Down Expand Up @@ -54,7 +56,7 @@
"metadata": {},
"outputs": [],
"source": [
"ALCHEMY_URL = \"https://eth-mainnet.g.alchemy.com/v2/f8hEqd_nKEpamacB-zXeWPv7R9QmWKPx\"\n",
"ALCHEMY_URL = \"https://eth-mainnet.g.alchemy.com/v2/XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\"\n",
"w3_eth = Web3(Web3.HTTPProvider(ALCHEMY_URL))\n",
"TRACES = {}\n",
"\n",
Expand Down Expand Up @@ -129,7 +131,7 @@
"outputs": [],
"source": [
"def get_malicious_contracts() -> pd.DataFrame:\n",
" data_path = '/data/forta/ethereum/text/malicious_data.pkl'\n",
" data_path = '/data/forta/ethereum/text/finetuning/malicious_data.pkl'\n",
" malicious = None\n",
"\n",
" if os.path.exists(data_path):\n",
Expand Down Expand Up @@ -157,7 +159,7 @@
"outputs": [],
"source": [
"CONTRACT_DATA = {}\n",
"ETHERSCAN_API_KEY=\"SX7737W8M9DDTYHSXSV8XG8G945PMC9D4U\"\n",
"ETHERSCAN_API_KEY=\"XXXXXXXXXXXXXXXX\"\n",
"\n",
"def get_contract_transactions(contracts):\n",
" \"\"\"Get contract transaction info from Etherscan.\"\"\"\n",
Expand Down Expand Up @@ -211,7 +213,7 @@
"'''\n",
"\n",
"def get_benign_contracts() -> pd.DataFrame:\n",
" data_path = '/data/forta/ethereum/text/benign_data.pkl'\n",
" data_path = '/data/forta/ethereum/text/finetuning/malicious_data.pkl'\n",
" benign = None\n",
"\n",
" if os.path.exists(data_path):\n",
Expand Down Expand Up @@ -297,7 +299,7 @@
"metadata": {},
"outputs": [],
"source": [
"dataset.fillna('').to_parquet('/data/forta/ethereum/text/malicious_contract_training_dataset_final.parquet', index=None)"
"dataset.fillna('').to_parquet('/data/forta/ethereum/text/finetuning/malicious_contract_training_dataset_final.parquet', index=None)"
]
},
{
Expand All @@ -309,14 +311,6 @@
"source": [
"dataset['malicious'].value_counts().plot(kind='pie', figsize=(7, 7))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c9e10b1b-61bb-47fc-b608-2bc9f226b7f8",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down

0 comments on commit b5fe5fa

Please sign in to comment.