Comparing changes

base repository: ethereum/node-crawler (base: main)
head repository: angaz/node-crawler (compare: main)

Can’t automatically merge, but the pull request can still be created.
Showing with 16,143 additions and 22,623 deletions.
  1. +11 −12 .github/workflows/test.yaml
  2. +12 −4 .gitignore
  3. +3 −1 .golangci.yml
  4. +2 −2 Dockerfile
  5. +21 −6 README.md
  6. +57 −118 cmd/crawler/api.go
  7. +195 −0 cmd/crawler/crawler_cmd.go
  8. +0 −131 cmd/crawler/crawlercmd.go
  9. +101 −16 cmd/crawler/flags.go
  10. +54 −0 cmd/crawler/gen_keys_cmd.go
  11. +13 −18 cmd/crawler/main.go
  12. +38 −0 cmd/crawler/migrate.go
  13. +52 −0 cmd/crawler/print_enode.go
  14. +17 −75 cmd/crawler/setup.go
  15. +125 −11 cmd/crawler/utils.go
  16. +27 −17 docker-compose.yml
  17. +125 −41 flake.lock
  18. +349 −63 flake.nix
  19. +0 −1 frontend/.env
  20. +0 −16 frontend/Dockerfile
  21. +0 −1 frontend/clients.json
  22. +0 −1 frontend/deploy.bat
  23. +0 −20 frontend/nginx.conf
  24. +0 −17,652 frontend/package-lock.json
  25. +0 −60 frontend/package.json
  26. BIN frontend/public/android-chrome-192x192.png
  27. BIN frontend/public/android-chrome-512x512.png
  28. BIN frontend/public/apple-touch-icon.png
  29. +0 −9 frontend/public/browserconfig.xml
  30. BIN frontend/public/favicon-16x16.png
  31. BIN frontend/public/favicon-32x32.png
  32. +0 −51 frontend/public/index.html
  33. BIN frontend/public/mstile-144x144.png
  34. BIN frontend/public/mstile-150x150.png
  35. BIN frontend/public/mstile-310x150.png
  36. BIN frontend/public/mstile-310x310.png
  37. BIN frontend/public/mstile-70x70.png
  38. +0 −3 frontend/public/robots.txt
  39. +0 −32 frontend/public/safari-pinned-tab.svg
  40. +0 −19 frontend/public/site.webmanifest
  41. +0 −27 frontend/src/atoms/Card.tsx
  42. +0 −20 frontend/src/atoms/CustomResponsiveContainer.tsx
  43. +0 −26 frontend/src/atoms/Logo.tsx
  44. +0 −29 frontend/src/atoms/TablePlus.tsx
  45. +0 −22 frontend/src/atoms/TooltipCard.tsx
  46. +0 −22 frontend/src/config.ts
  47. +0 −34 frontend/src/data/DataMassager.ts
  48. +0 −202 frontend/src/data/DataProcessor.test.ts
  49. +0 −366 frontend/src/data/DataProcessor.ts
  50. +0 −36 frontend/src/data/FilterRunner.test.ts
  51. +0 −51 frontend/src/data/FilterRunner.ts
  52. +0 −198 frontend/src/data/FilterUtils.ts
  53. +0 −31 frontend/src/data/SortedMap.ts
  54. +0 −16 frontend/src/index.tsx
  55. +0 −30 frontend/src/organisms/ColorModeSwitcher.tsx
  56. +0 −353 frontend/src/organisms/Filtering.tsx
  57. +0 −23 frontend/src/organisms/Footer.tsx
  58. +0 −44 frontend/src/organisms/Header.tsx
  59. +0 −18 frontend/src/organisms/Loader.tsx
  60. +0 −269 frontend/src/pages/Home.tsx
  61. +0 −1 frontend/src/react-app-env.d.ts
  62. +0 −11 frontend/src/setupProxy.js
  63. +0 −5 frontend/src/setupTests.ts
  64. +0 −20 frontend/src/templates/Layout.tsx
  65. +0 −17 frontend/src/templates/Routing.tsx
  66. +0 −64 frontend/src/theme.ts
  67. +0 −26 frontend/tsconfig.json
  68. +141 −99 go.mod
  69. +512 −418 go.sum
  70. +58 −292 pkg/api/api.go
  71. +19 −0 pkg/api/help.go
  72. +99 −0 pkg/api/history.go
  73. +55 −0 pkg/api/nodes.go
  74. +91 −0 pkg/api/nodes_list.go
  75. +219 −0 pkg/api/params.go
  76. +201 −0 pkg/api/portal_stats.go
  77. +385 −0 pkg/api/root.go
  78. +88 −0 pkg/api/snapshots.go
  79. +28 −0 pkg/api/static.go
  80. +0 −141 pkg/apidb/database.go
  81. +8 −8 pkg/common/client_info.go
  82. +375 −0 pkg/common/enr.go
  83. +113 −0 pkg/common/enr_test.go
  84. +52 −0 pkg/common/forkid.go
  85. +169 −0 pkg/common/node_keys.go
  86. +63 −1 pkg/common/nodes.go
  87. +717 −0 pkg/common/parse_client.go
  88. +198 −0 pkg/common/parse_client_test.go
  89. +57 −0 pkg/common/parse_portal_client.go
  90. +50 −0 pkg/common/parse_portal_client_test.go
  91. +56 −0 pkg/common/since.go
  92. +62 −0 pkg/common/since_test.go
  93. +486 −0 pkg/consensus/crawler/crawler.go
  94. +11 −0 pkg/consensus/types/types.go
  95. +133 −0 pkg/consensus/types/types_encoding.go
  96. +0 −363 pkg/crawler/crawl.go
  97. +0 −110 pkg/crawler/enr.go
  98. +0 −201 pkg/crawler/handshake.go
  99. +0 −66 pkg/crawler/helper.go
  100. +0 −56 pkg/crawlerdb/crawlerdb.go
  101. +0 −172 pkg/crawlerdb/db.go
  102. +1,122 −0 pkg/database/api.go
  103. +81 −0 pkg/database/cleaner.go
  104. +344 −0 pkg/database/db.go
  105. +386 −0 pkg/database/disc.go
  106. +518 −0 pkg/database/execution_crawler.go
  107. +30 −0 pkg/database/geoip_daemon.go
  108. +210 −0 pkg/database/history.go
  109. +156 −0 pkg/database/migrate.go
  110. +375 −0 pkg/database/migrations/000_schema.go
  111. +1,058 −0 pkg/database/migrations/001_sqlite_to_pg.go
  112. +131 −0 pkg/database/migrations/002_stats_views.go
  113. +39 −0 pkg/database/migrations/003_geoip.go
  114. +61 −0 pkg/database/migrations/004_portal.go
  115. +117 −0 pkg/database/migrations/005_portal_stats_view.go
  116. +22 −0 pkg/database/migrations/006_message_too_big_error.go
  117. +49 −0 pkg/database/migrations/100_consensus.go
  118. +143 −0 pkg/database/migrations/execution_node_view.go
  119. +247 −0 pkg/database/migrations/geoip.go
  120. +170 −0 pkg/database/migrations/insert_networks.go
  121. +68 −0 pkg/database/migrations/portal_nodes_view.go
  122. +242 −0 pkg/database/migrations/upsert_strings.go
  123. +62 −0 pkg/database/node_list.go
  124. +250 −0 pkg/database/node_table.go
  125. +194 −0 pkg/database/portal.go
  126. +305 −0 pkg/database/portal_stats.go
  127. +59 −0 pkg/database/schema_versions.go
  128. +1 −0 pkg/database/sql/schema.sql
  129. +1 −0 pkg/database/sql/stats.sql
  130. +613 −0 pkg/database/stats.go
  131. +144 −0 pkg/execution/crawler/crawler.go
  132. +48 −0 pkg/execution/disc/bootnodes.go
  133. +281 −0 pkg/execution/disc/daemon.go
  134. +41 −0 pkg/execution/disc/shared_udp_conn.go
  135. +191 −0 pkg/execution/listener/listener.go
  136. +192 −0 pkg/execution/p2p/get_client_info.go
  137. +122 −0 pkg/execution/p2p/handshake.go
  138. +24 −30 pkg/{crawler/connection.go → execution/p2p/messages.go}
  139. +62 −0 pkg/fifomemory/fifo_memory.go
  140. +63 −0 pkg/fifomemory/fifo_memory_test.go
  141. +153 −0 pkg/metrics/metrics.go
  142. +129 −0 pkg/networks/ephemery.go
  143. +165 −0 pkg/networks/forks.go
  144. +37 −0 pkg/networks/genesis.go
  145. +116 −0 pkg/networks/networks.go
  146. +22 −0 pkg/portal/disc/bootnodes.go
  147. +187 −0 pkg/portal/disc/disc.go
  148. +90 −0 pkg/portal/listener/listener.go
  149. +51 −0 pkg/version/version.go
  150. +0 −173 pkg/vparser/vparser.go
  151. +0 −152 pkg/vparser/vparser_test.go
  152. BIN public/blue-marble.png
  153. BIN public/eth-diamond-purple.png
  154. BIN {frontend → }/public/favicon.ico
  155. +1,257 −0 public/node.templ
  156. +48 −0 public/portal.templ
  157. +18 −0 public/static.go
23 changes: 11 additions & 12 deletions .github/workflows/test.yaml
@@ -1,6 +1,5 @@
 name: node-crawler workflow
 
-
 on:
   push:
     branches:
@@ -16,26 +15,26 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Set up Go
-        uses: actions/setup-go@v2
+        uses: actions/setup-go@v5
         with:
-          go-version: 1.20.5
-      - name: Download golangci-lint
-        run: wget -O- -nv https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s latest
-      - name: Lint
-        run: ./bin/golangci-lint run --config .golangci.yml
+          go-version: 1.21.4
+      - name: golangci-lint
+        uses: golangci/golangci-lint-action@v4
+        with:
+          version: v1.56.2
+      - name: Vet
+        run: go vet ./...
 
   test:
     runs-on: ubuntu-latest
     needs: lint
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Set up Go
-        uses: actions/setup-go@v2
+        uses: actions/setup-go@v5
         with:
-          go-version: 1.20.5
+          go-version: 1.21.4
       - name: Test
-        run: go test -v ./...
+        run: go test -v ./...
16 changes: 12 additions & 4 deletions .gitignore
@@ -16,11 +16,19 @@ build
 
 npm-debug.log*
 
-nodetable
-nodes
-node-crawler-backend.exe
-node-crawler-backend
+# application
+/data/
+/snapshots/
+/*.db
+/*.db.gz
+/*.db-*
+node-crawler-backend
+node-crawler-backend.exe
+*.keys
+nodes
+nodetable
+public/*_templ.go
+
+# nix
+result
+.data
4 changes: 3 additions & 1 deletion .golangci.yml
@@ -18,14 +18,16 @@ linters:
     - misspell
     - unconvert
     - typecheck
+    - exhaustruct
     # this repo contains a few copied files from go-ethereum,
     # and some of them have unused fields/functions
-    #- unused
+    # - unused
     - staticcheck
     - bidichk
     - durationcheck
     - exportloopref
     - whitespace
+    - promlinter
 
 # - structcheck # lots of false positives
 # - errcheck #lot of false positives
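
The newly enabled `exhaustruct` linter rejects struct literals that leave any field uninitialized, which is why the rewritten `apiCommand` literal in cmd/crawler/api.go further down carries a `//nolint:exhaustruct` directive. A minimal sketch of the rule's behavior; the `Config` struct and its fields below are illustrative, not taken from the repository:

```
package main

// Config is a hypothetical struct used only to illustrate exhaustruct.
type Config struct {
	Addr    string
	Timeout int
}

func main() {
	// exhaustruct reports this literal because Timeout is left out.
	a := Config{Addr: ":8080"}

	// Initializing every field satisfies the linter.
	b := Config{Addr: ":8080", Timeout: 30}

	// Or suppress the check where a partial literal is intentional,
	// as this PR does for the apiCommand declaration.
	//nolint:exhaustruct
	c := Config{Addr: ":8080"}

	_, _, _ = a, b, c
}
```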
4 changes: 2 additions & 2 deletions Dockerfile
@@ -1,5 +1,5 @@
 # Compile api
-FROM golang:1.20-alpine AS builder
+FROM golang:1.21-alpine AS builder
 WORKDIR /app
 
 COPY go.mod go.sum ./
@@ -10,7 +10,7 @@ RUN go build ./cmd/crawler
 
 
 # Copy compiled stuff and run it
-FROM golang:1.20-alpine
+FROM alpine
 
 COPY --from=builder /app/crawler /app/crawler
27 changes: 21 additions & 6 deletions README.md
@@ -93,9 +93,9 @@ go run ./cmd/crawler
 - golang
 - sqlite3
 
-##### Country location
+##### City location
 
-- `GeoLite2-Country.mmdb` file from [https://dev.maxmind.com/geoip/geolite2-free-geolocation-data?lang=en](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data?lang=en)
+- `GeoLite2-City.mmdb` file from [https://dev.maxmind.com/geoip/geolite2-free-geolocation-data?lang=en](https://dev.maxmind.com/geoip/geolite2-free-geolocation-data?lang=en)
 - you will have to create an account to get access to this file
 
 #### Development
@@ -119,7 +119,7 @@ go build ./cmd/crawler -o /usr/bin/node-crawler
 ```
 
 Create a systemd service similarly to above API example. In executed command, override default settings by pointing crawler database to chosen path and setting period to write crawled nodes.
-If you want to get the country that a Node is in you have to specify the location the geoIP database as well.
+If you want to get the city that a Node is in you have to specify the location the geoIP database as well.
 
 ##### No GeoIP
 
@@ -130,7 +130,7 @@ node-crawler crawl --timeout 10m --crawler-db /path/to/database
 ##### With GeoIP
 
 ```
-node-crawler crawl --timeout 10m --crawler /path/to/database --geoipdb GeoLite2-Country.mmdb
+node-crawler crawl --timeout 10m --crawler /path/to/database --geoipdb GeoLite2-City.mmdb
 ```
 
 ### Docker setup
@@ -240,19 +240,20 @@ Your example `configuration.nix`:
     nodeCrawler = {
       enable = true;
       hostName = "server hostname";
+      api.enodePubkey = "asdf1234...";
       nginx = {
         forceSSL = true;
         enableACME = true;
       };
     };
-    # Needed for the node crawler to get the country
+    # Needed for the node crawler to get the city
     # of the crawled IP address.
     geoipupdate = {
       enable = true;
       settings = {
         EditionIDs = [
-          "GeoLite2-Country"
+          "GeoLite2-City"
         ];
         AccountID = account_id;
         LicenseKey = "location of licence key on server";
@@ -268,3 +269,17 @@ Your example `configuration.nix`:
   };
 }
 ```
+
+### TODO
+- [ ] Enums instead of numbers in the URLs
+- [ ] More stats
+  - [ ] Client Versions
+  - [ ] Countries link to show cities in that country
+- [ ] More filters
+  - [ ] Country/City
+  - [ ] OS/Arch
+  - [ ] Custom inputs for Network ID filter
+- [ ] Info/help where more details could be useful
+- [ ] Expand help page
+  - [ ] What do the error messages mean, what should the user do for each one?
+  - [ ] Instructions on how to connect for each client
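
Since the README change swaps the GeoLite2-Country edition for GeoLite2-City throughout, a quick sketch of what a City lookup involves may be useful. The crawler's own GeoIP code is not shown in this diff, so this assumes the commonly used `github.com/oschwald/geoip2-golang` reader and a placeholder IP address:

```
package main

import (
	"fmt"
	"log"
	"net"

	"github.com/oschwald/geoip2-golang"
)

func main() {
	// Open the database file the README now tells you to download.
	db, err := geoip2.Open("GeoLite2-City.mmdb")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Look up a crawled node's IP address (documentation-range placeholder).
	record, err := db.City(net.ParseIP("203.0.113.7"))
	if err != nil {
		log.Fatal(err)
	}

	// The City edition is a superset of the Country edition, so country
	// data remains available alongside the city name.
	fmt.Println(record.Country.IsoCode, record.City.Names["en"])
}
```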
175 changes: 57 additions & 118 deletions cmd/crawler/api.go
@@ -1,149 +1,88 @@
 package main
 
 import (
-	"database/sql"
 	"fmt"
-	"os"
+	"net/http"
 	"sync"
-	"time"
-
-	_ "modernc.org/sqlite"
 
-	"github.com/ethereum/go-ethereum/log"
+	"log/slog"
 
 	"github.com/ethereum/node-crawler/pkg/api"
-	"github.com/ethereum/node-crawler/pkg/apidb"
-	"github.com/ethereum/node-crawler/pkg/crawlerdb"
+	"github.com/ethereum/node-crawler/pkg/common"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"github.com/urfave/cli/v2"
 )
 
-var (
-	apiCommand = &cli.Command{
-		Name:   "api",
-		Usage:  "API server for the crawler",
-		Action: startAPI,
-		Flags: []cli.Flag{
-			&apiDBFlag,
-			&apiListenAddrFlag,
-			&autovacuumFlag,
-			&busyTimeoutFlag,
-			&crawlerDBFlag,
-			&dropNodesTimeFlag,
-		},
-	}
-)
+//nolint:exhaustruct
+var apiCommand = &cli.Command{
+	Name:   "api",
+	Usage:  "API server for the crawler",
+	Action: runAPI,
+	Flags: []cli.Flag{
+		&apiListenAddrFlag,
+		&busyTimeoutFlag,
+		&dropNodesTimeFlag,
+		&enodeAddrFlag,
+		&listenStartPortFlag,
+		&metricsAddressFlag,
+		&executionNodeKeysFileFlag,
+		&postgresFlag,
+		&snapshotDirFlag,
+		&statsUpdateFrequencyFlag,
+	},
+}
 
-func startAPI(ctx *cli.Context) error {
-	autovacuum := ctx.String(autovacuumFlag.Name)
-	busyTimeout := ctx.Uint64(busyTimeoutFlag.Name)
+func runAPI(cCtx *cli.Context) error {
+	db, err := openDBReader(cCtx)
+	if err != nil {
+		return fmt.Errorf("open db failed: %w", err)
+	}
+	defer db.Close()
 
-	crawlerDB, err := openSQLiteDB(
-		ctx.String(crawlerDBFlag.Name),
-		autovacuum,
-		busyTimeout,
-	)
+	wg := new(sync.WaitGroup)
+	wg.Add(1)
+
+	enodes, err := readEnodes(cCtx)
 	if err != nil {
-		return err
+		return fmt.Errorf("Read enodes failed: %w", err)
 	}
 
-	apiDBPath := ctx.String(apiDBFlag.Name)
-	shouldInit := false
-	if _, err := os.Stat(apiDBPath); os.IsNotExist(err) {
-		shouldInit = true
-	}
-	nodeDB, err := openSQLiteDB(
-		apiDBPath,
-		autovacuum,
-		busyTimeout,
-	)
-	if err != nil {
-		return err
-	}
-	if shouldInit {
-		log.Info("DB did not exist, init")
-		if err := apidb.CreateDB(nodeDB); err != nil {
-			return err
-		}
-	}
-
-	var wg sync.WaitGroup
-	wg.Add(3)
-
-	// Start reading deamon
-	go newNodeDeamon(&wg, crawlerDB, nodeDB)
-	go dropDeamon(&wg, nodeDB, ctx.Duration(dropNodesTimeFlag.Name))
+	// Start the API deamon
+	api := api.New(
+		db,
+		statsUpdateFrequencyFlag.Get(cCtx),
+		enodes,
+		snapshotDirFlag.Get(cCtx),
+	)
+	go api.StartServer(
+		wg,
+		apiListenAddrFlag.Get(cCtx),
+	)
 
-	// Start the API deamon
-	apiAddress := ctx.String(apiListenAddrFlag.Name)
-	apiDeamon := api.New(apiAddress, nodeDB)
-	go apiDeamon.HandleRequests(&wg)
+	// Start metrics server
+	metricsAddr := metricsAddressFlag.Get(cCtx)
+	slog.Info("starting metrics server", "address", metricsAddr)
+	http.Handle("/metrics", promhttp.Handler())
+	http.ListenAndServe(metricsAddr, nil)
+
 	wg.Wait()
 
 	return nil
 }
 
-func transferNewNodes(crawlerDB, nodeDB *sql.DB) error {
-	crawlerDBTx, err := crawlerDB.Begin()
+func readEnodes(cCtx *cli.Context) ([]string, error) {
+	keys, err := common.ReadNodeKeys(executionNodeKeysFileFlag.Get(cCtx))
 	if err != nil {
-		// Sometimes error occur trying to read the crawler database, but
-		// they are normally recoverable, and a lot of the time, it's
-		// because the database is locked by the crawler.
-		return fmt.Errorf("error starting transaction to read nodes: %w", err)
+		return nil, fmt.Errorf("read node keys: %w", err)
 	}
-	defer crawlerDBTx.Rollback()
 
-	nodes, err := crawlerdb.ReadAndDeleteUnseenNodes(crawlerDBTx)
-	if err != nil {
-		// Simiar to nodeDB.Begin() error
-		return fmt.Errorf("error reading nodes: %w", err)
-	}
-
-	if len(nodes) > 0 {
-		err := apidb.InsertCrawledNodes(nodeDB, nodes)
-		if err != nil {
-			// This shouldn't happen because the database is not shared in this
-			// instance, so there shouldn't be lock errors, but anything can
-			// happen. We will still try again.
-			return fmt.Errorf("error inserting nodes: %w", err)
-		}
-		log.Info("Nodes inserted", "len", len(nodes))
-	}
-
-	crawlerDBTx.Commit()
-	return nil
-}
-
-// newNodeDeamon reads new nodes from the crawler and puts them in the db
-// Might trigger the invalidation of caches for the api in the future
-func newNodeDeamon(wg *sync.WaitGroup, crawlerDB, nodeDB *sql.DB) {
-	defer wg.Done()
-
-	// This is so that we can make some kind of exponential backoff for the
-	// retries.
-	retryTimeout := time.Minute
-
-	for {
-		err := transferNewNodes(crawlerDB, nodeDB)
-		if err != nil {
-			log.Error("Failure in transferring new nodes", "err", err)
-			time.Sleep(retryTimeout)
-			retryTimeout *= 2
-			continue
-		}
-
-		retryTimeout = time.Minute
-		time.Sleep(time.Second)
-	}
-}
-
-func dropDeamon(wg *sync.WaitGroup, db *sql.DB, dropTimeout time.Duration) {
-	defer wg.Done()
-	ticker := time.NewTicker(10 * time.Minute)
-	defer ticker.Stop()
-
-	for {
-		<-ticker.C
-		err := apidb.DropOldNodes(db, dropTimeout)
-		if err != nil {
-			panic(err)
-		}
-	}
-}
+	enodes := common.KeysToEnodes(
+		keys,
+		enodeAddrFlag.Get(cCtx),
+		listenStartPortFlag.Get(cCtx),
+	)
+
+	return enodes, nil
+}
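
One detail of the new `runAPI` worth flagging: `http.ListenAndServe` for the metrics endpoint blocks the command's main goroutine, and its return value is discarded, so a bad metrics address would fail silently. A standalone sketch of the same promhttp pattern with the error surfaced; the address literal is illustrative, since the real value comes from `metricsAddressFlag`:

```
package main

import (
	"log/slog"
	"net/http"
	"os"

	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// In runAPI this comes from a CLI flag; hard-coded here.
	metricsAddr := ":9100"

	// Expose the default Prometheus registry, as the diff does.
	http.Handle("/metrics", promhttp.Handler())

	slog.Info("starting metrics server", "address", metricsAddr)

	// ListenAndServe blocks. Unlike the diff, check its return value so a
	// bind failure is reported instead of being dropped.
	if err := http.ListenAndServe(metricsAddr, nil); err != nil {
		slog.Error("metrics server failed", "err", err)
		os.Exit(1)
	}
}
```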