diff --git a/README.md b/README.md
index 0eeb774..9be6f60 100644
--- a/README.md
+++ b/README.md
@@ -100,25 +100,31 @@ We recommend to use [ClickHouse local](https://clickhouse.com/docs/en/operations
 # count rows
 $ clickhouse local -q "SELECT count(*) FROM 'transactions.parquet' LIMIT 1;"
 
-# get the first hash+rawTx
+# show hash+rawTx from first entry
 $ clickhouse local -q "SELECT hash,hex(rawTx) FROM 'transactions.parquet' LIMIT 1;"
 
-# get details of a particular hash
+# details of a particular hash
 $ clickhouse local -q "SELECT timestamp,hash,from,to,hex(rawTx) FROM 'transactions.parquet' WHERE hash='0x152065ad73bcf63f68572f478e2dc6e826f1f434cb488b993e5956e6b7425eed';"
 
-# get exclusive transactions from bloxroute
+# all transactions seen from mempoolguru
+$ clickhouse local -q "SELECT COUNT(*) FROM 'transactions.parquet' WHERE has(sources, 'mempoolguru');"
+
+# all transactions that were seen by both mempoolguru and chainbound
+$ clickhouse local -q "SELECT COUNT(*) FROM 'transactions.parquet' WHERE hasAll(sources, ['mempoolguru', 'chainbound']);"
+
+# exclusive transactions from bloxroute
 $ clickhouse local -q "SELECT COUNT(*) FROM 'transactions.parquet' WHERE length(sources) == 1 AND sources[1] == 'bloxroute';"
 
-# get count of landed vs not-landed exclusive transactions, by source
+# count of landed vs not-landed exclusive transactions, by source
 $ clickhouse local -q "WITH includedBlockTimestamp!=0 as included SELECT sources[1], included, count(included) FROM 'out/out/transactions.parquet' WHERE length(sources) == 1 GROUP BY sources[1], included;"
 
-# get uniswap v2 transactions
+# uniswap v2 transactions
 $ clickhouse local -q "SELECT COUNT(*) FROM 'transactions.parquet' WHERE to='0x7a250d5630b4cf539739df2c5dacb4c659f2488d';"
 
-# get uniswap v2 transactions and separate by included/not-included
+# uniswap v2 transactions and separate by included/not-included
 $ clickhouse local -q "WITH includedBlockTimestamp!=0 as included SELECT included, COUNT(included) FROM 'transactions.parquet' WHERE to='0x7a250d5630b4cf539739df2c5dacb4c659f2488d' GROUP BY included;"
 
-# get inclusion delay for uniswap v2 transactions (time between receiving and being included on-chain)
+# inclusion delay for uniswap v2 transactions (time between receiving and being included on-chain)
 $ clickhouse local -q "WITH inclusionDelayMs/1000 as incdelay SELECT quantiles(0.5, 0.9, 0.99)(incdelay), avg(incdelay) as avg FROM 'transactions.parquet' WHERE to='0x7a250d5630b4cf539739df2c5dacb4c659f2488d' AND includedBlockTimestamp!=0;"
 
 # count uniswap v2 contract methods
diff --git a/scripts/README.md b/scripts/README.md
index 48b3905..77abfc2 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -35,6 +35,9 @@ journalctl -u mempool-collector -o cat --since "10m ago" | grep "source_stats_al
 
 # source stats - tx first
 journalctl -u mempool-collector -o cat --since "1h ago" | grep "source_stats_first" | awk '{ $1=""; $2=""; $3=""; print $0}' | jq
+
+# count types of trash
+cat trash/*.csv | sed 's/,/ /g' | awk '{ print $4 }' | sort | uniq -c
 ```
 
 CSV tricks
diff --git a/scripts/upload.sh b/scripts/upload.sh
index d6b642e..237cc35 100755
--- a/scripts/upload.sh
+++ b/scripts/upload.sh
@@ -74,6 +74,10 @@ zip "${date}.csv.zip" "${date}.csv"
 gzip -k "${date}.csv"
 zip "${date}_sourcelog.csv.zip" "${date}_sourcelog.csv"
 
+# combine and zip trash files
+cat trash/*.csv > "${date}_trash.csv"
+zip "${date}_trash.csv.zip" "${date}_trash.csv"
+
 # upload to Cloudflare R2 and AWS S3
 echo "Uploading ${date}.parquet ..."
 aws s3 cp --no-progress "${date}.parquet" "s3://flashbots-mempool-dumpster/ethereum/mainnet/${ym}/" --endpoint-url "https://${CLOUDFLARE_R2_ACCOUNT_ID}.r2.cloudflarestorage.com"
@@ -94,22 +98,6 @@ echo "Uploading ${date}_sourcelog.csv.zip ..."
 aws s3 cp --no-progress "${date}_sourcelog.csv.zip" "s3://flashbots-mempool-dumpster/ethereum/mainnet/${ym}/" --endpoint-url "https://${CLOUDFLARE_R2_ACCOUNT_ID}.r2.cloudflarestorage.com"
 aws --profile aws s3 cp --no-progress "${date}_sourcelog.csv.zip" "s3://flashbots-mempool-dumpster/ethereum/mainnet/${ym}/"
 
-# #
-# # Create analysis
-# #
-# echo "Creating summary..."
-# cd $1
-# /server/mempool-dumpster/build/analyze \
-#     --out "${date}_summary.txt" \
-#     --input-parquet "${date}.parquet" \
-#     --input-sourcelog "${date}_sourcelog.csv.zip"
-
-# /server/mempool-dumpster/build/analyze test \
-#     --tx-blacklist "../${yesterday}/${yesterday}.csv.zip" \
-#     --tx-whitelist "${date}.csv.zip" \
-#     --out "${date}_summary.txt" \
-#     "${date}_sourcelog.csv"
-
 echo "Uploading ${date}_summary.txt ..."
 aws s3 cp --no-progress "${date}_summary.txt" "s3://flashbots-mempool-dumpster/ethereum/mainnet/${ym}/" --endpoint-url "https://${CLOUDFLARE_R2_ACCOUNT_ID}.r2.cloudflarestorage.com"
 aws --profile aws s3 cp --no-progress "${date}_summary.txt" "s3://flashbots-mempool-dumpster/ethereum/mainnet/${ym}/"
@@ -118,7 +106,7 @@ aws --profile aws s3 cp --no-progress "${date}_summary.txt" "s3://flashbots-memp
 # CLEANUP
 #
 if [ -z ${YES:-} ]; then
-    read -p "Upload successful. Remove the raw transactions directory? " -n 1 -r
+    read -p "Upload successful. Remove the raw files and directories? " -n 1 -r
     echo
     if [[ ! $REPLY =~ ^[Yy]$ ]]
     then
@@ -126,7 +114,7 @@ if [ -z ${YES:-} ]; then
     fi
 fi
 
-rm -rf transactions sourcelog
-rm -rf "${date}_transactions.csv" "${date}.csv" "${date}_sourcelog.csv"
+rm -rf "${date}_transactions.csv" "${date}.csv" "${date}_sourcelog.csv" "${date}_trash.csv"
+rm -rf transactions sourcelog trash
 echo "All done!"
 echo ""
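
Note on the new "count types of trash" one-liner in scripts/README.md: it implies the trash CSVs carry the discard type in their fourth column. If that holds, the same tally can be done directly with ClickHouse local instead of sed/awk. This is a sketch, not part of the change above; it assumes headerless CSVs, where the `file()` table function with the `CSV` format auto-names columns `c1`..`cN`:

```bash
# count types of trash with ClickHouse local
# (sketch; assumes headerless trash CSVs whose 4th column is the discard type)
clickhouse local -q "SELECT c4 AS type, count(*) AS cnt FROM file('trash/*.csv', CSV) GROUP BY type ORDER BY cnt DESC;"
```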
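
One caveat on the new trash-packaging step in upload.sh: `cat trash/*.csv` fails on an empty glob, which would abort the script if it runs under `set -e` (not shown in this hunk). A possible guard, again only a sketch and not part of this change:

```bash
# skip trash packaging when no trash files exist, so an empty glob
# doesn't abort the script under `set -e`
if compgen -G "trash/*.csv" > /dev/null; then
    cat trash/*.csv > "${date}_trash.csv"
    zip "${date}_trash.csv.zip" "${date}_trash.csv"
fi
```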