Skip to content

Commit

Permalink
Merge pull request #19 from graphsense/feature/exchangerates_cryptocompare
Browse files Browse the repository at this point in the history

Feature/exchangerates cryptocompare
  • Loading branch information
soad003 authored Jun 17, 2024
2 parents 2bcc59d + 18f58c5 commit f83ead9
Show file tree
Hide file tree
Showing 2 changed files with 322 additions and 4 deletions.
125 changes: 121 additions & 4 deletions src/graphsenselib/rates/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
from .coinmarketcap import fetch as fetchCMK
from .coinmarketcap import fetch_impl as fetchCMKDump
from .coinmarketcap import ingest as ingestCMK
from .cryptocompare import MIN_START as MS_CC
from .cryptocompare import fetch as fetchCC
from .cryptocompare import fetch_impl as dumpCC
from .cryptocompare import ingest as ingestCC

logger = logging.getLogger(__name__)

Expand All @@ -38,6 +42,8 @@ def inner(function):
min_date = MS_CMK
elif provider == "cdesk":
min_date = MS_CD
elif provider == "cryptocompare":
min_date = MS_CC

function = click.option(
"--fiat-currencies",
Expand Down Expand Up @@ -137,6 +143,12 @@ def coingecko():
pass


# Click sub-group bundling the cryptocompare-based exchange-rate commands
# (dump / fetch / ingest), parallel to the coinmarketcap and coingecko groups.
@exchange_rates.group()
def cryptocompare():
    """From cryptocompare."""
    pass


@coinmarketcap.command("dump")
@require_currency()
@shared_flags()
Expand All @@ -153,7 +165,7 @@ def fetch_cmk_dump(
end_date: str,
out_file: str,
):
"""Safe exchange rates to file.
"""Save exchange rates to file.
\f
Args:
env (str): -
Expand Down Expand Up @@ -184,7 +196,7 @@ def fetch_cmk_dump(

@coingecko.command("dump")
@require_currency()
@shared_flags()
@shared_flags(provider="gecko")
@click.option(
"--out-file",
default="rates.csv",
Expand All @@ -198,7 +210,7 @@ def fetch_coingecko_dump(
end_date: str,
out_file: str,
):
"""Safe exchange rates to file.
"""Save exchange rates to file.
\f
Args:
env (str): -
Expand Down Expand Up @@ -227,6 +239,48 @@ def fetch_coingecko_dump(
df.to_csv(out_file)


@cryptocompare.command("dump")
@require_currency()
@shared_flags(provider="cryptocompare")
@click.option(
    "--out-file",
    default="rates.csv",
    type=str,
    help="file to dump into.",
)
def fetch_cryptocompare_dump(
    currency: str,
    fiat_currencies: list[str],
    start_date: str,
    end_date: str,
    out_file: str,
):
    """Save exchange rates to file.
    \f
    Args:
        currency (str): -
        fiat_currencies (list[str]): -
        start_date (str): -
        end_date (str): -
        out_file (str): -
    """
    # db=None and force=True: always fetch the full requested range instead
    # of resuming from a database; dry_run=True guarantees nothing is written.
    df = dumpCC(
        None,
        currency,
        list(fiat_currencies),
        start_date,
        end_date,
        table=None,
        force=True,
        dry_run=True,
        abort_on_gaps=False,
    )
    # Fixed copy-paste leftover: this rule previously said "Rates Coingecko";
    # use the cryptocompare label, consistent with fetch_cc.
    console.rule("Rates cryptocompare")
    console.print(df)
    console.rule(f"Writing to {out_file}")
    df.to_csv(out_file)


@coinmarketcap.command("fetch")
@require_environment()
@require_currency()
Expand All @@ -252,7 +306,7 @@ def fetch_cmk(
@coingecko.command("fetch")
@require_environment()
@require_currency()
@shared_flags()
@shared_flags(provider="gecko")
def fetch_gecko(
env: str, currency: str, fiat_currencies: list[str], start_date: str, end_date: str
):
Expand Down Expand Up @@ -292,6 +346,27 @@ def fetch_cd(
console.print(df)


@cryptocompare.command("fetch")
@require_environment()
@require_currency()
@shared_flags(provider="cryptocompare")
def fetch_cc(
    env: str, currency: str, fiat_currencies: list[str], start_date: str, end_date: str
):
    """Fetches and prints exchange rates.
    \f
    Args:
        env (str): -
        currency (str): -
        fiat_currencies (list[str]): -
        start_date (str): -
        end_date (str): -
    """
    # Fetch first, then print, so a failed fetch produces no partial output.
    rates = fetchCC(env, currency, list(fiat_currencies), start_date, end_date)
    console.rule("Rates cryptocompare")
    console.print(rates)


@coinmarketcap.command("ingest")
@require_environment()
@require_currency()
Expand Down Expand Up @@ -420,3 +495,45 @@ def ingest_cd(
dry_run,
abort_on_gaps,
)


@cryptocompare.command("ingest")
@require_environment()
@require_currency()
@shared_flags(provider="cryptocompare")
@shared_ingest_flags()
def ingest_cc(
    env,
    currency,
    fiat_currencies,
    start_date,
    end_date,
    table,
    force,
    dry_run,
    abort_on_gaps,
):
    """Ingests new exchange rates into cassandra raw keyspace.
    \f
    Args:
        env (str): -
        currency (str): -
        fiat_currencies (list[str]): -
        start_date (str): -
        end_date (str): -
        table (str): -
        force (bool): -
        dry_run (bool): -
        abort_on_gaps (bool): -
    """
    # Thin CLI shim; all work happens in rates.cryptocompare.ingest.
    ingestCC(
        env,
        currency,
        list(fiat_currencies),
        start_date,
        end_date,
        table=table,
        force=force,
        dry_run=dry_run,
        abort_on_gaps=abort_on_gaps,
    )
201 changes: 201 additions & 0 deletions src/graphsenselib/rates/cryptocompare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
# -*- coding: utf-8 -*-
import json
import logging
from datetime import date, datetime
from typing import List, Optional

import pandas as pd
import requests

from graphsenselib.db import DbFactory
from graphsenselib.db.analytics import DATE_FORMAT
from graphsenselib.rates.coingecko import fetch_ecb_rates

logger = logging.getLogger(__name__)

MIN_START = "2010-07-17"


def cryptocompare_historical_url(start: str, end: str, symbol: str, fiat: str):
    """Build the cryptocompare histoday URL covering [start, end].

    Args:
        start: inclusive ISO start date.
        end: inclusive ISO end date.
        symbol: crypto currency symbol (fsym).
        fiat: fiat currency symbol (tsym).

    Returns:
        Request URL for the daily-history endpoint; when the range exceeds
        the API's per-request record limit (~2000, per the docs at
        https://min-api.cryptocompare.com/documentation?key=Historical&cat=dataHistoday)
        the full dataset is requested via allData=true instead.
    """
    end_dt = datetime.fromisoformat(end)
    start_dt = datetime.fromisoformat(start)
    n_days = (end_dt - start_dt).days + 1
    base = "https://min-api.cryptocompare.com/data/v2/histoday"
    if n_days >= 2000:
        # Range too large for a single limited request -> load everything.
        return f"{base}?fsym={symbol}&tsym={fiat}&allData=true"
    return f"{base}?fsym={symbol}&tsym={fiat}&toTS={end_dt.timestamp()}&limit={n_days}"


def fetch_cryptocompare_rates(start: str, end: str, symbol: str, fiat: str):
    """Download daily close rates for symbol/fiat from cryptocompare.

    Returns:
        DataFrame with one row per day and the columns "date" (formatted
        with DATE_FORMAT) and the fiat symbol (daily close price).

    Raises:
        AssertionError: if the returned series has duplicate days or gaps.
    """
    response = requests.get(cryptocompare_historical_url(start, end, symbol, fiat))
    frame = pd.DataFrame(json.loads(response.content)["Data"]["Data"])
    # "time" is a unix timestamp; floor to midnight to get the calendar day.
    frame["date"] = pd.to_datetime(frame["time"], unit="s").dt.floor("D")

    # Sanity checks: each day appears exactly once, and consecutive rows are
    # exactly one day apart (no gaps, no duplicates).
    assert len(frame.date) == len(set(frame.date))
    frame["date_check"] = frame.date.diff()
    day_steps = frame.date_check.value_counts()
    assert len(day_steps) == 1
    assert day_steps.keys().unique()[0] == pd.Timedelta("1 days")

    frame.date = frame.date.dt.strftime(DATE_FORMAT)
    frame.rename(columns={"close": fiat}, inplace=True)

    return frame[["date", fiat]]


def fetch(env, currency, fiat_currencies, start_date, end_date):
    """Fetch exchange rates using the db configured for the environment.

    Resumes from the last ingested rate unless the caller asked for the
    complete history (start_date == MIN_START).
    """
    with DbFactory().from_config(env, currency) as db:
        # A custom start date forces that date instead of resuming from the
        # last ingested rate in the database.
        use_given_start = start_date != MIN_START
        return fetch_impl(
            db,
            currency,
            fiat_currencies,
            start_date,
            end_date,
            None,
            use_given_start,
            False,
            True,
        )


def fetch_impl(
    db: Optional[object],
    currency: str,
    fiat_currencies: List[str],
    start_date: Optional[str],
    end_date: Optional[str],
    table: Optional[str],
    force: bool,
    dry_run: bool,
    abort_on_gaps: bool,
):
    """Fetch daily USD rates from cryptocompare and convert to fiat currencies.

    Args:
        db: open db handle used to resume from the last ingested rate,
            or None to skip resuming.
        currency: crypto currency symbol (e.g. "BTC").
        fiat_currencies: target fiat currencies; USD is always fetched.
        start_date: inclusive ISO start date (clamped to MIN_START).
        end_date: inclusive ISO end date.
        table: exchange-rates table used to look up the resume date.
        force: if True, ignore the last ingested date and use start_date.
        dry_run: when gaps are found, keep going to show what would be
            written instead of aborting.
        abort_on_gaps: raise SystemExit when ECB fx rates have gaps.

    Returns:
        DataFrame with columns "date", "USD" and one column per requested
        non-USD fiat currency.
    """
    if datetime.fromisoformat(start_date) < datetime.fromisoformat(MIN_START):
        start_date = MIN_START

    # query most recent data and resume from there unless forced
    if not force and db:
        logger.info(f"Get last imported rate from {db.raw.get_keyspace()}")
        most_recent_date = db.raw.get_last_exchange_rate_date(table=table)
        if most_recent_date is not None:
            start_date = most_recent_date.strftime(DATE_FORMAT)

    logger.info(f"*** Fetch exchange rates for {currency} ***")
    logger.info(f"Start date: {start_date}")
    logger.info(f"End date: {end_date}")
    logger.info(f"Target fiat currencies: {fiat_currencies}")

    if datetime.fromisoformat(start_date) > datetime.fromisoformat(end_date):
        logger.error("Error: start date after end date.")
        raise SystemExit

    usd_rates = fetch_cryptocompare_rates(start_date, end_date, currency, "USD")

    ecb_rates = fetch_ecb_rates(fiat_currencies)

    # query conversion rates and merge converted values in exchange rates
    exchange_rates = usd_rates
    date_range = pd.date_range(
        date.fromisoformat(start_date), date.fromisoformat(end_date)
    )
    date_range = pd.DataFrame(date_range, columns=["date"])
    date_range = date_range["date"].dt.strftime("%Y-%m-%d")

    for fiat_currency in set(fiat_currencies) - {"USD"}:
        ecb_rate = ecb_rates[["date", fiat_currency]].rename(
            columns={fiat_currency: "fx_rate"}
        )
        merged_df = exchange_rates.merge(ecb_rate, on="date", how="left").merge(
            date_range, how="right"
        )

        # fill gaps over weekends (ECB publishes no fx rates on those days).
        # FIX: Series.fillna(method=...) is deprecated since pandas 2.1 and
        # inplace=True on a column selection is unreliable; assign the
        # ffill()/bfill() result back instead (same values, supported API).
        merged_df["fx_rate"] = merged_df["fx_rate"].ffill()
        merged_df["fx_rate"] = merged_df["fx_rate"].bfill()

        if abort_on_gaps and merged_df["fx_rate"].isnull().values.any():
            logger.error(
                "Error: found missing values for currency "
                f"{fiat_currency}, aborting import. Probably a weekend."
            )
            logger.error(merged_df[merged_df["fx_rate"].isnull()])
            if not dry_run:
                # in case of dry run let it run
                # to see what would have been written to the db
                if len(merged_df[merged_df["fx_rate"].isnull()]) > 4:
                    # if missing more than 4 days, critical error
                    raise SystemExit(2)
                else:
                    raise SystemExit(15)
        merged_df[fiat_currency] = merged_df["USD"] * merged_df["fx_rate"]
        merged_df = merged_df[["date", fiat_currency]]
        exchange_rates = exchange_rates.merge(merged_df, on="date")

    return exchange_rates


def ingest(
    env,
    currency,
    fiat_currencies,
    start_date,
    end_date,
    table,
    force,
    dry_run,
    abort_on_gaps,
):
    """Fetch cryptocompare exchange rates and write them to the raw keyspace.

    Args:
        env (str): environment whose db config is used.
        currency (str): crypto currency symbol.
        fiat_currencies (list[str]): fiat columns to ingest.
        start_date (str): inclusive ISO start date.
        end_date (str): inclusive ISO end date.
        table (str): target Cassandra table.
        force (bool): ignore the last ingested date and refetch from start_date.
        dry_run (bool): log what would be written without touching the db.
        abort_on_gaps (bool): abort when fx rates have gaps (see fetch_impl).
    """
    if dry_run:
        logger.warning("This is a Dry-Run. Nothing will be written to the database!")
    with DbFactory().from_config(env, currency) as db:
        exchange_rates = fetch_impl(
            db,
            currency,
            fiat_currencies,
            start_date,
            end_date,
            table,
            force,
            dry_run,
            abort_on_gaps,
        )

        # Rows with missing rates cannot be ingested; drop them with a warning.
        if exchange_rates.isna().values.any():
            logger.warning("exchange_rates contain NaNs, dropping them now")
            exchange_rates.dropna(inplace=True)

        # insert final exchange rates into Cassandra
        # USD is always fetched as the base rate; drop it unless requested.
        if "USD" not in fiat_currencies:
            exchange_rates.drop("USD", axis=1, inplace=True)
        # Pack all per-currency columns into one dict column ("fiat_values"),
        # then drop the now-redundant per-currency columns.
        exchange_rates["fiat_values"] = exchange_rates.drop("date", axis=1).to_dict(
            orient="records"
        )
        exchange_rates.drop(fiat_currencies, axis=1, inplace=True)

        # insert exchange rates into Cassandra table
        if not dry_run:
            logger.info(f"Writing to keyspace {db.raw.get_keyspace()}")
            if len(exchange_rates) > 0:
                db.raw.ingest(table, exchange_rates.to_dict("records"))
                logger.info(f"Inserted rates for {len(exchange_rates)} days: ")
                logger.info(
                    f"{exchange_rates.iloc[0].date} - {exchange_rates.iloc[-1].date}"
                )
            else:
                logger.info("Nothing to insert.")
        else:
            logger.info(
                "Dry run: No data inserted. "
                f"Would have inserted {len(exchange_rates)} days."
            )
            logger.info(exchange_rates)

0 comments on commit f83ead9

Please sign in to comment.