From 70243657a70b3accb78de07139e05e1e3d344819 Mon Sep 17 00:00:00 2001 From: ValueRaider Date: Wed, 18 Dec 2024 21:07:30 +0000 Subject: [PATCH] Earnings dates: fix time-shift, remove 'add_cookies' --- yfinance/base.py | 60 ++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/yfinance/base.py b/yfinance/base.py index b258035f..3bca8471 100644 --- a/yfinance/base.py +++ b/yfinance/base.py @@ -29,6 +29,7 @@ import pandas as pd import requests +from datetime import date from . import utils, cache from .data import YfData @@ -41,7 +42,7 @@ from .scrapers.funds import FundsData from .const import _BASE_URL_, _ROOT_URL_ -from datetime import date + class TickerBase: def __init__(self, ticker, session=None, proxy=None): @@ -557,7 +558,7 @@ def get_news(self, proxy=None) -> list: return self._news @utils.log_indent_decorator - def get_earnings_dates(self, limit=12, proxy=None, add_cookies=None) -> Optional[pd.DataFrame]: + def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]: """ Get earning dates (future and historic) @@ -566,7 +567,6 @@ def get_earnings_dates(self, limit=12, proxy=None, add_cookies=None) -> Optional Default value 12 should return next 4 quarters and last 8 quarters. Increase if more history is needed. proxy: requests proxy to use. 
- add_cookies: additional cookies to use Returns: pd.DataFrame @@ -580,7 +580,7 @@ def get_earnings_dates(self, limit=12, proxy=None, add_cookies=None) -> Optional page_offset = 0 dates = None while True: - url = f"{_ROOT_URL_}/calendar/earnings?day={date.today()}&symbol={self.ticker}&offset={page_offset}&size={page_size}" # Fixed + url = f"{_ROOT_URL_}/calendar/earnings?day={date.today()}&symbol={self.ticker}&offset={page_offset}&size={page_size}" data = self._data.cache_get(url=url, proxy=proxy).text if "Will be right back" in data: @@ -621,36 +621,40 @@ def get_earnings_dates(self, limit=12, proxy=None, add_cookies=None) -> Optional # Drop redundant columns dates = dates.drop(["Symbol", "Company"], axis=1) + # Compatibility + dates = dates.rename(columns={'Surprise (%)': 'Surprise(%)'}) + + # Drop empty rows + for i in range(len(dates)-1, -1, -1): + if dates.iloc[i].isna().all(): + dates = dates.drop(i) + # Convert types - for cn in ["EPS Estimate", "Reported EPS", "Surprise (%)"]: + for cn in ["EPS Estimate", "Reported EPS", "Surprise(%)"]: dates.loc[dates[cn] == '-', cn] = float("nan") dates[cn] = dates[cn].astype(float) - # Convert % to range 0->1: - dates["Surprise (%)"] *= 0.01 - # Parse earnings date string cn = "Earnings Date" - dates[cn] = dates[cn].astype(str) # Fixed - # - remove "at" - dates[cn] = dates[cn].replace(' at', ',', regex=True) # Fixed - # - remove EDT/EST - dates[cn] = dates[cn].replace(' E[DS]T', '', regex=True) # Fixed - # - remove AM/PM and timezone from date string - tzinfo = dates[cn].str.extract('([AP]M[a-zA-Z]*)$') - dates[cn] = dates[cn].replace(' [AP]M[a-zA-Z]*$', '', regex=True) - # - split AM/PM from timezone - tzinfo = tzinfo[0].str.extract('([AP]M)([a-zA-Z]*)', expand=True) - tzinfo.columns = ["AM/PM", "TZ"] - # - combine and parse - dates[cn] = dates[cn] + ' ' + tzinfo["AM/PM"] - dates[cn] = pd.to_datetime(dates[cn], format="%B %d, %Y, %I %p") # Fixed - # - instead of attempting decoding of ambiguous timezone abbreviation, 
just use 'info': - self._quote.proxy = proxy or self.proxy - tz = self._get_ticker_tz(proxy=proxy, timeout=30) - dates[cn] = dates[cn].dt.tz_localize(tz) - - dates = dates.set_index("Earnings Date") + try: + dates_backup = dates.copy() + # - extract timezone because Yahoo stopped returning in UTC + tzy = dates[cn].str.split(' ').str.get(-1) + tzy[tzy.isin(['EDT', 'EST'])] = 'US/Eastern' + # - tidy date string + dates[cn] = dates[cn].str.split(' ').str[:-1].str.join(' ') + dates[cn] = dates[cn].replace(' at', ',', regex=True) + # - parse + dates[cn] = pd.to_datetime(dates[cn], format="%B %d, %Y, %I %p") + # - convert to exchange timezone + self._quote.proxy = proxy or self.proxy + tz = self._get_ticker_tz(proxy=proxy, timeout=30) + dates[cn] = [dates[cn].iloc[i].tz_localize(tzy.iloc[i], ambiguous=True).tz_convert(tz) for i in range(len(dates))] + + dates = dates.set_index("Earnings Date") + except Exception as e: + utils.get_yf_logger().info(f"{self.ticker}: Problem parsing earnings_dates: {str(e)}") + dates = dates_backup self._earnings_dates[limit] = dates