From 8906bba82a52610df4688da8528d6cb68d6322a8 Mon Sep 17 00:00:00 2001 From: JersyJ Date: Sun, 30 Jun 2024 19:00:20 +0200 Subject: [PATCH] CI improvements and Docker --- .github/workflows/lint.yaml | 20 -- .github/workflows/test.yaml | 15 - .github/workflows/validation.yaml | 116 ++++++++ .pre-commit-config.yaml | 44 ++- Dockerfile | 31 ++ README.md | 7 + app.py | 59 ++-- docker-compose.yml | 28 ++ lunches.py | 459 ++++++++++++++++-------------- poetry.lock | 145 +++++++++- public_transport.py | 54 ++-- pyproject.toml | 35 +++ 12 files changed, 705 insertions(+), 308 deletions(-) delete mode 100644 .github/workflows/lint.yaml delete mode 100644 .github/workflows/test.yaml create mode 100644 .github/workflows/validation.yaml create mode 100644 Dockerfile create mode 100644 docker-compose.yml diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml deleted file mode 100644 index a358a06..0000000 --- a/.github/workflows/lint.yaml +++ /dev/null @@ -1,20 +0,0 @@ -name: lint -on: - push: - pull_request: - -jobs: - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - run: pip install ruff - - run: ruff check . 
--output-format github --ignore E501 - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - run: pip install pre-commit - - run: pre-commit run --show-diff-on-failure --color=always --all-files diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml deleted file mode 100644 index 47635b7..0000000 --- a/.github/workflows/test.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: test -on: - push: - pull_request: - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - run: sudo apt-get install -y poppler-utils tesseract-ocr-ces - - run: pipx install poetry==1.8.3 - - run: poetry install - - run: poetry run ./lunches.py diff --git a/.github/workflows/validation.yaml b/.github/workflows/validation.yaml new file mode 100644 index 0000000..45fdee9 --- /dev/null +++ b/.github/workflows/validation.yaml @@ -0,0 +1,116 @@ +name: Validation + +on: + workflow_dispatch: + push: + branches: + - 'master' + paths: + - '**.py' + - '.github/workflows/validation.yaml' + pull_request: + types: [ opened, synchronize, reopened ] + branches: + - 'master' + paths: + - '**.py' + - '.github/workflows/validation.yaml' + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Cache Poetry install + uses: actions/cache@v4 + with: + path: ~/.cache/pypoetry + key: poetry + + - name: Install Poetry + run: | + pipx install poetry + + - name: Setup Python + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'poetry' + cache-dependency-path: 'poetry.lock' + + - name: Cache venv + uses: actions/cache@v4 + id: cache-venv + with: + path: ./.venv/ + key: ${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-app-${{ hashFiles('./poetry.lock') }} + + - name: Install App dependencies + run: | + poetry install --no-interaction --no-root + if: steps.cache-venv.outputs.cache-hit 
!= 'true' + + - name: Install App + run: | + poetry install --no-interaction + + - name: Ruff format + if: success() || failure() + run: | + poetry run ruff format --check . --output-format github + + - name: Ruff lint + if: success() || failure() + run: | + poetry run ruff check . --output-format github + + test: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + sudo apt-get install -y poppler-utils tesseract-ocr-ces + + - name: Cache Poetry install + uses: actions/cache@v4 + with: + path: ~/.cache/pypoetry + key: poetry + + - name: Install Poetry + run: | + pipx install poetry + + - name: Setup Python + id: setup_python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + cache: 'poetry' + cache-dependency-path: 'poetry.lock' + + - name: Cache venv + uses: actions/cache@v4 + id: cache-venv + with: + path: ./.venv/ + key: ${{ runner.os }}-${{ steps.setup_python.outputs.python-version }}-app-${{ hashFiles('./poetry.lock') }} + + - name: Install App dependencies + run: | + poetry install --no-interaction --no-root + if: steps.cache-venv.outputs.cache-hit != 'true' + + - name: Install App + run: | + poetry install --no-interaction + + - name: Run tests + run: | + poetry run ./lunches.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e08afc8..8b686af 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,31 @@ repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 - hooks: - - id: end-of-file-fixer - - id: trailing-whitespace -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.291 - hooks: - - id: ruff - args: [--fix, --exit-non-zero-on-fix, --ignore, E501] -- repo: https://github.com/codespell-project/codespell - rev: v2.2.2 - hooks: - - id: codespell + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-yaml + args: [--allow-multiple-documents] + - id: 
check-case-conflict + name: Check for files with names that would conflict on a case-insensitive filesystem + entry: check-case-conflict + - id: end-of-file-fixer + name: Makes sure files end in a newline and only a newline. + entry: end-of-file-fixer + types: [text] + - id: trailing-whitespace + name: Trims trailing whitespace. + entry: trailing-whitespace-fixer + types: [text] + - id: check-docstring-first + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.0 + hooks: + - id: ruff-format + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.0 + hooks: + - id: ruff + args: [ --fix ] + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..78badfa --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.12-slim-bookworm as build-stage + +RUN pip install poetry + +ENV POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_IN_PROJECT=1 \ + POETRY_VIRTUALENVS_CREATE=1 \ + POETRY_CACHE_DIR=/tmp/poetry_cache + +WORKDIR /app + +COPY pyproject.toml poetry.lock* ./ +RUN touch README.md + +RUN poetry install --without dev --no-root && rm -rf $POETRY_CACHE_DIR + +FROM python:3.12-slim-bookworm as runtime + +WORKDIR /app + +COPY --from=build-stage /app . + +ENV PATH="/app/.venv/bin:$PATH" + +COPY templates ./templates + +COPY *.py . + +EXPOSE 443 + +CMD ["fastapi", "run", "--port", "443"] diff --git a/README.md b/README.md index b28cb73..b826124 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ ## Local setup Install and start redis server for caching. + ```sh $ pip install pre-commit $ pre-commit install @@ -22,3 +23,9 @@ $ cd frontend $ yarn install $ yarn run dev ``` + +## Production setup + +```sh +docker build --target=runtime --tag="lunchmenu" . 
+``` diff --git a/app.py b/app.py index 151aeff..9e597fa 100755 --- a/app.py +++ b/app.py @@ -1,19 +1,22 @@ #!/usr/bin/env python3 import datetime -import pickle import ipaddress +import pickle + import redis.asyncio as redis from fastapi import FastAPI, Request from fastapi.templating import Jinja2Templates -#from werkzeug.middleware.proxy_fix import ProxyFix -#from flask_redis import FlaskRedis + +# from werkzeug.middleware.proxy_fix import ProxyFix +# from flask_redis import FlaskRedis from lunches import gather_restaurants from public_transport import public_transport_connections app = FastAPI(debug=True) templates = Jinja2Templates(directory="templates") -#app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1) -redis_client = redis.Redis() +# app.wsgi_app = ProxyFix(app.wsgi_app, x_proto=1) +redis_client = redis.Redis(host="redis", port=6379) + @app.get("/public_transport") async def public_transport(request: Request): @@ -23,57 +26,53 @@ async def public_transport(request: Request): srcs, dsts = dsts, srcs return templates.TemplateResponse( - request=request, - name='public_transport.html', - context={ - 'connections': await public_transport_connections(srcs, dsts) - } + request=request, + name="public_transport.html", + context={"connections": await public_transport_connections(srcs, dsts)}, ) + @app.get("/lunch.json") @app.post("/lunch.json") async def lunch(request: Request): now = int(datetime.datetime.now().timestamp()) key = f'restaurants.{datetime.date.today().strftime("%d-%m-%Y")}' result_str = await redis_client.get(key) - if not result_str or request.method == 'POST': - throttle_key = f'{key}.throttle' + if not result_str or request.method == "POST": + throttle_key = f"{key}.throttle" if await redis_client.incr(throttle_key) != 1: - return {'error': 'Fetch limit reached. Try again later.'} + return {"error": "Fetch limit reached. 
Try again later."} await redis_client.expire(throttle_key, 60 * 3) result = { - 'last_fetch': now, - 'fetch_count': await redis_client.incr(f'{key}.fetch_count'), - 'restaurants': list(await gather_restaurants()), + "last_fetch": now, + "fetch_count": await redis_client.incr(f"{key}.fetch_count"), + "restaurants": list(await gather_restaurants()), } await redis_client.set(key, pickle.dumps(result)) else: result = pickle.loads(result_str) - disallow_nets = [ipaddress.ip_network(net) for net in [ - '127.0.0.0/8', - '::1/128', - '192.168.1.0/24', - '89.103.137.232/32', - '2001:470:5816::/48' - ]] + disallow_nets = [ + ipaddress.ip_network(net) + for net in ["127.0.0.0/8", "::1/128", "192.168.1.0/24", "89.103.137.232/32", "2001:470:5816::/48"] + ] for net in disallow_nets: if net.version == 4: - disallow_nets.append(ipaddress.ip_network(f'::ffff:{net.network_address}/{96 + net.prefixlen}')) + disallow_nets.append(ipaddress.ip_network(f"::ffff:{net.network_address}/{96 + net.prefixlen}")) visitor_addr = ipaddress.ip_address(request.client.host) - if not any([net for net in disallow_nets if visitor_addr in net]): - await redis_client.incr(f'{key}.access_count') - await redis_client.setnx(f'{key}.first_access', now) + if not any([net for net in disallow_nets if visitor_addr in net]): # noqa: C419 + await redis_client.incr(f"{key}.access_count") + await redis_client.setnx(f"{key}.first_access", now) async def get(k): - val = await redis_client.get(f'{key}.{k}') + val = await redis_client.get(f"{key}.{k}") if val: result[k] = int(val) else: result[k] = 0 - await get('access_count') - await get('first_access') + await get("access_count") + await get("first_access") return result diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..ba32d97 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,28 @@ +services: + redis: + container_name: lunchmenu-redis + image: redis:alpine + restart: unless-stopped + ports: + - "6379:6379" + command: 
"redis-server --save 20 1 --loglevel warning" + volumes: + - "redis_data:/data" + extra_hosts: + - host.docker.internal:host-gateway + + lunchmenu: + container_name: lunchmenu + depends_on: + - redis + build: + context: . + dockerfile: ./Dockerfile + restart: unless-stopped + ports: + - "443:443" + extra_hosts: + - "host.docker.internal:host-gateway" + +volumes: + redis_data: diff --git a/lunches.py b/lunches.py index 90725b5..25727f0 100755 --- a/lunches.py +++ b/lunches.py @@ -1,47 +1,54 @@ #!/usr/bin/env python3 -from selectolax.parser import HTMLParser, Selector -import re -import json +import asyncio import datetime -import traceback +import inspect +import json import logging -import httpx +import re import string -import asyncio -import inspect -from html import unescape -from enum import Enum -from dataclasses import dataclass import time +import traceback +from dataclasses import dataclass +from enum import Enum +from html import unescape + +import httpx +from selectolax.parser import HTMLParser, Selector + +days = ["Pondělí", "Úterý", "Středa", "Čtvrtek", "Pátek", "Sobota", "Neděle"] +USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" -days = ['Pondělí', 'Úterý', 'Středa', 'Čtvrtek', 'Pátek', 'Sobota', 'Neděle'] -USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36' class Location(str, Enum): - Poruba = "Poruba", - Dubina = "Dubina", - Zabreh = "Zábřeh", - Olomouc = "Olomouc", + Poruba = ("Poruba",) + Dubina = ("Dubina",) + Zabreh = ("Zábřeh",) + Olomouc = ("Olomouc",) + -def restaurant(title, url=None, location:Location=None): +def restaurant(title, url=None, location: Location = None): def wrapper(fn): def wrap(*args, **kwargs): return fn(*args, **kwargs) + wrap.parser = { - 'name': fn.__name__, - 'title': title, - 'url': url, - 'location': location, - 'args': fn.__code__.co_varnames[:fn.__code__.co_argcount], + "name": 
fn.__name__, + "title": title, + "url": url, + "location": location, + "args": fn.__code__.co_varnames[: fn.__code__.co_argcount], } return wrap + return wrapper + @dataclass class Soup: name: str price: int = None + @dataclass class Lunch: name: str @@ -49,37 +56,37 @@ class Lunch: price: int = None ingredients: str = None + def menicka_parser(dom): current_day = datetime.datetime.now().strftime("%-d.%-m.%Y") - for day_dom in dom.css('.content'): - day = day_dom.css_first('h2').text(strip=True).split(' ', 2)[1] + for day_dom in dom.css(".content"): + day = day_dom.css_first("h2").text(strip=True).split(" ", 2)[1] if current_day not in day: continue - soup_el = day_dom.css_first('.soup .food') + soup_el = day_dom.css_first(".soup .food") if soup_el: soup_name = soup_el.text() - if 'Pro tento den nebylo zadáno menu' in soup_name: + if "Pro tento den nebylo zadáno menu" in soup_name: break - yield Soup( - soup_name, - day_dom.css_first('.soup .prize').text() - ) + yield Soup(soup_name, day_dom.css_first(".soup .prize").text()) - for food in day_dom.css('.main'): - match = re.search(r'\((?P.*)\)', food.css_first('.food').text()) - ingredients = match.group('ingredients') if match else None + for food in day_dom.css(".main"): + match = re.search(r"\((?P.*)\)", food.css_first(".food").text()) + ingredients = match.group("ingredients") if match else None yield Lunch( - num=food.css_first('.no').text().strip(' .'), - name=food.css_first('.food').text(), - price=food.css_first('.prize').text(), + num=food.css_first(".no").text().strip(" ."), + name=food.css_first(".food").text(), + price=food.css_first(".prize").text(), ingredients=ingredients, ) + async def subprocess_check_output(cmd, input): p = await asyncio.create_subprocess_exec(*cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE) - return (await p.communicate(input))[0].decode('utf-8') + return (await p.communicate(input))[0].decode("utf-8") + def lcs(strings): if not strings: @@ -105,42 +112,45 @@ 
def lcs(strings): return common_subsequence.lower().capitalize() + @restaurant("Bistro IN", "https://bistroin.choiceqr.com/delivery", Location.Poruba) def bistroin(dom): - data = json.loads(dom.css_first('#__NEXT_DATA__').text()) + data = json.loads(dom.css_first("#__NEXT_DATA__").text()) for item in data["props"]["app"]["menu"]: - ingredients = re.sub(r'Al\. \(.+', '', item['description']) - price = item['price'] // 100 - if 'Polévka k menu:' in item['name']: - yield Soup(name=item['name'].split(':')[1], price=price) + ingredients = re.sub(r"Al\. \(.+", "", item["description"]) + price = item["price"] // 100 + if "Polévka k menu:" in item["name"]: + yield Soup(name=item["name"].split(":")[1], price=price) else: - match = re.match(r'^\s*(?P[0-9]+)\s*\.\s*(?P.+)', item['name']) + match = re.match(r"^\s*(?P[0-9]+)\s*\.\s*(?P.+)", item["name"]) if match: yield Lunch(**match.groupdict(), price=price - 5, ingredients=ingredients) + @restaurant("U jarosu", "https://www.ujarosu.cz/cz/denni-menu/", Location.Poruba) def u_jarosu(dom): today = datetime.datetime.strftime(datetime.datetime.now(), "%d. %m. 
%Y") - for row in dom.css('.celyden'): - parsed_day = row.css_first('.datum').text() + for row in dom.css(".celyden"): + parsed_day = row.css_first(".datum").text() if parsed_day == today: - records = row.css('.tabulka p') + records = row.css(".tabulka p") records = [r.text().strip() for r in records] - records = [records[i:i+3] for i in range(0, len(records), 3)] + records = [records[i : i + 3] for i in range(0, len(records), 3)] for first, name, price in records: - if first == 'Polévka': + if first == "Polévka": yield Soup(name) else: - yield Lunch(name, price=price, num=first.split('.')[0]) + yield Lunch(name, price=price, num=first.split(".")[0]) + @restaurant("U zlateho lva", "http://www.zlatylev.com/menu_zlaty_lev.html", Location.Poruba) def u_zlateho_lva(dom): day_nth = datetime.datetime.today().weekday() - text = dom.css_first('.xr_txt.xr_s0').text() + text = dom.css_first(".xr_txt.xr_s0").text() capturing = False - state = 'num' + state = "num" for line in text.splitlines(): line = line.strip() @@ -149,64 +159,66 @@ def u_zlateho_lva(dom): elif capturing: if day_nth < 4 and line.startswith(days[day_nth + 1]): break - soup_prefix = 'Polévka:' + soup_prefix = "Polévka:" if line.startswith(soup_prefix): - yield Soup(line.replace(soup_prefix, '')) + yield Soup(line.replace(soup_prefix, "")) else: - if state == 'num': - if re.match(r'^[0-9]+\.', line): - line, name = line.split('.', 1) + if state == "num": + if re.match(r"^[0-9]+\.", line): + line, name = line.split(".", 1) food = Lunch(name=name, num=line) - state = 'price' if name else 'name' - elif state == 'name': + state = "price" if name else "name" + elif state == "name": if line: food.name = line - state = 'price' - elif state == 'price': - if re.match(r'^[0-9]+\s*(,-|Kč)$', line): - food.price = line.split(' ')[0] + state = "price" + elif state == "price": # noqa: SIM102 + if re.match(r"^[0-9]+\s*(,-|Kč)$", line): + food.price = line.split(" ")[0] yield food - state = 'num' + state = "num" + 
@restaurant("Globus", "https://www.globus.cz/ostrava/sluzby-a-produkty/restaurace", Location.Poruba) def globus(dom): - for row in dom.css('.space-y-2 .flex'): - spans = row.css('* > span') + for row in dom.css(".space-y-2 .flex"): + spans = row.css("* > span") price = fix_price(spans[2].text()) t = Soup if price < 50 else Lunch yield t(spans[1].text(), price=price) + @restaurant("Jacks Burger", "https://www.zomato.com/cs/widgets/daily_menu.php?entity_id=16525845", Location.Poruba) def jacks_burger(dom): started = False full_name = "" num = None price = None - for el in dom.css('.main-body > div'): - if el.css_matches('.line-wider'): + for el in dom.css(".main-body > div"): + if el.css_matches(".line-wider"): break - name = el.css_first('.item-name') + name = el.css_first(".item-name") if name is None: continue name = name.text(strip=True) - if 'ROZVOZ PŘES' in name.upper() or '---------' in name or 'JBB OSTRAVA' in name.upper(): + if "ROZVOZ PŘES" in name.upper() or "---------" in name or "JBB OSTRAVA" in name.upper(): continue - if re.match(r'^[0-9]+\..+', name): + if re.match(r"^[0-9]+\..+", name): if full_name: yield Lunch(name=full_name, price=price, num=num) full_name = "" price = None - num = name.split('.')[0] + num = name.split(".")[0] full_name += name if not started: - if 'Polévka dle denní nabídky' != full_name: + if full_name != "Polévka dle denní nabídky": yield Soup(name=full_name) full_name = "" started = True else: - price = el.css_first('.item-price') + price = el.css_first(".item-price") if price: price = price.text(strip=True) if price: @@ -215,9 +227,10 @@ def jacks_burger(dom): price = None num = None + @restaurant("Poklad", "https://dkpoklad.cz/restaurace/", Location.Poruba) async def poklad(dom, http): - pdf_url = dom.css_first('.restaurace-box .wp-block-file a').attributes['href'] + pdf_url = dom.css_first(".restaurace-box .wp-block-file a").attributes["href"] pdf = (await http.get(pdf_url)).content text = await 
subprocess_check_output(["pdftotext", "-layout", "-", "-"], pdf) @@ -230,11 +243,11 @@ async def poklad(dom, http): if today in line: capturing = True elif capturing: - if tomorrow in line or 'NABÍDKA NÁPOJŮ' in line: + if tomorrow in line or "NABÍDKA NÁPOJŮ" in line: break if soup: soup = False - for s in line.split(' I '): + for s in line.split(" I "): yield Soup(s) else: m = re.match(r"^(?P[0-9]+)\s*\.?\s*(?P.*?) (?P[0-9]+) Kč", line) @@ -243,96 +256,112 @@ async def poklad(dom, http): yield Lunch(**item) item = m.groupdict() elif item: - item['name'] += line + item["name"] += line if item: yield Lunch(**item) + @restaurant("Trebovicky mlyn", "https://www.trebovickymlyn.cz/", Location.Poruba) def trebovicky_mlyn(dom): - el = dom.css_first('.soup h2') + el = dom.css_first(".soup h2") if not el: return yield Soup(el.text()) - for lunch in dom.css_first('.owl-carousel').css('.menu-post'): - parts = lunch.css_first('h2').text().split(')') + for lunch in dom.css_first(".owl-carousel").css(".menu-post"): + parts = lunch.css_first("h2").text().split(")") if len(parts) == 2: - yield Lunch(num=parts[0], name=parts[1], ingredients=lunch.css_first('h2 + div').text(), price=lunch.css_first('span').text().split(',')[0]) + yield Lunch( + num=parts[0], + name=parts[1], + ingredients=lunch.css_first("h2 + div").text(), + price=lunch.css_first("span").text().split(",")[0], + ) + @restaurant("La Strada", "http://www.lastrada.cz/cz/?tpl=plugins/DailyMenu/print&week_shift=", Location.Poruba) def lastrada(dom): day_nth = datetime.datetime.today().weekday() capturing = False - for tr in dom.css('tr'): - if tr.css_matches('.day'): + for tr in dom.css("tr"): + if tr.css_matches(".day"): capturing = False - if days[day_nth] in tr.text() or 'Menu na celý týden' in tr.text(): + if days[day_nth] in tr.text() or "Menu na celý týden" in tr.text(): capturing = True elif capturing: - if tr.css_matches('.highlight'): - yield Lunch(name=tr.css_first('td').text(), 
price=tr.css_first('.price').text()) + if tr.css_matches(".highlight"): + yield Lunch(name=tr.css_first("td").text(), price=tr.css_first(".price").text()) + @restaurant("Ellas", "https://www.restauraceellas.cz/", Location.Poruba) def ellas(dom): day_nth = datetime.datetime.today().weekday() - for div in dom.css('.moduletable .custom'): - if div.css_first('h3').text(strip=True) != days[day_nth]: + for div in dom.css(".moduletable .custom"): + if div.css_first("h3").text(strip=True) != days[day_nth]: continue - foods = div.css('p') + foods = div.css("p") yield Soup(name=foods[0].text()) for food in foods[1:]: if food.text(): - parsed = re.match(r"\s*(?P[0-9]+)\s*\.\s*(?P[A-Z -]+)\s+(?P.*?)\s*(\([0-9 ,]+\))?\s*(?P[0-9]+),-", food.text()).groupdict() + parsed = re.match( + r"\s*(?P[0-9]+)\s*\.\s*(?P[A-Z -]+)\s+(?P.*?)\s*(\([0-9 ,]+\))?\s*(?P[0-9]+),-", # noqa: E501 + food.text(), + ).groupdict() yield Lunch(**parsed) + @restaurant("Saloon Pub", "http://www.saloon-pub.cz/cs/denni-nabidka/", Location.Poruba) def saloon_pub(dom): day = dom.css_first(f'#{datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d")} + section') if not day: return - yield Soup(name=day.css_first('.category-info').text()) - for tr in day.css('.main-meal-info'): - yield Lunch(name=tr.css_first('.meal-name').text(), price=tr.css_first('.meal-price').text()) + yield Soup(name=day.css_first(".category-info").text()) + for tr in day.css(".main-meal-info"): + yield Lunch(name=tr.css_first(".meal-name").text(), price=tr.css_first(".meal-price").text()) + -@restaurant("Parlament", "https://www.restauraceparlament.cz/", Location.Poruba) -def parlament(dom): +@restaurant("Parlament", "https://www.restauraceparlament.cz/", Location.Poruba) # codespell:ignore +def parlament(dom): # codespell:ignore day_nth = datetime.datetime.today().weekday() - day = Selector(dom.css_first('.txt'), 'div div').text_contains(days[day_nth]) + day = Selector(dom.css_first(".txt"), "div div").text_contains(days[day_nth]) 
if day: day = day.matches[0] - yield Soup(day.css_first('* + dt').text()) - for line in day.css_first('* + dt + p').text().splitlines(): - m = re.match(r'(?P\d+)\.\s*(?P.*?)(?P\d+),-Kč', line) + yield Soup(day.css_first("* + dt").text()) + for line in day.css_first("* + dt + p").text().splitlines(): + m = re.match(r"(?P\d+)\.\s*(?P.*?)(?P\d+),-Kč", line) if m: yield Lunch(**m.groupdict()) + @restaurant("Plzenka aura", "https://www.plzenkaaura.cz/denni-menu", Location.Poruba) def plzenka(dom): food_type = None - for el in dom.css('.list-items > *'): - if el.tag == 'h5': + for el in dom.css(".list-items > *"): + if el.tag == "h5": food_type = { "POLÉVKA": Soup, "HLAVNÍ JÍDLO": Lunch, }.get(el.text(strip=True), None) elif food_type: if food_type == Soup: - yield Soup(el.css_first('.modify_item').text()) + yield Soup(el.css_first(".modify_item").text()) else: yield Lunch( - name=el.css_first('.modify_item').text(), - ingredients=el.css_first('.food-info').text(), - price=el.css_first('.menu-price').text(), - ) + name=el.css_first(".modify_item").text(), + ingredients=el.css_first(".food-info").text(), + price=el.css_first(".menu-price").text(), + ) + @restaurant("El Amigo Muerto", "https://www.menicka.cz/api/iframe/?id=5560", Location.Poruba) def el_amigo_muerto(dom): yield from menicka_parser(dom) + @restaurant("Rusty Bell Pub", "https://www.menicka.cz/api/iframe/?id=1547", Location.Poruba) def rusty_bell_pub(dom): foods = list(menicka_parser(dom)) @@ -343,66 +372,72 @@ def rusty_bell_pub(dom): food.num = None yield food + @restaurant("Kurnik sopa", "https://www.kurniksopahospoda.cz", Location.Poruba) def kurniksopa(dom): - for pivo in dom.css('#naCepu-list tr'): - name = pivo.css_first('.nazev').text() - deg = pivo.css_first('.stupne').text() - type = pivo.css_first('.typ').text() - origin = pivo.css_first('.puvod').text() + for pivo in dom.css("#naCepu-list tr"): + name = pivo.css_first(".nazev").text() + deg = pivo.css_first(".stupne").text() + type = 
pivo.css_first(".typ").text() + origin = pivo.css_first(".puvod").text() yield Lunch( - name=f"{name} {deg} - {type}, {origin}", + name=f"{name} {deg} - {type}, {origin}", ) + @restaurant("Sbeerka", "https://sbeerka.cz/denni-nabidka", Location.Poruba) async def sbeerka(dom, http): - REGEXP = re.compile(r'(?P.*?)\s*(/[0-9,\s*]+/)?\s*(?P[0-9]+\s*,-)') + REGEXP = re.compile(r"(?P.*?)\s*(/[0-9,\s*]+/)?\s*(?P[0-9]+\s*,-)") t = None - for line in dom.css_first('.wysiwyg').text().splitlines(): + for line in dom.css_first(".wysiwyg").text().splitlines(): line = line.strip() - if 'Polévky' in line: + if "Polévky" in line: t = Soup - elif 'Hlavní jídla' in line: + elif "Hlavní jídla" in line: t = Lunch - elif t and 'Záloha' not in line: + elif t and "Záloha" not in line: m = REGEXP.search(line) if m: yield t(**m.groupdict()) - PRICE_REGEXP = re.compile(r'([0-9]+)\s*,-') - response = await http.get("https://sbeerka.cz/aktualne-na-cepu", headers={'User-Agent': USER_AGENT}) + PRICE_REGEXP = re.compile(r"([0-9]+)\s*,-") + response = await http.get("https://sbeerka.cz/aktualne-na-cepu", headers={"User-Agent": USER_AGENT}) dom = HTMLParser(response.text) - for beer in dom.css('.wysiwyg li'): + for beer in dom.css(".wysiwyg li"): price = None m = PRICE_REGEXP.search(beer.text()) if m: price = m.group(0) yield Lunch(name=beer.text(), price=price) + @restaurant("La Futura", "https://lafuturaostrava.cz/", Location.Dubina) def lafutura(dom): - container = dom.css_first('.jet-listing-dynamic-repeater__items') + container = dom.css_first(".jet-listing-dynamic-repeater__items") if not container: return - for item in container.css('.jet-listing-dynamic-repeater__item'): - tds = item.css('td') + for item in container.css(".jet-listing-dynamic-repeater__item"): + tds = item.css("td") if "POLÉVKA" in tds[0].text(strip=True).upper(): yield Soup(name=tds[1].text()) else: yield Lunch(name=tds[1].text(), price=tds[2].text()) + @restaurant("Srub", "https://www.menicka.cz/api/iframe/?id=5568", 
Location.Dubina) def srub(dom): yield from menicka_parser(dom) + @restaurant("U formana", "https://www.menicka.cz/api/iframe/?id=4405", Location.Dubina) def uformana(dom): yield from menicka_parser(dom) + @restaurant("Maston", "https://maston.cz/jidelni-listek/", Location.Dubina) async def maston(dom, http): - srcs = dom.css_first('.attachment-large').attrs['srcset'] - img_url = srcs.split(',')[-1].strip().split(' ')[0] + srcs = dom.css_first(".attachment-large").attrs["srcset"] + img_url = srcs.split(",")[-1].strip().split(" ")[0] img = (await http.get(img_url)).content text = await subprocess_check_output(["tesseract", "-l", "ces", "--psm", "4", "-", "-"], img) @@ -411,65 +446,73 @@ async def maston(dom, http): tomorrow = datetime.datetime.strftime(datetime.datetime.now() + datetime.timedelta(days=1), "%-d%-m") capturing = False for line in text.splitlines(): - txt = line.replace(' ', '').replace('.', '') + txt = line.replace(" ", "").replace(".", "") if txt.endswith(today): capturing = True elif capturing: - if 'SAMOSTATN' in txt.upper() or tomorrow in txt: + if "SAMOSTATN" in txt.upper() or tomorrow in txt: break - if 'POLÉVKA' in line: - yield Soup(line.split(':', 1)[1]) + if "POLÉVKA" in line: + yield Soup(line.split(":", 1)[1]) else: - m = re.search(r'((?P\d)\))?\s*(?P.+)(\s*(?P\d+),-)?', line) + m = re.search(r"((?P\d)\))?\s*(?P.+)(\s*(?P\d+),-)?", line) if m: yield Lunch(**m.groupdict()) + @restaurant("Kozlovna U Ježka", "https://www.menicka.cz/api/iframe/?id=5122", Location.Dubina) def kozlovna(dom): yield from menicka_parser(dom) + @restaurant("Fontána", "https://www.menicka.cz/api/iframe/?id=1456", Location.Dubina) def fontana(dom): yield from menicka_parser(dom) + @restaurant("Burger & Beer Brothers", "https://www.menicka.cz/api/iframe/?id=7863", Location.Olomouc) def bbbrothers(dom): yield from menicka_parser(dom) + @restaurant("Café Restaurant Caesar", "https://www.menicka.cz/api/iframe/?id=5293", Location.Olomouc) def caesar(dom): yield from 
menicka_parser(dom) + @restaurant("Morgans restaurant", "https://www.menicka.cz/api/iframe/?id=5294", Location.Olomouc) def morgans(dom): yield from menicka_parser(dom) + @restaurant("U Mořice", "https://www.menicka.cz/api/iframe/?id=5299", Location.Olomouc) def moric(dom): yield from menicka_parser(dom) + @restaurant("Kikiriki", "https://www.menicka.cz/api/iframe/?id=5309", Location.Olomouc) def kikiriki(dom): current_day = datetime.datetime.now().strftime("%-d.%-m.%Y") - for day_dom in dom.css('.content'): - day = day_dom.css_first('h2').text(strip=True).split(' ', 2)[1] + for day_dom in dom.css(".content"): + day = day_dom.css_first("h2").text(strip=True).split(" ", 2)[1] if current_day not in day: continue - parsed_food = [] - for food in day_dom.css('.soup'): - if 'Pro tento den nebylo zadáno menu' in food.text(): + for food in day_dom.css(".soup"): + if "Pro tento den nebylo zadáno menu" in food.text(): break - txt = food.css_first('.food').text() - txt = re.sub(r'^\s*.*[0-9]+\s*[,.]\s*[0-9]+\s*l?\s*', '', txt) + txt = food.css_first(".food").text() + txt = re.sub(r"^\s*.*[0-9]+\s*[,.]\s*[0-9]+\s*l?\s*", "", txt) lunch = txt - parsed_food.append(Lunch( - name=lunch, - price=food.css_first('.prize').text(), - )) + parsed_food.append( + Lunch( + name=lunch, + price=food.css_first(".prize").text(), + ) + ) soup = lcs([f.name for f in parsed_food]) yield Soup(soup) @@ -478,10 +521,12 @@ def kikiriki(dom): f.name = f.name[soup_len:-1] yield f + @restaurant("U Kristýna", "https://www.menicka.cz/api/iframe/?id=5471", Location.Olomouc) def kristyn(dom): yield from menicka_parser(dom) + @restaurant("Assen", "https://www.menicka.cz/api/iframe/?id=8767", Location.Zabreh) def assen(dom): yield from menicka_parser(dom) @@ -493,58 +538,60 @@ def fix_price(price): if not isinstance(price, str): return int(price) try: - sanitized = re.sub('kč', '', price, flags=re.IGNORECASE) - sanitized = sanitized.replace('.00', '').strip(string.punctuation + string.whitespace) + sanitized 
= re.sub("kč", "", price, flags=re.IGNORECASE) + sanitized = sanitized.replace(".00", "").strip(string.punctuation + string.whitespace) return int(sanitized) except ValueError as e: print(e) return None + async def gather_restaurants(allowed_restaurants=None): replacements = [ - (re.compile(r'^\s*(Polévka|BUSINESS MENU)', re.IGNORECASE), ''), - (re.compile(r'k menu\s*$'), ''), - (re.compile(r'(s|š|S|Š)vestk'), 'Trnk'), - (re.compile(r'\s*(,|:)\s*'), '\\1 '), - (re.compile(r'<[^<]+?>'), ''), - (re.compile(r'\d+\s*(g|ml|l|ks) '), ''), - (re.compile(r'\([^)]+\)'), ''), - (re.compile(r'(\s*[0-9]+\s*,)+\s*$'), ''), - (re.compile(r'A?\s*[0-9]+(,[0-9]+)*,? '), ''), - (re.compile(r' +'), ' '), + (re.compile(r"^\s*(Polévka|BUSINESS MENU)", re.IGNORECASE), ""), + (re.compile(r"k menu\s*$"), ""), + (re.compile(r"(s|š|S|Š)vestk"), "Trnk"), + (re.compile(r"\s*(,|:)\s*"), "\\1 "), + (re.compile(r"<[^<]+?>"), ""), + (re.compile(r"\d+\s*(g|ml|l|ks) "), ""), + (re.compile(r"\([^)]+\)"), ""), + (re.compile(r"(\s*[0-9]+\s*,)+\s*$"), ""), + (re.compile(r"A?\s*[0-9]+(,[0-9]+)*,? 
"), ""), + (re.compile(r" +"), " "), ] - UPPER_REGEXP = re.compile(r'[A-ZÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]') + UPPER_REGEXP = re.compile(r"[A-ZÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ]") def detect_encoding(text): - if b'windows-1250' in text: - return 'windows-1250' - return 'utf-8' - client = httpx.AsyncClient(default_encoding=detect_encoding, headers={'User-Agent': USER_AGENT}, timeout=15) + if b"windows-1250" in text: + return "windows-1250" + return "utf-8" + + client = httpx.AsyncClient(default_encoding=detect_encoding, headers={"User-Agent": USER_AGENT}, timeout=15) def cleanup(restaurant): def fix_name(name): name = unescape(name) for pattern, replacement in replacements: name = pattern.sub(replacement, name) - name = name.strip(string.punctuation + string.whitespace + string.digits + '–—\xa0') + name = name.strip(string.punctuation + string.whitespace + string.digits + "–—\xa0") uppers = len(UPPER_REGEXP.findall(name)) if uppers > len(name) / 2: name = name.lower() name = name.capitalize() return name - for t in ['lunches', 'soups']: + for t in ["lunches", "soups"]: num = 0 for food in restaurant.get(t, []): food.price = fix_price(food.price) food.name = fix_name(food.name) - if t == 'lunches': + if t == "lunches": if food.ingredients: food.ingredients = fix_name(food.ingredients) if isinstance(food.num, str): try: - food.num = int(food.num.replace('.', '')) + food.num = int(food.num.replace(".", "")) except ValueError: logging.warning("Failed to parse lunch position: %s", food.num) food.num = None @@ -556,24 +603,24 @@ def fix_name(name): async def collect(parser): start = time.time() res = { - 'name': parser.parser['title'], - 'url': parser.parser['url'], - 'location': parser.parser['location'], + "name": parser.parser["title"], + "url": parser.parser["url"], + "location": parser.parser["location"], } try: lunches = [] soups = [] args = {} - arg_names = parser.parser['args'] - if 'res' in arg_names or 'dom' in arg_names: - response = await client.get(parser.parser['url']) - if 'res' in 
arg_names: - args['res'] = response.text - elif 'dom' in arg_names: - args['dom'] = HTMLParser(response.text) - if 'http' in arg_names: - args['http'] = client + arg_names = parser.parser["args"] + if "res" in arg_names or "dom" in arg_names: + response = await client.get(parser.parser["url"]) + if "res" in arg_names: + args["res"] = response.text + elif "dom" in arg_names: + args["dom"] = HTMLParser(response.text) + if "http" in arg_names: + args["http"] = client html_request_time = time.time() - start start = time.time() parsed = parser(**args) @@ -587,60 +634,60 @@ async def collect(parser): else: raise "Unsupported item" match_time = time.time() - start - return cleanup({ - **res, - 'lunches': lunches, - 'soups': soups, - 'elapsed': html_request_time + match_time, - 'elapsed_html_request': html_request_time, - 'elapsed_parsing': match_time, - }) - except: # noqa: E722 + return cleanup( + { + **res, + "lunches": lunches, + "soups": soups, + "elapsed": html_request_time + match_time, + "elapsed_html_request": html_request_time, + "elapsed_parsing": match_time, + } + ) + except: # noqa: E722 return { **res, - 'error': traceback.format_exc(), - 'elapsed': time.time() - start, - 'elapsed_html_request': 0, - 'elapsed_parsing': 0, + "error": traceback.format_exc(), + "elapsed": time.time() - start, + "elapsed_html_request": 0, + "elapsed_parsing": 0, } - restaurants = [obj for _, obj in globals().items() if hasattr(obj, 'parser')] + restaurants = [obj for _, obj in globals().items() if hasattr(obj, "parser")] if not allowed_restaurants: - allowed_restaurants = [r.parser['name'] for r in restaurants] + allowed_restaurants = [r.parser["name"] for r in restaurants] + + return await asyncio.gather(*[collect(r) for r in restaurants if r.parser["name"] in allowed_restaurants]) - return await asyncio.gather(*[collect(r) for r in restaurants if r.parser['name'] in allowed_restaurants]) -if __name__ == '__main__': +if __name__ == "__main__": import argparse p = 
argparse.ArgumentParser() - p.add_argument('restaurant', nargs='*') - p.add_argument('--sort', '-s', choices=['error', 'time'], default='error') + p.add_argument("restaurant", nargs="*") + p.add_argument("--sort", "-s", choices=["error", "time"], default="error") args = p.parse_args() - logging.basicConfig( - format="[%(asctime)s] %(levelname)s %(name)s - %(message)s", - level=logging.INFO - ) + logging.basicConfig(format="[%(asctime)s] %(levelname)s %(name)s - %(message)s", level=logging.INFO) restaurants = asyncio.run(gather_restaurants(args.restaurant)) sorters = { - 'time': lambda r: r['elapsed'], - 'error': lambda r: ('error' in r, len(r.get('lunches', [])) == 0), + "time": lambda r: r["elapsed"], + "error": lambda r: ("error" in r, len(r.get("lunches", [])) == 0), } exit_code = 0 for restaurant in sorted(restaurants, key=sorters[args.sort]): print() - print(restaurant['name'], f"({restaurant['elapsed']:.3}s)") - if 'error' in restaurant: + print(restaurant["name"], f"({restaurant['elapsed']:.3}s)") + if "error" in restaurant: exit_code = 1 - print(restaurant['error']) + print(restaurant["error"]) else: - for soup in restaurant['soups']: - print(' ', soup) - for lunch in restaurant['lunches']: - print(' ', lunch) + for soup in restaurant["soups"]: + print(" ", soup) + for lunch in restaurant["lunches"]: + print(" ", lunch) exit(exit_code) diff --git a/poetry.lock b/poetry.lock index 73c4c96..e816326 100644 --- a/poetry.lock +++ b/poetry.lock @@ -58,6 +58,17 @@ files = [ {file = "certifi-2024.6.2.tar.gz", hash = "sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516"}, ] +[[package]] +name = "cfgv" +version = "3.4.0" +description = "Validate configuration and produce human readable error messages." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + [[package]] name = "click" version = "8.1.7" @@ -83,6 +94,17 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "distlib" +version = "0.3.8" +description = "Distribution utilities" +optional = false +python-versions = "*" +files = [ + {file = "distlib-0.3.8-py2.py3-none-any.whl", hash = "sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784"}, + {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, +] + [[package]] name = "dnspython" version = "2.6.1" @@ -176,6 +198,22 @@ typer = ">=0.12.3" [package.extras] standard = ["fastapi", "uvicorn[standard] (>=0.15.0)"] +[[package]] +name = "filelock" +version = "3.15.4" +description = "A platform independent file lock." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "filelock-3.15.4-py3-none-any.whl", hash = "sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7"}, + {file = "filelock-3.15.4.tar.gz", hash = "sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-asyncio (>=0.21)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)", "virtualenv (>=20.26.2)"] +typing = ["typing-extensions (>=4.8)"] + [[package]] name = "h11" version = "0.14.0" @@ -398,6 +436,20 @@ cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] +[[package]] +name = "identify" +version = "2.5.36" +description = "File identification library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "identify-2.5.36-py2.py3-none-any.whl", hash = "sha256:37d93f380f4de590500d9dba7db359d0d3da95ffe7f9de1753faa159e71e7dfa"}, + {file = "identify-2.5.36.tar.gz", hash = "sha256:e5e00f54165f9047fbebeb4a560f9acfb8af4c88232be60a488e9b68d122745d"}, +] + +[package.extras] +license = ["ukkonen"] + [[package]] name = "idna" version = "3.7" @@ -530,6 +582,17 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] +[[package]] +name = "nodeenv" +version = "1.9.1" +description = "Node.js virtual environment builder" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, +] + [[package]] 
name = "orjson" version = "3.10.5" @@ -585,6 +648,40 @@ files = [ {file = "orjson-3.10.5.tar.gz", hash = "sha256:7a5baef8a4284405d96c90c7c62b755e9ef1ada84c2406c24a9ebec86b89f46d"}, ] +[[package]] +name = "platformdirs" +version = "4.2.2" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." +optional = false +python-versions = ">=3.8" +files = [ + {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"}, + {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] +type = ["mypy (>=1.8)"] + +[[package]] +name = "pre-commit" +version = "3.5.0" +description = "A framework for managing and maintaining multi-language pre-commit hooks." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pre_commit-3.5.0-py2.py3-none-any.whl", hash = "sha256:841dc9aef25daba9a0238cd27984041fa0467b4199fc4852e27950664919f660"}, + {file = "pre_commit-3.5.0.tar.gz", hash = "sha256:5804465c675b659b0862f07907f96295d490822a450c4c40e747d0b1c6ebcb32"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + [[package]] name = "pydantic" version = "2.7.4" @@ -835,6 +932,32 @@ typing-extensions = {version = ">=4.0.0,<5.0", markers = "python_version < \"3.9 [package.extras] jupyter = ["ipywidgets (>=7.5.1,<9)"] +[[package]] +name = "ruff" +version = "0.4.10" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.4.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:5c2c4d0859305ac5a16310eec40e4e9a9dec5dcdfbe92697acd99624e8638dac"}, + {file = "ruff-0.4.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:a79489607d1495685cdd911a323a35871abfb7a95d4f98fc6f85e799227ac46e"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1dd1681dfa90a41b8376a61af05cc4dc5ff32c8f14f5fe20dba9ff5deb80cd6"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c75c53bb79d71310dc79fb69eb4902fba804a81f374bc86a9b117a8d077a1784"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18238c80ee3d9100d3535d8eb15a59c4a0753b45cc55f8bf38f38d6a597b9739"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d8f71885bce242da344989cae08e263de29752f094233f932d4f5cfb4ef36a81"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:330421543bd3222cdfec481e8ff3460e8702ed1e58b494cf9d9e4bf90db52b9d"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9e9b6fb3a37b772628415b00c4fc892f97954275394ed611056a4b8a2631365e"}, + {file = "ruff-0.4.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f54c481b39a762d48f64d97351048e842861c6662d63ec599f67d515cb417f6"}, + {file = "ruff-0.4.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:67fe086b433b965c22de0b4259ddfe6fa541c95bf418499bedb9ad5fb8d1c631"}, + {file = "ruff-0.4.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:acfaaab59543382085f9eb51f8e87bac26bf96b164839955f244d07125a982ef"}, + {file = "ruff-0.4.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:3cea07079962b2941244191569cf3a05541477286f5cafea638cd3aa94b56815"}, + {file = "ruff-0.4.10-py3-none-musllinux_1_2_x86_64.whl", hash = 
"sha256:338a64ef0748f8c3a80d7f05785930f7965d71ca260904a9321d13be24b79695"}, + {file = "ruff-0.4.10-py3-none-win32.whl", hash = "sha256:ffe3cd2f89cb54561c62e5fa20e8f182c0a444934bf430515a4b422f1ab7b7ca"}, + {file = "ruff-0.4.10-py3-none-win_amd64.whl", hash = "sha256:67f67cef43c55ffc8cc59e8e0b97e9e60b4837c8f21e8ab5ffd5d66e196e25f7"}, + {file = "ruff-0.4.10-py3-none-win_arm64.whl", hash = "sha256:dd1fcee327c20addac7916ca4e2653fbbf2e8388d8a6477ce5b4e986b68ae6c0"}, + {file = "ruff-0.4.10.tar.gz", hash = "sha256:3aa4f2bc388a30d346c56524f7cacca85945ba124945fe489952aadb6b5cd804"}, +] + [[package]] name = "selectolax" version = "0.3.21" @@ -1128,6 +1251,26 @@ files = [ docs = ["Sphinx (>=4.1.2,<4.2.0)", "sphinx-rtd-theme (>=0.5.2,<0.6.0)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"] test = ["Cython (>=0.29.36,<0.30.0)", "aiohttp (==3.9.0b0)", "aiohttp (>=3.8.1)", "flake8 (>=5.0,<6.0)", "mypy (>=0.800)", "psutil", "pyOpenSSL (>=23.0.0,<23.1.0)", "pycodestyle (>=2.9.0,<2.10.0)"] +[[package]] +name = "virtualenv" +version = "20.26.3" +description = "Virtual Python Environment builder" +optional = false +python-versions = ">=3.7" +files = [ + {file = "virtualenv-20.26.3-py3-none-any.whl", hash = "sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589"}, + {file = "virtualenv-20.26.3.tar.gz", hash = "sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a"}, +] + +[package.dependencies] +distlib = ">=0.3.7,<1" +filelock = ">=3.12.2,<4" +platformdirs = ">=3.9.1,<5" + +[package.extras] +docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"] +test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", 
"time-machine (>=2.10)"] + [[package]] name = "watchfiles" version = "0.22.0" @@ -1299,4 +1442,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "7f2014a38e77cd46925813d450f2fe9dd1febc200ca14ad68487c12b93f8657c" +content-hash = "7d12a2b043f77e9dad21fec9e907891e21e31196fa9dd2a113d8339da1e94b99" diff --git a/public_transport.py b/public_transport.py index 933ee65..09a07d3 100755 --- a/public_transport.py +++ b/public_transport.py @@ -1,47 +1,51 @@ #!/usr/bin/env python3 -from selectolax.parser import HTMLParser -from time import time +import asyncio import datetime import itertools +from time import time + import httpx -import asyncio +from selectolax.parser import HTMLParser async def public_transport_connections(sources, destinations): async def fetch(http, source, destination): - url = f'https://idos.idnes.cz/odis/spojeni/vysledky/?f={source}&fc=303003&t={destination}&tc=303003' + url = f"https://idos.idnes.cz/odis/spojeni/vysledky/?f={source}&fc=303003&t={destination}&tc=303003" start = time() links = [] resp = await http.get(url) print(f"{url} took {time() - start} sec") dom = HTMLParser(resp.text) - for node in dom.css('.connection.box'): + for node in dom.css(".connection.box"): link = { - 'connections': [], + "connections": [], } - total = node.css('.total strong')[0].text() - if 'hod' in total: + total = node.css(".total strong")[0].text() + if "hod" in total: continue - link['total'] = int(total.split(' ')[0]) - for a in node.css('.outside-of-popup'): + link["total"] = int(total.split(" ")[0]) + for a in node.css(".outside-of-popup"): + def to_datetime(s): date = datetime.datetime.now() - hour, minute = s.split(':') + hour, minute = s.split(":") return date.replace(hour=int(hour), minute=int(minute), second=0) def p(node): return { - 'time': to_datetime(node.css_first('.time').text()), - 'station': node.css_first('.station strong').text(), + "time": to_datetime(node.css_first(".time").text()), + "station": 
node.css_first(".station strong").text(), } - link['connections'].append({ - 'link': a.css_first('.line-title h3').text(), - 'from': p(a.css_first('.stations .item')), - 'to': p(a.css('.stations .item')[1]), - }) + link["connections"].append( + { + "link": a.css_first(".line-title h3").text(), + "from": p(a.css_first(".stations .item")), + "to": p(a.css(".stations .item")[1]), + } + ) links.append(link) return links @@ -53,16 +57,22 @@ def p(node): def time_to_num(t): return t - p = t.split(':') + p = t.split(":") p[0] = int(p[0]) p[1] = int(p[1]) return p[0] * 60 + p[1] - all_links.sort(key=lambda i: (time_to_num(i['connections'][-1]['to']['time']), i["total"])) + all_links.sort(key=lambda i: (time_to_num(i["connections"][-1]["to"]["time"]), i["total"])) return all_links -if __name__ == '__main__': + +if __name__ == "__main__": from pprint import pprint - result = asyncio.run(public_transport_connections(["Václava Jiřikovského"], ["Hlavní třída", "Rektorát VŠB", "Pustkovecká", "Poruba,Studentské koleje"])) + + result = asyncio.run( + public_transport_connections( + ["Václava Jiřikovského"], ["Hlavní třída", "Rektorát VŠB", "Pustkovecká", "Poruba,Studentské koleje"] + ) + ) pprint(result) diff --git a/pyproject.toml b/pyproject.toml index 141ada0..2c12753 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,13 +8,48 @@ package-mode = false [tool.poetry.dependencies] python = "^3.8" + httpx = "^0.27.0" selectolax = "^0.3.21" + fastapi = "^0.111.0" uvicorn = {extras = ["standard"], version = "^0.30.1"} + redis = {extras = ["hiredis"], version = "^5.0.7"} +[tool.poetry.group.dev.dependencies] +pre-commit = "^3.5.0" +ruff = "^0.4.6" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.mypy] +strict = true + +[tool.ruff] +line-length = 120 + +[tool.ruff.format] +quote-style = "double" +docstring-code-format = true + +[tool.ruff.lint] +extend-select = [ + "C4", # flake8-comprehensions + "E", # Error + "I", # isort + "F", # 
pyflakes + "N", # pep8-naming + "Q", # flake8-quotes + "SIM", # flake8-simplify + "TRY", # tryceratops + "UP", # pyupgrade + "W", # Warning + "YTT", # flake8-2020 +] +ignore = ["N806"] + +[tool.ruff.lint.isort] +known-third-party = ["fastapi", "pydantic", "starlette"]